In [1]:
import os
from pymongo import MongoClient
from typing import List, Dict

# MongoDB connection settings are read from the environment (None when unset).
MONGO_URI = os.environ.get("MONGODB_URI")
DB_NAME = os.environ.get("MONGODB_DB_NAME")
# Collection that is queried for shipping-instruction documents.
collection_name = 'si'

# LangChain tracing / project labels plus the user agent, exported for the
# libraries imported further down.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "containergenie.ai",
        'USER_AGENT': 'chapter2-1',
    }
)

####################################################################################

from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.tools.retriever import create_retriever_tool
from langchain.agents import create_openai_functions_agent, AgentExecutor

###################################################################################

# block included to check whether the whole chain works out or not
def fetch_data_from_mongodb(collection_name: str, query: Dict = None, limit: int = None) -> List[Dict]:
    """Fetch documents from a MongoDB collection and return them as a list.

    Connects with the module-level ``MONGO_URI`` and reads from the database
    named by ``DB_NAME``.

    Args:
        collection_name: Name of the collection to read.
        query: Filter passed to ``find``; ``None`` or an empty dict matches
            every document.
        limit: Maximum number of documents to return; ``None`` returns all
            matching documents.

    Returns:
        The matching documents, fully materialised into a list.
    """
    client = MongoClient(MONGO_URI)
    try:
        cursor = client[DB_NAME][collection_name].find(query or {})

        # `limit` used to be accepted but silently ignored.
        if limit is not None:
            cursor = cursor.limit(limit)

        # The cursor is lazy, so consume it while the client is still open.
        return list(cursor)
    finally:
        # Close even when the query raises so the connection is not leaked.
        client.close()

In [2]:
# Build a set of tools 

## Web search tool returning at most 5 results per query.
# TavilySearchResults is configured through `max_results`; the previous `k=5`
# is not a field of the tool, so the cap is now stated with the real parameter.
search = TavilySearchResults(max_results=5)

In [3]:

## Look for relevant parts in the compliance PDF.
# The file location can be overridden with the COMPLIANCE_PDF_PATH environment
# variable; the original absolute path stays as the default so existing runs
# behave the same.
PDF_loader = PyPDFLoader(
    os.getenv(
        "COMPLIANCE_PDF_PATH",
        "/Users/seongyeon/Desktop/Aifflethon/aiffelthon_tys/ksy974498/si_validation_story/resources/docs/cherry_comliance.pdf",
    )
)

In [5]:
# Split the PDF into chunks of at most 1000 characters (measured with len)
# with 100 characters of overlap, trying the separators in the listed order.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ":", "-", ""],
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
)
PDF_split_docs = PDF_loader.load_and_split(text_splitter=text_splitter)

# Number of chunks produced.
len(PDF_split_docs)

769

In [6]:
# Preview only the first few chunks; displaying the whole list floods the
# notebook output with every chunk of the PDF.
PDF_split_docs[:3]

[Document(metadata={'source': '/Users/seongyeon/Desktop/Aifflethon/aiffelthon_tys/ksy974498/si_validation_story/resources/docs/cherry_comliance.pdf', 'page': 0}, page_content='CHERRY Shipping Line Company Policy  \nCHERRY Shipping Line - Comprehensive Company Policy  \nCHERRY Shipping Line:  UAE - Requirements and Restrictions  \nCHERRY Shipping Line:  United States of America (USA) - Requirements and Restrictions  \nCHERRY Shipping Line:  Qatar - Requirements and Restrictions   \nCHERRY Shipping Line:  Saudi Arabia - Requirements and Restrictions  \nCHERRY Shipping Line:  Jordan - Requirements and Restrictions   \nCHERRY Shipping Line:  Belgium - Requirements and Restrictions  \nCHERRY Shipping Line:  Canada - Requirements and Restrictions  \nCHERRY Shipping Line:  Germany - Requirements and Restrictions  \nCHERRY Shipping Line:  Netherlands – Requirements and Restrictions  \nCHERRY Shipping Line:  Australia - Requirements and Restrictions  \nCHERRY Shipping Line:  Singapore - Require

In [7]:

# Embed every PDF chunk with OpenAI embeddings and index the vectors in FAISS.
embeddings = OpenAIEmbeddings()

PDF_vector = FAISS.from_documents(documents=PDF_split_docs, embedding=embeddings)

# Retriever view over the index, wrapped as a tool the agent can call by name.
PDF_retriever = PDF_vector.as_retriever()

PDF_retriever_tool = create_retriever_tool(
    PDF_retriever,
    name="pdf_search",
    # Adjacent string literals are joined verbatim, so each fragment must carry
    # its own trailing space; without it the words run together
    # ("notifyPartyincluding", "entitybased").
    description="Use this tool for compliance for shipper, consignee, and notifyParty "
                "including checking what info is required for each entity "
                "based on the requirements of both the company and relevant countries",
)

In [8]:
# Tools handed to the agent: the web search and the compliance-PDF retriever.
tools = [
    search,
    PDF_retriever_tool,
]

In [9]:
# Chat model that drives the agent; temperature 0 keeps the validation output
# as repeatable as possible.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)

# Validation prompt. Template variables:
#   {data}             - the shipping instruction to validate
#   {bookingReference} - echoed in the report header
#   {agent_scratchpad} - the agent's intermediate tool calls
# The prompt refers to the retriever by its registered tool name (`pdf_search`),
# which is the name the model actually sees, and spells the party key
# `notifyParty` consistently with the tool description.
# NOTE(review): create_openai_functions_agent documents `agent_scratchpad` as a
# MessagesPlaceholder in a chat prompt; here it is interpolated as plain text -
# confirm this is intended.
prompt = PromptTemplate.from_template(
"""You are a documentation validation assistant specializing in verifying party details in shipping instructions.
Make sure shipper, consignee, and notifyParty in the data below contain all the essential info in the Guideline:

# Data: \n{data}


# Guideline              
1. Take a Chain of Thought approaching in the process.
2. It is the rule that you have to mention address, phone or fax number, and email address, which are fundamentally mandatory items.              
3. Put to use the pdf_search tool to tell whether email address is not required.
4. Confirm address including zip code is in proper format of the respective country.
5. Verify if phone or FAX number matches the general contacts format including country and area codes.
6. When the country code doesn't match the country itself, the digits might be for VOIP service.
7. Check whether email address is in proper format.
8. NotifyParty can be the same as consignee.
9. You don't improvise if you don't know anything based on the given conditions.
              
# Respond in the format as below:
This is the summarized validation report for shipping instruction.
*{bookingReference}*

1. Shipper
- detailed issue about address(only if any)
- detailed issue about phone or fax number(only if any)
- detailed issue about email address(only if any)

2. Consignee
- detailed issue about address(only if any)
- detailed issue about phone or fax number(only if any)
- detailed issue about email address(only if any)

3. Notify Party
- detailed issue about address(only if any)
- detailed issue about phone or fax number(only if any)
- detailed issue about email address(only if any)

# Answer:
{agent_scratchpad}""")

In [10]:
# Bind the model, the tool set and the validation prompt into a function-calling agent.
agent = create_openai_functions_agent(
    llm=llm,
    tools=tools,
    prompt=prompt,
)

In [11]:
# Executor that runs the agent with its tools; verbose=True prints each step.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
)

In [12]:
# Pull the shipping instruction for one booking reference and display it.
data = fetch_data_from_mongodb(
    collection_name=collection_name,
    query={"bookingReference": "CHERRY20240911091202"},
)
data

[{'_id': ObjectId('66e1562f0abd52a6a79a3250'),
  'bookingReference': 'CHERRY20240911091202',
  'voyageDetails': {'vesselName': 'ARONIA 3',
   'voyageNumber': '2024041',
   'bound': 'E'},
  'routeDetails': {'placeOfReceipt': 'BUSAN, KOREA',
   'portOfLoading': 'BUSAN, KOREA',
   'portOfDischarge': 'VLADIVOSTOK, RUSSIA',
   'placeOfDelivery': 'VLADIVOSTOK, RUSSIA',
   'finalDestination': ''},
  'paymentDetails': {'freightPaymentTerms': 'PREPAID',
   'freightPayableAt': 'BUSAN, KOREA'},
  'documentationDetails': {'blType': 'SURRENDER',
   'numberOfOriginalBLs': 0,
   'numberOfCopies': 0},
  'partyDetails': {'shipper': {'name': 'LX PANTOS CO., LTD.',
    'onBehalfOf': 'LG H&H CO., LTD',
    'address': 'LG GWANGHWAMOON BUILDING, 92 SINMUNNO 2-GA, JONGNO-GU, SEOUL 03184, SOUTH KOREA',
    'telephone': '+82-2-3500-0001',
    'fax': '+82-2-3500-1000'},
   'consignee': {'name': 'LLC IC GA GROUP',
    'address': '62 SACCO AND VANZETTI STR. OFFICE 703, 620014,, EKATERINBURG RUSSIA',
    'companyN

In [13]:
# Take the booking reference from the fetched record so the report header
# always names the document that is actually validated (the previously
# hard-coded value did not match the fetched booking).
if not data:
    raise ValueError("No shipping instruction was fetched; nothing to validate.")
booking_reference = data[0]["bookingReference"]

result = agent_executor.invoke({"bookingReference": booking_reference, "data": data})
print(result['output'])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `pdf_search` with `{'query': 'email address requirement for shipping instructions'}`


[0m[33;1m[1;3m1.2 Shipper Details  
• Full name and complete address required  
• Contact information:  
o Phone number with country and area code (mandatory)  
o Email address (mandatory)  
• P.O. Box is accepted but not sufficient alone; physical address required  
1.3 Consignee Details  
• Full name and complete address required  
• Contact information:  
o Phone number with country and area code (mandatory)  
o Email address (mandatory)  
• P.O. Box is accepted but not sufficient alone; physical address required  
• For "To Order" B/Ls:  
o Consignee field should state "TO ORDER OF [Bank Name]"  
o Notify party must be the actual consignee  
• Tax Identification Number (NINEA) required for Senegalese companies  
1.4 Notify Party Details  
• Full name and complete address required  
• Contact information:  
o Phone number w