In [2]:
import openai, langchain, os
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter 
from langchain_community.embeddings import SentenceTransformerEmbeddings
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain_community.llms import OpenAI 
from dotenv import load_dotenv 
from sentence_transformers import SentenceTransformer  

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
model = SentenceTransformer('paraphrase-MiniLM-L3-v2')
embeddings = SentenceTransformerEmbeddings(model_name='paraphrase-MiniLM-L3-v2')

load_dotenv()


  embeddings = SentenceTransformerEmbeddings(model_name='paraphrase-MiniLM-L3-v2')


True

In [4]:
from langchain.document_loaders import DirectoryLoader, TextLoader, CSVLoader, UnstructuredFileLoader
from langchain_community.document_loaders import PyPDFLoader
from pathlib import Path

def load_docs_from_directory(path):
    def custom_loader(file_path_str):
        file_path = Path(file_path_str)
        ext = file_path.suffix.lower()

        loader_map = {
            ".pdf": PyPDFLoader,
            ".txt": TextLoader,
            ".csv": CSVLoader,
        }

        loader_cls = loader_map.get(ext, UnstructuredFileLoader)
        return loader_cls(str(file_path))

    loader = DirectoryLoader(
        path,
        glob="**/*",
        loader_cls=custom_loader
    )
    documents = loader.load()
    print(documents)
    return documents



In [5]:
def chunk_data(docs, chunk_size=800, chunk_overlap=50):
    text_spliter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    doc = text_spliter.split_documents(docs)
    print(type(doc), type(doc[0]), len(doc))
    
    return doc


In [6]:
data = load_docs_from_directory('./static')
doc_list = chunk_data(docs=data)

vector = embeddings.embed_query("any text")
dimension = len(vector)
print(dimension)

[Document(metadata={'source': 'static\\13.txt'}, page_content='An alien called Jasper landed on Planet Mars in the year 1945.\n\nJasper had 3 heads and 6 legs making it almost impossible for his enemy to chase it. \n\nHarry Potter was a giant unicorn who lived in scotland and had a gibberish accent. \n\nHarry had 5 legs instead of usual 4 and 4 eyes instead of 2. '), Document(metadata={'source': 'static\\20.txt'}, page_content='During friendly encounters and bonding, tigers rub against each other\'s bodies.[97] Facial expressions include the "defence threat", which involves a wrinkled face, bared teeth, pulled-back ears and widened pupils.[98][47] Both males and females show a flehmen response, a characteristic curled-lip grimace, when smelling urine markings. Males also use the flehmen to detect the markings made by tigresses in oestrus.[47] Tigers will move their ears around to display the white spots, particularly during aggressive encounters and between mothers and cubs.[99] They a

In [7]:
api_key=os.getenv("PINECONE_API_KEY")

In [8]:
index_name = "lcvector"

vectorstore_from_docs = PineconeVectorStore.from_documents(
        doc_list,
        index_name=index_name,
        embedding=embeddings
)


In [9]:
def retrieve_query(query, k=2):
    matching_results = vectorstore_from_docs.similarity_search(query, k=k
                                                               )
    
    print(matching_results)
    return matching_results


In [10]:
from langchain.chains.question_answering import load_qa_chain
from langchain_community.llms import Ollama

# Initialize the local LLM via Ollama
llm = Ollama(model="mistral", temperature=0.1)

# Load QA chain
chain = load_qa_chain(llm, chain_type="stuff")

def get_answer_from_docs(query):
    doc_search=retrieve_query(query)
    response= chain.run(input_documents=doc_search, question=query) 
    return response 



  llm = Ollama(model="mistral", temperature=0.1)
stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  chain = load_qa_chain(llm, chain_type="stuff")


In [11]:
our_query= "Who is Jasper from Planet Mars? How many legs it had?"
answer=get_answer_from_docs(our_query)
print(answer)

[Document(id='2005647d-0ed4-4bac-b288-da602b955b47', metadata={'source': 'static\\13.txt'}, page_content='An alien called Jasper landed on Planet Mars in the year 1945.\n\nJasper had 3 heads and 6 legs making it almost impossible for his enemy to chase it.'), Document(id='b77818cc-3733-4ad6-90df-95f1a1dcf128', metadata={'source': 'static\\13.txt'}, page_content='An alien called Jasper landed on Planet Mars in the year 1945.\n\nJasper had 3 heads and 6 legs making it almost impossible for his enemy to chase it.')]


  response= chain.run(input_documents=doc_search, question=query)


 Jasper is an alien who landed on Planet Mars in the year 1945. He has 6 legs.


In [None]:
from langchain.agents import Tool, initialize_agent, AgentType
import random
from datetime import datetime as dt

def juggler(a, b): 
    return [random.randint(a, b) for _ in range(5)]

def daysGetter(date: str) -> int:
    
    if "'" in date:
        date = date.replace("'", "")
    start_date = dt.strptime(date, "%d-%m-%Y")
    current_date = dt.now()
    days_diff = (current_date - start_date).days
    return days_diff


def getHotelsData():
    db = [
        {
            "id" : "1",
        "name" : "Taj Santa cruz", 
        "room_type" : "Deluxe", 
        "breakfast" : "yes"
    }, 
        {
            "id" : "2",
        "name" : "Vivanta", 
        "room_type" : "Normal", 
        "breakfast" : "yes"
    },
        {
            "id" : "3",
        "name" : "Mariott", 
        "room_type" : "Deluxe", 
        "breakfast" : "no"
    }
        ]
    return db
 
def ShowHotels(place: str, date : str, duration: str) -> list[dict]: 
    """Returns a list of available hotels for specified duration to the user 
    along with options"""
    
    l = getHotelsData()
    return l
    

def CreateBooking(id: str) -> bool:
    "Returns true if booking was successful else false"
    data  = getHotelsData()
    
    booking = list(filter(lambda x : x["id"] == id, data) )[0]
    name, room = booking.get("name"), booking.get("room_type")
    print(f"Booking confimed for {name} for the room type {room}")
    
    

juggler_tool = Tool(name= "Juggler", \
                    func=lambda x: juggler( *map( int, x.split(" "))), 
                    description="Pass a single string arguments containing two integers which mean the range (a, b), pass like a b ; where a and b are integers"
                    )


days_getter_tool = Tool(name= "getdays", \
                        func= daysGetter, 
                        description="Pass a single string called date and gets the time difference in days from current date"
                        )


showHotels_tool = Tool(name = "GetHotels", \
                     func = lambda x: ShowHotels(*x.split(" ")), 
                     description= "Use this tool whenever a new query related to hotel bookings is there" 
                     "This tool is used to enquire about hotels by passing in three parameters place, start date and duration in days." 
                     "The start date d is to be passed as string and should be of this format dd-mm-yyyy"
                     "The current year can be get by checking google"
                     "The parameters which are place p, start date d and duration du are passed like p d du in the function"
                     "Ask the user to reply with desired id to create a booking next"
                     )

bookings_tool = Tool(name="Booker", 
                     func = CreateBooking, 
                     description= "Use this tool to create a new booking using the id provided by user" 
                        "Just pass id as the string parameter and booking should be made"
                            "Returns true if booking was successful else false"
                     )



In [140]:
document_search_tool = Tool(name="docu_search", \
                            func=get_answer_from_docs, \
                            description = "Use this tool only to get any information about Harry Potter."
                            "If you have to get information about Harry Potter do not use any other source"
                            "Pass the related question about Harry Potter as the argument"
                            )

In [141]:
import pandas as pd
from langchain_experimental.agents import create_pandas_dataframe_agent
# from langchain.llms import OpenAI

# Step 1: Read multiple CSVs into a list of DataFrames
csv_files = ["./static/customers1.csv"]
dfs = [pd.read_csv(file) for file in csv_files]

# Step 2: Concatenate all into a single DataFrame (can also merge if needed)
combined_df = pd.concat(dfs, ignore_index=True)

# Step 3: Create a Pandas agent
pd_agent = create_pandas_dataframe_agent(llm, combined_df, verbose=True, allow_dangerous_code=True)



In [142]:
import re

def run_agent_safely(agent, query):
    try:
        return agent.run(query)
    except Exception as e:
        print("Caught Exception:", str(e))

        # Try to extract "Final Answer" from the exception string
        match = re.search(r"Final Answer\s*:\s*(.*)", str(e), re.IGNORECASE | re.DOTALL)
        if match:
            extracted_answer = match.group(1).strip().split('\n')[0]  # Grab only the first line if multiline
            print("Extracted Final Answer:", extracted_answer)
            return extracted_answer
        else:
            print("Could not extract Final Answer from the error.")
            return "An error occurred and no final answer could be recovered."


In [143]:
pd_tool = Tool(
    name="PandasData", 
    func=pd_agent.run, 
    description= ("Use this tool to query customer CSV data loaded into a dataframe. "
        "This includes retrieving or analyzing columns such as 'Subscription Date', 'Customer ID', etc. "
        "Always use this tool to interact with the customer CSV, instead of using raw Python."
    )
)

In [144]:
agent= initialize_agent(
    tools= [ document_search_tool, showHotels_tool, bookings_tool], 
    llm = llm, 
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, 
    verbose= True, 
    handle_parsing_errors=True
)

In [145]:
# Step 4: Ask your questions
response = run_agent_safely(agent=agent, query = "I am thinking to book a hotel for Goa from 1st May this year for 5 days. Can you help me with that?")
print(response)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find out about hotels in Goa for the specified dates and then create a booking if possible.

Action: GetHotels
Action Input: Goa, 01-05-2023, 5[0m
Observation: [33;1m[1;3m[{'id': '1', 'name': 'Taj Santa cruz', 'room_type': 'Deluxe', 'breakfast': 'yes'}, {'id': '2', 'name': 'Vivanta', 'room_type': 'Normal', 'breakfast': 'yes'}, {'id': '3', 'name': 'Mariott', 'room_type': 'Deluxe', 'breakfast': 'no'}][0m
Thought:[32;1m[1;3m I have found some hotels in Goa for the specified dates. Here are the options:

1. Taj Santa cruz (Deluxe room with breakfast)
2. Vivanta (Normal room with breakfast)
3. Mariott (Deluxe room without breakfast)

Please let me know your preferred hotel id to proceed with booking.

Final Answer: I have found some hotels in Goa for the specified dates. Here are the options:
1. Taj Santa cruz (Deluxe room with breakfast)
2. Vivanta (Normal room with breakfast)
3. Mariott (Deluxe room without brea

In [None]:
# Step 4: Ask your questions
response = run_agent_safely(agent=agent, query = "Sure, can you  book one with id 2 please?")
print(response)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m The user has asked me to create a new booking using the id provided. I need to use the Booker tool for this.

Action: Booker
Action Input: "2"[0mBooking confimed for Vivanta for the room type Normal

Observation: [38;5;200m[1;3mNone[0m
Thought:[32;1m[1;3m The action has been executed, but since the Booker function returns None, there is no observation to report. However, assuming that the booking was successfully created, I can now say that the final answer to the original question is "Yes, the booking with id 2 has been made."[0m
Observation: Invalid Format: Missing 'Action:' after 'Thought:
Thought:[32;1m[1;3m I apologize for the confusion. Here's the corrected response:

Thought: The user has asked me to create a new booking using the id provided. I need to use the Booker tool for this.

Action: Booker
Action Input: "2"[0mBooking confimed for Vivanta for the room type Normal

Observation: [38;5;200m[1;3mNone[

Deletion from DB code

In [None]:
# idx = pc.Index(index_name)

# # Delete all vectors from the index
# idx.delete(delete_all=True)

# print("All vectors have been deleted from the index.")

In [None]:
# idx = pc.Index(index_name)
# pdf_path = "static\\RahatBhambriResume_alter.pdf"

# idx.delete(
#     filter={
#         # "source": pdf_path
#     }
# )
# print(f"Vectors with source '{pdf_path}' have been deleted.")
