In [None]:
#!pip install chromadb langchain

In [1]:
import pandas as pd

In [2]:
# Load the chat export, drop rows with any missing value, and keep only rows
# whose message is present and not the empty string; then renumber the index.
main_df = (
    pd.read_excel("topical_chat.xlsx")
    .dropna()
    .loc[lambda frame: frame["message"].notna() & frame["message"].ne("")]
    .reset_index(drop=True)
)

# Working subset: rows whose conversation_id is at most 100.
first_100_conversations = main_df.loc[main_df["conversation_id"].le(100)]


In [9]:
# Group messages that share a conversation id into one document per conversation.
unique_conversations_ids = first_100_conversations['conversation_id'].unique()
print("Unique Conversation ids:", unique_conversations_ids)

# A single groupby pass replaces re-filtering the whole frame once per id.
# sort=False keeps the groups in first-appearance order, which is the same
# order .unique() reports above.
# Messages are cast to str before joining so that a cell the Excel reader
# parsed as a number does not make str.join raise TypeError.
texts = [
    {"id": conversation_id, "text": "\n ".join(messages.astype(str))}
    for conversation_id, messages in first_100_conversations.groupby(
        "conversation_id", sort=False
    )["message"]
]

Unique Conversation ids: [  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
  91  92  93  94  95  96  97  98  99 100]


In [10]:
# Spot-check the concatenated messages of one conversation (the entry at index 3).

print(texts[3])

{'id': 4, 'text': "Hi!  do you like to dance?\n I love to dance a lot. How about you?\n  I am really bad, but it is a good time.\n Dancing is a lot of fun. Did you know that Bruce Lee was a great dancer?\n  I heard that, winning Cha Cha championships and everything!\n Yes that is amazing. He won the Hong Kong cha-cha championship back in 1958 in fact.\n I always just thought of him as a martial arts legend.  Now he is a dance legend of sorts too!\n Yeah!! That is correct. He was a fantastic martial artist. Did you know that Tupac danced ballet in high school?\n  Yeah!  He was the mouse king in the Nutcracker.  Thats pretty cool, I would definitely never have guessed that about him.\n Neither did I. That is insane because Tupac was a famous rapper.\n  He was indeed, his music is even in the library of congress.\n I didn't know this thanks for sharing.\n Sure thing!  Did you hear about Michael Jackson's special patent shoes?\n No. I know that Michael Jackson was a fantastic dancer but ca

In [5]:
#setting openai credentials

import os

# os.environ["OPENAI_API_KEY"] = "key"

In [23]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

import time
from tqdm import tqdm



# Embedding client; OpenAI credentials must be set for this to work.
embeddings = OpenAIEmbeddings()

# Chroma collection stored under "onebyzero_chroma"; the collection metadata
# selects cosine as the HNSW space.
collection = Chroma(
    collection_name="task_3",
    embedding_function=embeddings,
    persist_directory="onebyzero_chroma",
    collection_metadata={"hnsw:space": "cosine"},
)

# Add one document per conversation. The conversation id (as a string) is used
# both as the document id and as the "id" metadata field.
for entry in tqdm(texts):
    doc_id = str(entry["id"])
    collection.add_texts(
        texts=[entry["text"]],
        ids=[doc_id],
        metadatas=[{"id": doc_id}],
    )
    # One-second pause between inserts (presumably to stay under the
    # embedding API rate limit — confirm before removing).
    time.sleep(1)

collection.persist()





  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [02:22<00:00,  1.43s/it]


In [24]:
# Alternative kept for reference only (not executed): build the store directly from the texts.
# db = Chroma.from_texts(texts, embeddings, persist_directory="onebyzero_chroma")
# retriever = db.as_retriever()

# Expose the Chroma collection as a retriever. No arguments are passed, so the
# library's default search settings apply.
retriever = collection.as_retriever()

In [25]:
# Smoke-test the retriever with a sample question and show the first returned
# document's text and metadata.
sample_question = "What did the president say about Ketanji Brown Jackson?"
retrieved_docs = retriever.invoke(sample_question)

first_hit = retrieved_docs[0]
print(first_hit.page_content)
print(first_hit.metadata)


Good evening!  Do you know much about Maryland's governor?
  Not too much,  so you?  Did you know that ralph lawrence carr was the only governor to oppose the internement of japanese americans during ww2?
  I didn't know that.  Maryland's governor, Larry Hogan, is a republican who was under some hot water for policies on climate change.
  Thats interesting.  the beatles song"come together" was written for 1969 governor campaign.
  Larry Hogan could probably have used the support of a group like the Beatles, but I'm sure he will be ok.  At least he isn't as bad as North Dakota's in 1934 who actually seceded from the US for a short time!
 when schwarzenegger became governor of ca he commuted 3 hours everyday lol
  Maybe so he could leave the office every day and say "I'll be back!"  
 haha good one!
 Did you hear that Texas had a female governor before other states even allowed women to vote?
 Yeah thats really interesting. did you know there hasnt been a us president who wasnt a rep or 

In [26]:

from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain.prompts import PromptTemplate

from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers import RetryWithErrorOutputParser
from pydantic import BaseModel, Field, validator


# Chat model used to answer questions; OpenAI credentials must be configured for it.
llm = ChatOpenAI() #add openai credentials

# Schema of the structured reply requested from the model: a single `answer` string.
# NOTE(review): documented with comments rather than a class docstring because the
# parser below derives its format instructions from this model — confirm a
# docstring would not leak into the prompt before adding one.
class AnswerModel(BaseModel):
    answer: str = Field(description="answer to the user query")


# Output parser bound to AnswerModel; it also supplies the format instructions
# that are injected into the prompt.
parser = PydanticOutputParser(pydantic_object=AnswerModel)

# Prompt text for the question-answering step. {context} and {user_query} are
# filled in per call; {format_instructions} is pre-filled from the output parser.
# The instruction sentence now reads as a proper conditional ("If the question is
# irrelevant ..."); the fallback phrase itself is kept verbatim.
template = """Answer the user query based on the given context. If the question is irrelevant to the context provided, then say 'I Can't answer right now'. context : {context}, query : {user_query} \n format instructions : {format_instructions}"""


prompt = PromptTemplate(
    template=template,
    input_variables=["user_query","context"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

In [28]:

def answer_query(query):
    """Answer ``query`` using the first retrieved conversation as context.

    The module-level ``retriever`` is asked for documents matching ``query``
    and only the first result is used. Its text is inserted into ``prompt``,
    the chat model ``llm`` is called, and the reply is parsed through a
    ``RetryWithErrorOutputParser`` wrapping the module-level ``parser``.

    Args:
        query: The user's question as plain text.

    Returns:
        A ``(answer, reference)`` tuple: the parsed result's ``answer`` field
        and a string of the form ``"conversation reference : <id>"``, where
        ``<id>`` is the ``id`` metadata of the document used as context.
        If the retriever returns nothing, the fallback answer
        ``"I Can't answer right now"`` is returned with a ``None`` reference.
    """
    retrieved_docs = retriever.invoke(query)
    if not retrieved_docs:
        # No context to answer from (e.g. an empty collection): reply with the
        # same fallback sentence the prompt asks the model to use, instead of
        # failing with IndexError on retrieved_docs[0].
        return "I Can't answer right now", "conversation reference : None"

    top_doc = retrieved_docs[0]
    context = top_doc.page_content
    conversation = top_doc.metadata['id']

    # Named prompt_value rather than `input` so the builtin is not shadowed.
    prompt_value = prompt.format_prompt(user_query=query, context=context)
    completion = llm.predict(prompt_value.to_string())

    # The retry parser receives both the raw completion and the prompt that produced it.
    retry_parser = RetryWithErrorOutputParser.from_llm(parser=parser, llm=llm)
    model = retry_parser.parse_with_prompt(completion, prompt_value)
    return model.answer , f"conversation reference : {conversation}"

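The `answer_query` function takes a question, retrieves the most relevant conversation to use as context, and answers the question from that context. It returns the answer together with a reference to the conversation it used.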

In [30]:
# Example: ask a question that one of the indexed conversations covers.
answer_query("who is ralph lawrence carr")

('Ralph Lawrence Carr was the only governor to oppose the internment of Japanese Americans during WW2.',
 'conversation reference : 63')