In [2]:
# https://github.com/langchain-ai/langchain/issues/9717
# https://python.langchain.com/docs/use_cases/question_answering/

import os
import sys
from config import GOOGLE_AI_API_KEY

from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.embeddings import GooglePalmEmbeddings
from langchain.vectorstores import Chroma

from langchain.chains import RetrievalQA
from langchain.chat_models import ChatGooglePalm
from langchain.prompts import PromptTemplate
import pandas as pd



In [3]:
# Document loading
path = './case_files./case_file.txt'
loader = TextLoader(path)
data = loader.load()



In [4]:
# Text splitting
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 0)
all_splits = text_splitter.split_documents(data)
all_splits[10].metadata

{'source': './case_files./case_file.txt'}

In [5]:
# Create retriever
embedding = GooglePalmEmbeddings(google_api_key = GOOGLE_AI_API_KEY)
vectorstore = Chroma.from_documents(documents=all_splits, embedding=embedding)

  from .autonotebook import tqdm as notebook_tqdm


In [143]:
# Connect to LLM for generation

template = """Use the following pieces of context to answer the questions at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Keep the answer as concise as possible, Avoid using full sentences for fact related answers.
{context}
Questions: 
"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)


In [144]:
llm = ChatGooglePalm(google_api_key=GOOGLE_AI_API_KEY, temperature=0.0)
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    verbose = True
)


In [145]:
from langchain.prompts import ChatPromptTemplate

with open('./questions.txt', mode='r') as questions_file:
    list_questions = questions_file.readlines()

prompt = """For the below given context answer these questions. If the information to answer the question is not present in the context say "I dont know". 
Answer as precise and concise as possible.
Context: {context}
Questions:{questions}"
"""

promt_template_question = ChatPromptTemplate.from_template(prompt)

In [146]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema import StrOutputParser

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": vectorstore.as_retriever() | format_docs, "questions": RunnablePassthrough()}
    | promt_template_question
    | llm
    | StrOutputParser()
)



In [149]:
list_answers = []
for question in list_questions:
    try:
        list_answers.append(rag_chain.invoke(question))
    except:
        print("Exception occured for the question.", question)
        list_answers.append("")

In [150]:
df_qa_case = pd.DataFrame(list_questions, list_answers).reset_index()

In [151]:
df_qa_case.to_csv("qa_case.csv")