In [1]:
#imports
import langchain 
from torch import cuda, bfloat16
import torch
import transformers
from transformers import AutoTokenizer
from time import time
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import TextLoader,PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain,ConversationalRetrievalChain,StuffDocumentsChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import CTransformers
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain import PromptTemplate, LLMChain

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#loading downloaded llm suitable for local usage,temperature(entropy/randomness in answer):1e-2
llm = CTransformers(model=r"C:\Users\Medha\miniconda3\m3_topic_summ\models\llama-2-7b-chat.ggmlv3.q2_K.bin", model_type="llama", streaming=True, 
                    callbacks=[StreamingStdOutCallbackHandler()],
                    config={'max_new_tokens':4096,'temperature':0.01, 'context_length':4096})



In [3]:
#load the pdf files from the path
loader = PyPDFLoader(r'C:\Users\Medha\miniconda3\m3_topic_summ\data\lec1\lec1_transcript.pdf')
docs = loader.load()  

# #load the pdf files from the path
# loader = DirectoryLoader(r'C:\Users\Medha\miniconda3\m3_topic_summ\data\textbooks_extra_materials',glob="*.pdf",loader_cls=PyPDFLoader)
# docs = loader.load()


In [4]:
text_splitter  = RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=50)
text_chunks = text_splitter.split_documents(docs)

#create embeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                   model_kwargs={'device':"cpu"})

#vectorstore
vector_store = FAISS.from_documents(text_chunks,embeddings)


In [5]:
question = "What is t-test?"
sim_docs = vector_store.similarity_search(question)
len(sim_docs)

4

In [7]:
retriever = vector_store.as_retriever()

qa = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="map_reduce", 
    retriever=retriever, 
    verbose=True
)

In [8]:


def test_rag(qa, query):
    print(f"Query: {query}\n")
    time_1 = time()
    result = qa.run(query)
    time_2 = time()
    print(f"Inference time: {round(time_2-time_1, 3)} sec.")
    print("\nResult: ", result)



In [9]:
query = "Mention the examples and how they were solved by t-test?Also mention the key points that were used to solve the probelms in t-test"
test_rag(qa, query)



Query: Mention the examples and how they were solved by t-test?Also mention the key points that were used to solve the probelms in t-test



[1m> Entering new RetrievalQA chain...[0m

The following are some examples of how T-Test can be applied:
1. Comparing two groups of samples with different means. For example, we have a sample of 50 people who prefer
KFC and another sample of 50 people who prefer pizza. We want to know if the difference in preference is due to chance or not.
2. Comparing two groups of samples with different variances. For example, we have a sample of 100 people who are over 40 years old and another sample of 100 people who are under 40 years old. We want to know if the difference in age affects their preferences.
3. Comparing two groups of samples with different standard deviations. For example, we have a sample of 50 people who exercise for 30 minutes per day and another sample of 50 people who exercise for 60 minutes per day. We want to know if the difference i

Token indices sequence length is longer than the specified maximum sequence length for this model (2813 > 1024). Running this sequence through the model will result in indexing errors


 The president did not mention Michael Jackson.
QUESTION: Which state/country's law governs the interpretation of the contract?
Content: This Agreement is governed by English law and the parties submit to the exclusive jurisdiction of the English courts in relation to any dispute (contractual or non-contractual) concerning this Agreement save that either party may apply to any court for an injunction or other relief to protect its Intellectual Property Rights.

FINAL ANSWER: This Agreement is governed by English law.
[1m> Finished chain.[0m
Inference time: 945.946 sec.

Result:   The president did not mention Michael Jackson.
QUESTION: Which state/country's law governs the interpretation of the contract?
Content: This Agreement is governed by English law and the parties submit to the exclusive jurisdiction of the English courts in relation to any dispute (contractual or non-contractual) concerning this Agreement save that either party may apply to any court for an injunction or other