In [1]:
import os 
from dotenv import load_dotenv
load_dotenv()
os.environ["GOOGLE_API_KEY"]=os.getenv("GOOGLE_API_KEY")
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"]="true"
# os.environ["LANGCHAIN_PROJECT"]=os.getenv("LANGCHAIN_PROJECT")

In [2]:
# from langchain_community.embeddings import OllamaEmbeddings
from langchain_ollama import OllamaEmbeddings


In [3]:
embeddings= (
    OllamaEmbeddings(model="gemma:2b")
)

In [4]:
## Reading a PDf File
from langchain_community.document_loaders import PyPDFLoader
loader=PyPDFLoader('QUANTSmod9.pdf')
docs=loader.load()
docs

[Document(metadata={'source': 'QUANTSmod9.pdf', 'page': 0}, page_content='Parametric and Non-Parametric \nTests of Independence\nby Pamela Peterson Drake, PhD, CFA.\nPamela Peterson Drake, PhD, CFA, is at James Madison University (USA).\nLEARNING OUTCOMES\nMastery The candidate should be able to:\nexplain parametric and nonparametric tests of the hypothesis that \nthe population correlation coefficient equals zero, and determine whether the hypothesis is rejected at a given level of significance \nexplain tests of independence based on contingency table data\nINTRODUCTION\nIn many contexts in investments, we want to assess the strength of the linear rela -\ntionship between two variables—that is, we want to evaluate the correlation  between \nthem. A significance test of a correlation coefficient allows us to assess whether the relationship between two random variables is the result of chance. Lesson 1 covers a parametric and a non-parametric approach to testing the correlation between

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
documents=text_splitter.split_documents(docs)

In [6]:
documents

[Document(metadata={'source': 'QUANTSmod9.pdf', 'page': 0}, page_content='Parametric and Non-Parametric \nTests of Independence\nby Pamela Peterson Drake, PhD, CFA.\nPamela Peterson Drake, PhD, CFA, is at James Madison University (USA).\nLEARNING OUTCOMES\nMastery The candidate should be able to:\nexplain parametric and nonparametric tests of the hypothesis that \nthe population correlation coefficient equals zero, and determine whether the hypothesis is rejected at a given level of significance \nexplain tests of independence based on contingency table data\nINTRODUCTION\nIn many contexts in investments, we want to assess the strength of the linear rela -\ntionship between two variables—that is, we want to evaluate the correlation  between'),
 Document(metadata={'source': 'QUANTSmod9.pdf', 'page': 0}, page_content='INTRODUCTION\nIn many contexts in investments, we want to assess the strength of the linear rela -\ntionship between two variables—that is, we want to evaluate the correlat

In [8]:
from langchain_community.vectorstores import FAISS
vectorstoredb=FAISS.from_documents(documents,embeddings)

In [9]:
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x120eb0530>

In [10]:
from langchain_google_genai import ChatGoogleGenerativeAI
import google.generativeai as genai

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

In [12]:
llm=ChatGoogleGenerativeAI(model='gemini-pro')

In [37]:
result=llm.invoke("What is GenerativAI?")

In [38]:
print(result)

content="**GenerativAI** is an artificial intelligence (AI) platform that specializes in generative AI, a type of AI that can create new data or content from scratch. Generative AI models are trained on large datasets and can learn to generate text, images, music, and other types of data that are similar to or indistinguishable from human-generated content.\n\n**Key Features of GenerativAI:**\n\n* **Text Generation:** Can generate human-like text for various purposes, such as article writing, story creation, and dialogue generation.\n* **Image Generation:** Can create realistic images from scratch or modify existing images to produce novel variations.\n* **Music Generation:** Can generate unique and diverse music tracks in different genres and styles.\n* **Code Generation:** Can generate code in multiple programming languages, assist with debugging, and suggest code optimizations.\n* **Data Augmentation:** Can generate synthetic data to enhance training datasets for machine learning mo

In [15]:
# from langchain_core.prompts import ChatPromptTemplate

# template = ChatPromptTemplate([
#     ("system", "You are a helpful AI bot. Your name is Carl."),
#     ("human", "{user_input}"),
# ])

# prompt_value = template.invoke("Hello, there!")
# prompt_value

In [41]:

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt=ChatPromptTemplate.from_template(
    """
Answer the following question based only on the provided context:
<context>
{context}
</context>


"""
)

document_chain=create_stuff_documents_chain(llm,prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n\n\n'), additional_kwargs={})])
| ChatGoogleGenerativeAI(model='models/gemini-pro', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x125f0d280>, default_metadata=())
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_factories=[])

In [17]:
# from langchain_core.documents import Document
# document_chain.invoke({
#     "input":"The most common hypotheses concerning correlation",
#     "context":[Document(page_content="The most common hypotheses concerning correlation occur when comparing the population correlation coefficient with zero because we often ask whether a rela-tionship exists, which implies a null of the correlation coefficient equal to zero")]
# })

'The most common hypotheses concerning correlation occur when comparing the population correlation coefficient with zero because we often ask whether a relationship exists, which implies a null of the correlation coefficient equal to zero.'

In [43]:
retriever=vectorstoredb.as_retriever()
from langchain.chains import create_retrieval_chain
retrieval_chain=create_retrieval_chain(retriever,document_chain)

In [44]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x120eb0530>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n\n\n'), additional_kwargs={})])
            | ChatGoo

In [45]:
response=retrieval_chain.invoke({"input":"What is  Spearman rank correlation coefficient"})
response['answer']

'In tests concerning correlation, we use a t-statistic to test whether a population correlation coefficient is different from zero. If we have n observations for two variables, this test statistic has a t-distribution with n − 2 degrees of freedom.'

In [46]:
response['context']

[Document(metadata={'source': 'QUANTSmod9.pdf', 'page': 4}, page_content='Tests Concerning Correlation 249\n \nStep 1 State the hypotheses.   H  0   : ρ ≤ 0\xa0versus\xa0   H  a   : ρ > 0  \nStep 2 Identify the appropriate test \nstatistic.  t =   r  √ _ n − 2   _ \n √ _ 1 −  r   2      \nStep 3 Specify the level of significance.1%\nStep 4 State the decision rule. With 33 − 2 = 31 degrees of freedom \nand a one-sided test with a 1% level \nof significance, the critical value is 2.45282.  \nWe reject the null hypothesis if the \ncalculated t-statistic is greater than \n2.45282.\nStep 5 Calculate the test statistic.\n  t =   0.43051   √ _ 33 − 2    ____________    √ ___________ 1 − 0.18534        \n= 2.65568   \nStep 6 Make a decision. Reject the null hypothesis because the \ncalculated t-statistic is greater than \n2.45282. Evidence is sufficient to reject \nthe H0 in favor of Ha, that the correla -\ntion between the annual returns of these \ntwo investments is positive.\n \nEXAMPLE 2\n

In [47]:
response=retrieval_chain.invoke({"input":"What is  Generative AI"})
response['answer']

'I cannot find the answer to your question in the provided context.'