In [2]:
# Import Libraries
import openai
import langchain
import pinecone
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.llms import OpenAI

  from tqdm.autonotebook import tqdm


In [3]:
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
import os

In [5]:
## Let's read the document
def read_doc(directory):
  file_loader = PyPDFDirectoryLoader(directory)
  documents = file_loader.load()
  return documents

In [6]:
doc = read_doc("documents/")
len(doc)

177

In [7]:
## Divide the docs into chuncks 

def chunk_data(docs, chunk_size=800, chunk_overlap=50):
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
  doc = text_splitter.split_documents(docs)
  return docs

In [8]:
documents = chunk_data(docs=doc)
len(documents)

177

## Embidding Technique of OPENAI

In [None]:
embeddings=OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])
embeddings

In [10]:
vectors = embeddings.embed_query("How are you ?")
len(vectors)

1536

In [11]:
## Vector search DB in  Pinecone
pinecone.init(
  api_key=os.environ['PINECONE_API_KEY'],
  environment=os.environ['PINECONE_ENVIRONMENT']
)
index_name=os.environ['PINECONE_INDEX_NAME']

In [12]:
index = Pinecone.from_documents(doc, embeddings, index_name=index_name)
index

<langchain_community.vectorstores.pinecone.Pinecone at 0x2086ea5f9a0>

In [13]:
## Cosine Similarity Retreive Results from vectorDB
def retrieve_query(query, index, k=2):
  matching_results=index.similarity_search(query, k=k)
  return matching_results

In [14]:
from langchain.chains.question_answering import load_qa_chain
from langchain import OpenAI

In [15]:
llm = OpenAI(model_name="davinci-002", temperature=0.5)
chain = load_qa_chain(llm, chain_type="stuff")

In [16]:
## Search answers from VectorDB
def retrieve_answer(query, chain, index):
  doc_search = retrieve_query(query=query, index=index)
  print(doc_search)
  response=chain.run(input_documents=doc_search, question=query)
  return response

In [17]:
# out_query = "How much the agriculture target will be increased by how many crores"
out_query = "what is preceptron in terms of statistics?"
answer = retrieve_answer(query=out_query, chain=chain, index=index)
print(answer)

[Document(page_content='118  Linear regression is one of the easiest and most popular Machine Learning algorithms. It is a statistical method \nthat is used for predictive analysis. Linear regression makes predictions for continuous/real or numeric \nvariables such as  sales, salary, age, product price,  etc. \nLinear regression algorithm shows a linear relationship between a dependent (y) and one or more \nindependent (y) variables, hence called as linear regression. Since linear regression shows the linear \nrelationship, which means it finds how the value of the dependent variable is changing according to the value \nof the indepe ndent variable.  \nThe linear regression model provides a sloped straight line representing the relationship between the \nvariables. Consider the below image:  \n \n \n \ny= a 0+a1x+ ε \nHere,  \nY= Dependent Variable (Target Variable)  \nX= Independent Variable (predictor Var iable)  \na0= intercept of the line (Gives an additional degree of freedom)  \n

In [18]:
query = "what is deep learning?"
index = Pinecone.from_documents("", embeddings, index_name=index_name)
doc_search = index.similarity_search(query, k=2)
response=chain.run(input_documents=doc_search, question=query)
print(str(response))

 Deep learning is a subfield of machine learning that focuses on algorithms inspired by the structure and function of the brain called artificial neural networks. Deep learning is a subset of machine learning, and is a method for data analysis that uses learning algorithms that iteratively learn from data. Deep learning is a machine learning technique that teaches computers to do what comes naturally to humans: learn by example. Deep learning is a class of machine learning algorithms that (pp. 4–5) uses multiple layers to progressively extract higher-level features from the raw input. Deep learning is a machine learning method that teaches computers to do what comes naturally to humans: learn by example. Deep learning is a machine learning method that teaches computers to do what comes naturally to humans: learn by example. Deep learning is a machine learning method that teaches computers to do what comes naturally to humans: learn by example. Deep learning is a machine learning method