In [22]:
# Import Libraries 
import openai
import langchain
import pinecone
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Pinecone
from langchain.llms import OpenAI
import os 

In [23]:
from dotenv import load_dotenv
load_dotenv()

True

## Read the document 

In [24]:
def read_doc (directory):
    file_loader = PyPDFDirectoryLoader(directory)
    document = file_loader.load()
    return document



In [25]:
doc = read_doc("documents/")
len(doc)

72

## Divide Document into chunks 

In [26]:
def chunk_data (docs, chunk_size=800, chunk_overlap=50):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    doc =text_splitter.split_documents(docs)
    return docs

In [27]:
documents = chunk_data(docs=doc)
len(documents)

72

## Embedd the Chunks of Document

In [28]:
embeddings = OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])

In [29]:
vectors=embeddings.embed_query("How are you")

In [30]:
len(vectors)

1536

## Vector Search DB in Pinecone 

In [32]:
os.environ["PINECONE_API_KEY"]

index_name = "llm-app"

In [76]:
vector_db = Pinecone.from_documents(
        documents = documents, 
        embedding = OpenAIEmbeddings(), 
        index_name = "llm-app",)

## Retrieve results with Cosine Similarity

In [77]:
def retrieve_query(query, k=2):
    matching_result = vector_db.similarity_search(query, k=k)
    return matching_result

In [78]:
from langchain.chains.question_answering import load_qa_chain
from langchain_openai import OpenAI


In [79]:
llm =OpenAI(model="gpt-3.5-turbo-instruct", temperature=0.6)
chain = load_qa_chain(llm, chain_type='stuff')

## Search for answers 

In [80]:
def retrieve_answers(query):
    doc_search = retrieve_query(query)
    print(doc_search)
    response = chain.run(input_documents=doc_search, question=query)
    return response

In [83]:
our_query = "How do they plan on diversifying the economy"
answer= retrieve_answers(our_query)


[Document(metadata={'page': 22.0, 'source': 'documents\\Obi-BabaAhmed-Manifesto-23.pdf'}, page_content='XI. Although non-oil  GDP has been marginally increasing in recent years, this \ngrowth has not translated to increased non-oil exports, despite various national \npolicies on economic diversification. We believe that diversification of the \neconomy can be attained through increasing the value addition and export \npotential of our agriculture and natural resources value chain.  We shall achieve \nthis through targeted export incentives and deliberate actions in: \na. Scaling up the development of manufacturing and processing \ntechnological capabilities across the primary products value chains where \nwe enjoy comparative advantage;\nb. A monitored scheme of targeted funding to entrepreneurs taking \nadvantage of the capacity development as outlined above.\nc.  A radical  reform of our logistics and distribution systems including ports, \ncustoms, and trade facilitation instruments

In [85]:
print(answer)

 They plan on increasing the value addition and export potential of agriculture and natural resources, scaling up manufacturing and processing capabilities, providing targeted funding to entrepreneurs, reforming logistics and distribution systems, strengthening product quality support, and investing in agro-cluster and industrial cluster development. They also plan on shifting emphasis from consumption to production and running a production-centered economy driven by an agrarian revolution and export-oriented industrialization.
