### LLM App project 



In [1]:
import openai
from pinecone import Pinecone# Import Pinecone module as-is for SDK usage
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore

from langchain.llms import OpenAI


  from tqdm.autonotebook import tqdm


In [2]:
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment so the
# os.environ[...] lookups for the API keys in later cells can succeed.
load_dotenv()


True

In [3]:
import os 


In [4]:
## Read pdf file 
def read_doc(directory):
    """Load the PDFs found in ``directory`` as LangChain documents.

    Args:
        directory: Path of the folder handed to ``PyPDFDirectoryLoader``.

    Returns:
        The documents produced by the loader's ``load()`` call.
    """
    return PyPDFDirectoryLoader(directory).load()

In [5]:
# Load the PDFs from the relative documents/ folder; `doc` is the input that
# chunk_data() receives in the chunking cell below.
doc = read_doc('documents/')


In [6]:
def chunk_data(docs, chunk_size=800, chunk_overlap=50):
    """Split loaded documents into overlapping chunks.

    Args:
        docs: Documents to split, e.g. the output of ``read_doc``.
        chunk_size: Size limit forwarded to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Overlap between neighbouring chunks, forwarded to the
            splitter.

    Returns:
        The result of the splitter's ``split_documents(docs)`` call.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)

In [None]:
# Split the loaded PDFs into chunks using chunk_data's default size and overlap.
documents = chunk_data(docs=doc)

# Show the chunk count and a short preview rather than echoing every chunk,
# which floods the notebook output for any non-trivial set of PDFs.
print(f"{len(documents)} chunks")
documents[:3]

In [None]:
# Embedding client used for the dimension check below and for the Pinecone upload.
# NOTE(review): the key is read from OPEN_API_KEY rather than the conventional
# OPENAI_API_KEY; a missing variable raises KeyError here. Confirm the .env spelling.
embeddings = OpenAIEmbeddings(api_key = os.environ["OPEN_API_KEY"])


In [9]:
# Smoke test: embed one sample sentence and display the vector length
# (1536 in the recorded output).
# NOTE(review): presumably this must equal the Pinecone index dimension; verify.
vectors = embeddings.embed_query("How are you?")
len(vectors )

1536

In [10]:

# Create the Pinecone client from the key found in the environment
# (a missing PINECONE_API_KEY raises KeyError here).
api_key = os.environ["PINECONE_API_KEY"]
pc = Pinecone(api_key=api_key)

# Name of the index this notebook works against.
index_name = "langchainvector"

# Handle to that index; this does not create it.
index = pc.Index(index_name)




In [None]:
# Print the indexes visible to this client; useful for checking that the
# index named in index_name is among them.
print("Available indexes:", pc.list_indexes())


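### `PINECONE_API_KEY` must be set in the environment before the next cell runs

Set it in the `.env` file loaded at the top of the notebook, or export it in the shell (no spaces around `=`). For example:

```bash
pip install -qU langchain-pinecone
export PINECONE_API_KEY="your-pinecone-api-key"
```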

In [12]:
import hashlib


def _chunk_id(chunk):
    """Return a stable ID derived from the chunk's source, page and text."""
    source = chunk.metadata.get("source", "")
    page = chunk.metadata.get("page", "")
    raw = f"{source}|{page}|{chunk.page_content}"
    return hashlib.sha256(raw.encode("utf-8")).hexdigest()


# Content-derived IDs make this cell idempotent: re-running it overwrites the
# same records instead of storing every chunk again under a fresh ID.
# Keying a dict by ID also drops exact duplicate chunks before the upload.
unique_chunks = {_chunk_id(chunk): chunk for chunk in documents}

vector_database_index = PineconeVectorStore.from_documents(
    documents=list(unique_chunks.values()),  # List of LangChain Document objects
    embedding=embeddings,                    # OpenAI embeddings object
    index_name=index_name,                   # Same index the Pinecone client cell connects to
    ids=list(unique_chunks.keys()),          # Stable IDs, aligned with `documents` above
)

print("Documents successfully stored in Pinecone!")

Documents successfully stored in Pinecone!


In [22]:
def retrieve_query(query, k=2):
    """Return the ``k`` stored chunks most similar to ``query``.

    Uses the notebook-level ``vector_database_index`` created by the upload cell.

    Args:
        query: Text to search for.
        k: Number of results requested from ``similarity_search`` (default 2).

    Returns:
        Whatever ``vector_database_index.similarity_search`` returns.
    """
    return vector_database_index.similarity_search(query, k=k)

In [23]:
from langchain.chains.question_answering import load_qa_chain
from langchain import OpenAI
from langchain.chat_models import ChatOpenAI

In [24]:
# Chat model that writes the final answer. It reads the same OPEN_API_KEY
# variable as the embeddings client.
llm = ChatOpenAI(
    api_key=os.environ["OPEN_API_KEY"],
    model_name="gpt-4",
    temperature=0.5,
)

# Question-answering chain of type "stuff", built on that model.
chain = load_qa_chain(llm, chain_type="stuff")

In [25]:
def retrieve_answers(query):
    """Answer ``query`` from the chunks retrieved for it.

    Fetches matching chunks with ``retrieve_query``, prints them, and passes
    them with the question to the notebook-level ``chain``.

    Args:
        query: The question to answer.

    Returns:
        The value returned by ``chain.run``.
    """
    matches = retrieve_query(query)
    # Echo the retrieved chunks so the context behind the answer is visible.
    print(matches)
    return chain.run(input_documents=matches, question=query)

In [27]:
# End-to-end check: retrieve_answers() prints the retrieved chunks, then the
# final print shows the answer returned by the chain.
our_query = "What is the usecase for AI in our daily lives"
answer = retrieve_answers(our_query)
print(answer)

[Document(id='c4c9345a-992f-4a7a-a5d7-d56915572804', metadata={'page': 18.0, 'source': 'documents/Student-Guide-Module-1-Fundamentals-of-AI.pdf'}, page_content='various tools to managers that help measure the performance of employees, and even select \nthe right employee for promotion without the element of human bias. \nPersonal Life \nOur personal life is full of examples of AI being used daily. Our engagement is seamless and \ntherefore goes almost unnoticed. However, many of the apps we run on our smartphones are \nall AI-powered. When we want to listen to music we speak a command, our phone analyses our \nspeech, understands the task, and runs a search to find the song before playing it.  \nReminders for a meeting, or to buy groceries on a particular day for your parents or reminding \nyou to study a particular subject for an upcoming test are all AI interventions in our daily life.'), Document(id='d1921320-4ac9-4195-aa65-be33c6a89a29', metadata={'page': 18.0, 'source': 'documents