In [2]:
import os
from dotenv import load_dotenv
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Load environment variables from a local .env file (if present) into os.environ
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# Fail fast with a readable message rather than an opaque client error
# further down when one of the service credentials is absent.
_missing_keys = [
    name
    for name, value in (
        ("OPENAI_API_KEY", OPENAI_API_KEY),
        ("PINECONE_API_KEY", PINECONE_API_KEY),
    )
    if not value
]
if _missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_keys)
    )

# Initialize embeddings and Pinecone store
embeddings = OpenAIEmbeddings(api_key=OPENAI_API_KEY)
index_name = "dr-arif-butt"

# Create a Pinecone vector store from the existing index.
# The API key is passed explicitly so the store does not silently depend on
# the process environment (previously PINECONE_API_KEY was read but unused).
pinecone_store = PineconeVectorStore(
    index_name=index_name,
    embedding=embeddings,
    pinecone_api_key=PINECONE_API_KEY,
)

# Set up the ChatPromptTemplate; {context} receives the retrieved documents
# and {question} receives the user's query.
template = """
Answer the question based on the context below. If you can't 
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Set up the model
model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model="gpt-3.5-turbo")

# Create a retriever using the Pinecone index
retriever = pinecone_store.as_retriever()

# Define the output parser (turns the chat message into a plain string)
parser = StrOutputParser()

# Create a chain for querying: the input string is sent to the retriever to
# build "context" and is passed through unchanged as "question".
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | parser
)

# Example query
query = "What is the color of the car of Dr. Arif Butt?"

# Define a function to include the documents in the result
def query_with_documents(query):
    """Answer ``query`` and return the answer with the documents used for it.

    Retrieval is performed exactly once. The retrieved documents are fed
    directly into ``prompt | model | parser`` under the same ``context`` /
    ``question`` keys the module-level ``chain`` uses, so the prompt sent to
    the model is unchanged while the second retriever round-trip that
    ``chain.invoke`` would trigger is avoided.

    Args:
        query: The question text; it is used both for retrieval and as the
            ``question`` value of the prompt.

    Returns:
        A ``(answer, documents)`` tuple: the parsed model answer and the list
        of documents returned by the retriever for ``query``.
    """
    # `invoke` replaces the deprecated `get_relevant_documents` retriever call.
    documents = retriever.invoke(query)

    # Reuse the documents fetched above instead of letting the chain retrieve
    # again; this guarantees the returned documents are the ones the model saw.
    answer = (prompt | model | parser).invoke(
        {"context": documents, "question": query}
    )

    return answer, documents

# Run the example query through query_with_documents
result, retrieved_documents = query_with_documents(query)

# Show the answer first, then the header followed by one document per line
print("Query Result:", result)
print("\nRetrieved Documents:", *retrieved_documents, sep="\n")

Query Result: The color of the car of Dr. Arif Butt is red.

Retrieved Documents:
page_content='Dr Arif butt currently lives in Lahore.The car he drives is kia sportage.The color of his car is' metadata={'source': 'dr_arif_butt_current.pdf'}
page_content='color of his car is red.' metadata={'source': 'dr_arif_butt_current.pdf'}
page_content='Dr. Muhammad Arif Butt is an Assistant Professor at the Department of Data Science,' metadata={'source': 'dr_arif_butt_1.pdf'}
page_content='2023) Summary - Dr. Muhammad Arif Butt is an Assistant Professor at University of the Punjab,' metadata={'source': 'dr_arif_butt_resume.pdf'}


In [5]:
# NOTE(review): the chain is built without any conversation memory, so the
# pronoun "he" reaches the retriever unresolved; consider naming the person
# explicitly in the question if retrieval quality matters here.
result, retrieved_documents = query_with_documents('which specific department does he teach in?')
# Print the answer followed by each retrieved document on its own line
print(result, *retrieved_documents, sep="\n")

He teaches in the Department of Information Technology at Punjab University College of Information.
page_content='1. In-Charge/Principal, PUCIT - 2. Departmental Doctoral Program Committee Member 3.' metadata={'source': 'dr_arif_butt_resume.pdf'}
page_content='parameters.
His teaching interests are embedded/real time operating systems, system programming,' metadata={'source': 'dr_arif_butt_1.pdf'}
page_content='Campus Chairman 17. Dress Code Committee Chairman 6 Courses Taught Graduate level' metadata={'source': 'dr_arif_butt_resume.pdf'}
page_content='best teachers throughout the teaching carrier at Punjab University College of Information' metadata={'source': 'dr_arif_butt_resume.pdf'}
