In [None]:
import os 
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process
# environment so the environment lookups below can see them.
load_dotenv()

def configure_environment(environ=os.environ):
    """Check the API credentials and enable LangSmith tracing when possible.

    Values loaded from ``.env`` are already present in the process
    environment, so nothing has to be copied back into it. Re-assigning
    ``os.environ[name] = os.getenv(name)`` is a no-op when the variable exists
    and raises ``TypeError`` (``str expected, not NoneType``) when it does
    not, so this function validates instead of re-assigning.

    Args:
        environ: Mutable mapping to inspect and update. Defaults to the real
            process environment; a plain ``dict`` may be supplied instead.

    Returns:
        list[str]: Names of the expected variables that are unset or empty,
        in the order they were checked. Empty when everything is configured.
    """
    import warnings  # local import keeps this cell self-contained

    expected = ("OPENAI_API_KEY", "LANGCHAIN_API_KEY", "LANGCHAIN_PROJECT")
    missing = [name for name in expected if not environ.get(name)]

    if "OPENAI_API_KEY" in missing:
        warnings.warn(
            "OPENAI_API_KEY is not set; add it to your environment or .env "
            "file before running the OpenAI cells below.",
            RuntimeWarning,
            stacklevel=2,
        )

    ## Langsmith Tracking
    if "LANGCHAIN_API_KEY" in missing:
        # Without an API key tracing cannot authenticate, so leave it off.
        warnings.warn(
            "LANGCHAIN_API_KEY is not set; LangSmith tracing stays disabled.",
            RuntimeWarning,
            stacklevel=2,
        )
    else:
        # LANGCHAIN_PROJECT is picked up from the environment when present;
        # presumably LangSmith falls back to its default project otherwise.
        environ["LANGCHAIN_TRACING_V2"] = "true"

    return missing


missing_env_vars = configure_environment()

In [None]:
from langchain.document_loaders import TextLoader

# Location of the plain-text source file to ingest
file_path = "speech.txt"

# Load the file; the loader hands back a list of documents
loader = TextLoader(file_path)
documents = loader.load()

# Preview only the first 500 characters of the first document
preview = documents[0].page_content[:500]
print("Loaded Document Content:\n", preview)

In [None]:
from langchain.text_splitter import CharacterTextSplitter

# Chunking parameters handed to the splitter
CHUNK_SIZE = 250
CHUNK_OVERLAP = 80

text_splitter = CharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

# Break the loaded documents into smaller chunks
docs = text_splitter.split_documents(documents)

# Report how many chunks were produced, then preview the first one
chunk_count = len(docs)
print(f"Number of text chunks created: {chunk_count}")
print("\nFirst Chunk Content:\n", docs[0].page_content)

In [None]:
# Import OpenAI embeddings
from langchain.embeddings.openai import OpenAIEmbeddings

# Create the OpenAI embedding client with its default settings
embeddings = OpenAIEmbeddings()

# Embed the text of the first chunk as a quick sanity check
first_chunk_text = docs[0].page_content
sample_embedding = embeddings.embed_query(first_chunk_text)

# Show only the first 5 components of the resulting vector
print("Sample Embedding Vector (First 5 values):", sample_embedding[:5])


In [None]:
# Import FAISS (Vector database)
from langchain.vectorstores import FAISS

# Embed every chunk and index the resulting vectors in a FAISS store
vectorstore = FAISS.from_documents(documents=docs, embedding=embeddings)

# Expose the store through the retriever interface for similarity lookups
retriever = vectorstore.as_retriever()

In [None]:
# Bare expression: the notebook displays the retriever's repr for inspection
retriever

In [None]:
# Search the vector store with a sample query.
# Retrievers are called through invoke(); the older get_relevant_documents()
# method is deprecated and absent from newer LangChain releases.
query = "what is about her blood?"
retrieved_docs = retriever.invoke(query)

In [None]:
# Bare expression: the notebook displays everything the retriever returned
retrieved_docs

In [None]:
# Show the text of the first (most relevant) retrieved chunk
top_document = retrieved_docs[0]
print("Top Retrieved Document:\n", top_document.page_content)

In [None]:
# Import OpenAI model
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# Build the RAG (Retrieval-Augmented Generation) chain: a chat model paired
# with the retriever that supplies it with document chunks
llm = ChatOpenAI(model="gpt-4o-mini")
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)




In [None]:
# Bare expression: the notebook displays the chain's repr for inspection
qa_chain

In [None]:
# Ask a question and get an answer.
# Chain.run() is deprecated; invoke() returns a dict whose "result" entry
# holds the answer text, so `response` is still a plain string here.
query = "what is about her blood?"
response = qa_chain.invoke({"query": query})["result"]

# Print the generated response
print("\nGenerated Answer:\n", response)

In [None]:
# Ask a question and get an answer.
# invoke() returns a dict that carries the query and the answer text.
query = "what is about her blood?"
response = qa_chain.invoke({"query": query})

# Print only the answer text ("result") rather than the whole response dict
print("\nGenerated Answer:\n", response["result"])