In [None]:
# Install required packages
# Install required packages
%pip install langchain langchain-openai langchain-community langchain-text-splitters faiss-cpu pypdf python-dotenv


[31mERROR: Could not find a version that satisfies the requirement langchain-chains (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for langchain-chains[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


In [4]:
# Create sample text documents
import os

# Create documents folder if it doesn't exist
os.makedirs("documents", exist_ok=True)

# Sample TXT file 1 - Python Basics
with open("documents/python_basics.txt", "w") as f:
    f.write("""Python Programming Basics

Python is a high-level programming language. It was created by Guido van Rossum in 1991.
Python is known for its simple syntax and readability. It uses indentation to define code blocks.
Popular uses include web development, data science, and automation.

Key Features:
- Easy to learn and read
- Large standard library
- Cross-platform compatibility
- Strong community support""")

# Sample TXT file 2 - AI Introduction
with open("documents/ai_intro.txt", "w") as f:
    f.write("""Introduction to Artificial Intelligence

AI is the simulation of human intelligence by machines. Machine learning is a subset of AI.
Deep learning uses neural networks with multiple layers.

Common AI Applications:
- Chatbots and virtual assistants
- Image and speech recognition
- Recommendation systems
- Autonomous vehicles

AI has transformed industries like healthcare, finance, and entertainment.""")

# Sample TXT file 3 - Company Policy
with open("documents/company_policy.txt", "w") as f:
    f.write("""Company Remote Work Policy

Effective Date: January 2024

Remote Work Guidelines:
- Employees can work remotely up to 3 days per week
- Core hours are 10 AM to 3 PM local time
- All team meetings must be scheduled during core hours
- Use Slack for daily communication
- Submit timesheets every Friday by 5 PM

Equipment:
- Company provides laptop and monitor
- Internet stipend of $50 per month

Contact HR for questions about this policy.""")

# Sample TXT file 4 - Product Info
with open("documents/product_info.txt", "w") as f:
    f.write("""Product Information - SmartWatch Pro

Price: $299
Release Date: March 2024

Features:
- Heart rate monitoring
- GPS tracking
- 7-day battery life
- Water resistant up to 50 meters
- Compatible with iOS and Android

Warranty: 2 years
Colors available: Black, Silver, Rose Gold""")

print("‚úì Successfully created 4 sample text documents!")
print("\nFiles created in 'documents' folder:")
print("1. python_basics.txt")
print("2. ai_intro.txt")
print("3. company_policy.txt")
print("4. product_info.txt")

# List files to verify
files = os.listdir("documents")
print(f"\nTotal files: {len(files)}")
for file in files:
    size = os.path.getsize(f"documents/{file}")
    print(f"  - {file} ({size} bytes)")

‚úì Successfully created 4 sample text documents!

Files created in 'documents' folder:
1. python_basics.txt
2. ai_intro.txt
3. company_policy.txt
4. product_info.txt

Total files: 4
  - company_policy.txt (439 bytes)
  - python_basics.txt (404 bytes)
  - product_info.txt (275 bytes)
  - ai_intro.txt (402 bytes)


In [4]:
import os
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv

# Set your OpenAI API key
load_dotenv()
print("API Key:", os.environ.get("OPENAI_API_KEY", "Not set")[:20] + "...")

# 1. Load documents from folder
def load_documents(folder_path):
    txt_loader = DirectoryLoader(folder_path, glob="**/*.txt", loader_cls=TextLoader)
    txt_docs = txt_loader.load()
    return txt_docs

# 2. Create embeddings and save to FAISS
def create_vector_store(documents, save_path="faiss_index"):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_documents(documents)
    
    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    vectorstore = FAISS.from_documents(chunks, embeddings)
    vectorstore.save_local(save_path)
    
    return vectorstore

# 3. Load existing vector store
def load_vector_store(save_path="faiss_index"):
    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    vectorstore = FAISS.load_local(save_path, embeddings, allow_dangerous_deserialization=True)
    return vectorstore

# 4. Query and generate response (simplified)
def query_documents(vectorstore, query):
    # Retrieve relevant documents
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    docs = retriever.invoke(query)
    
    # Create context from documents
    context = "\n\n".join([doc.page_content for doc in docs])
    
    # Create prompt
    prompt = f"""Answer the question based on the following context:

Context:
{context}

Question: {query}

Answer:"""
    
    # Get response from LLM
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    response = llm.invoke(prompt)
    
    return response.content, docs

# Load and create vector store
folder_path = "./documents"
documents = load_documents(folder_path)
vectorstore = create_vector_store(documents)

print(f"‚úì Loaded {len(documents)} documents and created embeddings")


API Key: sk-proj-2D_k1B8OV3MW...
‚úì Loaded 2 documents and created embeddings


In [5]:
# Query the documents
def ask(question):
    answer, sources = query_documents(vectorstore, question)
    print(f"\n‚ùì Question: {question}")
    print(f"‚úÖ Answer: {answer}")
    print(f"\nüìÑ Sources ({len(sources)} documents):")
    for i, doc in enumerate(sources, 1):
        print(f"  {i}. {doc.metadata.get('source', 'Unknown')}")
    print("-" * 80)

# Test queries
ask("Who created Python?")
ask("What is the remote work policy?")
ask("What is machine learning?")



‚ùì Question: Who created Python?
‚úÖ Answer: Python was created by Guido van Rossum and was first released in 1991.

üìÑ Sources (2 documents):
  1. documents/ai_intro.txt
  2. documents/company_policy.txt
--------------------------------------------------------------------------------

‚ùì Question: What is the remote work policy?
‚úÖ Answer: The remote work policy allows employees to work remotely up to 3 days per week. Core hours are set from 10 AM to 3 PM local time, during which all team meetings must be scheduled. Employees are required to use Slack for daily communication and must submit their timesheets every Friday by 5 PM. The company provides a laptop and monitor, along with an internet stipend of $50 per month. For any questions regarding this policy, employees should contact HR.

üìÑ Sources (2 documents):
  1. documents/company_policy.txt
  2. documents/ai_intro.txt
--------------------------------------------------------------------------------

‚ùì Question: What is

# Step 2: Query the documents
query = "What is the main topic of the documents?"

# Load vector store (if already created)
# vectorstore = load_vector_store("faiss_index")

# Get answer
answer, sources = query_documents(vectorstore, query)

print("Answer:", answer)
print("\nSources:")
for i, doc in enumerate(sources):
    print(f"{i+1}. {doc.metadata.get('source', 'Unknown')}")
