# **Basic Setup**

In [None]:
!pip install -qq langchain
!pip install -qq langchain-community
!pip install -qq langchain-google-genai

In [2]:
import os

In [3]:
from google.colab import userdata
# Read the Gemini key from Colab's `userdata` store rather than hardcoding it.
# NOTE(review): presumably a secret named 'GEMINI_API_KEY' must already exist
# and be shared with this notebook — confirm before running.
GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')

In [4]:
# Export the key as the GOOGLE_API_KEY environment variable for this process.
# NOTE(review): presumably so Google client libraries can discover it on their
# own; the chat model in this notebook is also given the key explicitly.
os.environ['GOOGLE_API_KEY'] = GEMINI_API_KEY

In [5]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Gemini chat model that will write the interview questions and answers.
# A low temperature keeps the generated text fairly conservative.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.3,
    google_api_key=GEMINI_API_KEY,
)

# **Loading the pdfs and making chunks**

In [6]:
!pip install -qq pypdf

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/310.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.5/310.5 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [7]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [12]:
# Folder holding the source PDFs (Colab path); change it here if the files move.
PDF_DIR = "/content/pdfs/"
loader = PyPDFDirectoryLoader(PDF_DIR)

In [13]:
# Run the directory loader. The result is iterated below as documents whose
# `metadata['source']` records the PDF each one came from.
data = loader.load()

In [None]:
# Preview only the first few documents; displaying the whole list would dump
# every loaded page into the cell output.
data[:3]

In [15]:
# Distinct source paths recorded in the documents' metadata.
files_loaded = {doc.metadata['source'] for doc in data}

print("Files loaded:")
# Iterate in sorted order: a bare set of strings can print in a different
# order on every kernel restart, which makes the output hard to compare.
for path in sorted(files_loaded):
    print(path)

Files loaded:
/content/pdfs/Data structures (Knapsack).pdf
/content/pdfs/Data structures (Hasing).pdf
/content/pdfs/Data structures (Graph).pdf
/content/pdfs/Data structures (Binary Trees).pdf
/content/pdfs/Data structures (Binary Search Tree).pdf
/content/pdfs/Data structures (Linked List).pdf


In [16]:
# Chunking parameters: maximum chunk size and the overlap shared by
# neighbouring chunks so text is not cut off abruptly at a boundary.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 100

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Break every loaded document into retrieval-sized pieces.
text_chunks = text_splitter.split_documents(data)

In [17]:
# How many chunks the splitter produced.
len(text_chunks)

177

In [19]:
# Spot-check a single chunk (metadata + page_content); index 5 is an arbitrary pick.
text_chunks[5]

Document(metadata={'producer': 'Microsoft® PowerPoint® 2016', 'creator': 'Microsoft® PowerPoint® 2016', 'creationdate': '2023-12-29T11:03:01+05:00', 'title': 'Slide 1', 'author': 'fsaleem', 'moddate': '2023-12-29T11:03:01+05:00', 'source': '/content/pdfs/Data structures (Binary Trees).pdf', 'total_pages': 41, 'page': 5, 'page_label': '6'}, page_content='Example')

# **Generating the embeddings and storing them in FAISS**

In [None]:
''' This is the old way of using the Hugging Face wrapper; it is deprecated in newer versions

from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

'''


In [None]:
# New way of using hugging face wrapper

!pip install langchain_huggingface

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model configuration.
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = dict(device='cpu')                 # run the model on CPU
encode_kwargs = dict(normalize_embeddings=False)  # keep vectors un-normalized

# Embedding object used to vectorize the chunks.
hf = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs)

In [27]:
!pip install -qq faiss-cpu

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m67.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from langchain_community.vectorstores import FAISS

In [28]:
# Embed every chunk with `hf` and index the resulting vectors in FAISS.
vectorstore = FAISS.from_documents(documents=text_chunks, embedding=hf)

# Persist the index to the relative folder "faiss_index" so it can be reloaded
# without re-embedding.
vectorstore.save_local(folder_path="faiss_index")

In [29]:
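# To reload the saved index later, pass the same embeddings object used to build it (`hf`).
# allow_dangerous_deserialization=True unpickles the stored data, so only load an index you created yourself.
# vectorstore = FAISS.load_local("faiss_index", hf, allow_dangerous_deserialization=True)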

In [30]:
# Retriever over the FAISS index: similarity search returning the 5 best chunks.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

# **Making a chat template of Interview QA for LLM**

In [31]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt text with three placeholders:
#   {n}       - how many question/answer pairs to produce
#   {topic}   - the subject of the questions
#   {context} - the retrieved material the questions must be based on
INTERVIEW_QA_TEMPLATE = """
You are an interviewer.
Generate {n} interview-style questions on the topic "{topic}"
based only on the provided context.
After each question, also provide a clear and correct answer.

Context:
{context}

Format:
Q1: ...
A1: ...
Q2: ...
A2: ...
"""

prompt = ChatPromptTemplate.from_template(INTERVIEW_QA_TEMPLATE)


# **RAG chain**

In [40]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter # when given a dict, returns the value for that key

def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt's {context} slot is filled with the string
    form of a list of Document objects, so their metadata (producer, author,
    file path, ...) is sent to the model alongside the actual slide text.

    Args:
        docs: iterable of documents exposing a ``page_content`` attribute.

    Returns:
        The documents' text separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Input:  {"topic": <str>, "n": <int>}
# Output: the model's reply as a plain string.
rag_chain = (
    {
        # Only "topic" is used as the search query; the retrieved documents
        # are flattened to plain text before they reach the prompt.
        "context": itemgetter("topic") | retriever | format_docs,
        "topic": itemgetter("topic"),
        "n": itemgetter("n"),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [41]:
# Ask for three interview questions about the Knapsack problem.
# Alternative probe kept from the original cell:
#   "topic": "Tell me about Riyan"
request = {"topic": "How to solve Knapsack problem?", "n": 3}
result = rag_chain.invoke(request)

# print() renders the newlines in the reply instead of showing the raw repr.
print(result)


Q1:  Explain the difference between the 0/1 Knapsack problem and the Fractional Knapsack problem, and how this difference impacts the approach to solving them.

A1: The 0/1 Knapsack problem deals with indivisible items; you can either take an entire item or none of it.  The Fractional Knapsack problem allows you to take fractions of items.  This key difference leads to different solution approaches. The 0/1 Knapsack problem is typically solved using dynamic programming because of the discrete nature of the items.  The Fractional Knapsack problem, on the other hand, can be efficiently solved using a greedy approach, prioritizing items with the highest value-to-weight ratio.


Q2:  The provided text mentions that the 0/1 Knapsack problem can be solved using dynamic programming.  Describe at a high level how a dynamic programming solution would work.

A2: A dynamic programming solution to the 0/1 Knapsack problem typically uses a table (often a 2D array) to store the maximum value achieva