In [87]:
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser

# Set up the LangChain Gemini model.
# The API key is read from the GOOGLE_API_KEY environment variable instead of
# being embedded in the notebook, so it cannot leak through version control or
# shared copies. A missing variable fails fast with a KeyError.
# NOTE: the key that used to be hard-coded here must be treated as exposed and
# rotated.
model = ChatGoogleGenerativeAI(
    api_key=os.environ["GOOGLE_API_KEY"],
    model="gemini-1.5-flash"
)

# Create the parser applied to the model's output
parser = StrOutputParser()

# Pipe the model into the parser so a single invoke() runs both
chain = model | parser

# Smoke-test the chain with a plain-string query
response = chain.invoke("what is rag")

print(response)

RAG stands for **Retrieval Augmented Generation**.  It's a technique used in large language models (LLMs) to improve their performance and factual accuracy.  Instead of relying solely on the knowledge embedded within the model itself, RAG augments the model's capabilities by allowing it to access and process external information from a knowledge base or document store during generation.

Here's a breakdown:

* **Retrieval:** The model first receives a prompt or question.  A retrieval component then searches a relevant knowledge base (which could be a database, a set of documents, or even the entire web) to find information pertinent to the prompt.

* **Augmentation:** The retrieved information is then "augmented" into the model's input.  This means the retrieved text is added to the original prompt before the model generates its response.

* **Generation:** Finally, the LLM generates a response based on both its internal knowledge and the newly retrieved external information.

The key 

In [89]:
from langchain.prompts import ChatPromptTemplate

# Prompt text with two placeholders, {context} and {question}. The model is
# told to reply "I don't know" when the context does not contain the answer.
# The literal's whitespace is part of the prompt, so it is left untouched.
template = """
Answer the question based on the context below. If you can't 
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

# Build a chat prompt from the template string
prompt = ChatPromptTemplate.from_template(template)
# Rebinds `chain`: the prompt is now piped in front of the model and parser, so
# it is invoked with a dict holding "context" and "question" entries.
chain = prompt | model | parser


In [101]:
# Load the raw question text (UTF-8)
with open('qns.txt', 'r', encoding='utf-8') as src_file:
    content = src_file.read()

# Write the same text out as context.txt (UTF-8), the file later cells load
with open('context.txt', 'w', encoding='utf-8') as dst_file:
    dst_file.write(content)

# Read the copy back; `context` ends up holding the file's text
with open("context.txt", 'r', encoding='utf-8') as copy_file:
    context = copy_file.read()

# Preview only the first 100 characters
preview = context[:100]
print(preview)




Top 50 Data Structures Interview Questions
And Answers PDF

Data Structure Interview Questions for F


In [102]:
# Ask one question against the full context text; any failure is reported as
# a printed message instead of a traceback.
try:
    qa_inputs = dict(context=context, question="What is a data structure")
    response = chain.invoke(qa_inputs)
    print(response)
except Exception as err:
    print(f"Error occurred: {err}")

A data structure is a way to organize, align, and manipulate data according to requirements.  It's not limited to tables; it encompasses various datasets and how efficiently they're arranged for optimal access and processing.  The way data is organized significantly impacts a program's performance.  Data dependencies and relationships between datasets are also crucial considerations.  Incorrect or inefficient data structuring can hinder overall code performance.


In [128]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load context.txt, decoding it as UTF-8
loader = TextLoader("context.txt", encoding="utf-8")
text_documents = loader.load()

# Show what the loader returned
print(text_documents)

# Split the loaded document(s) into chunks (chunk_size=500, chunk_overlap=20)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=20,
)
documents = text_splitter.split_documents(text_documents)


[Document(metadata={'source': 'context.txt'}, page_content='Top 50 Data Structures Interview Questions\nAnd Answers PDF\n\nData Structure Interview Questions for Freshers\n1. What is a data structure?\nA data structure is a mechanical way to organise, align, and manipulate data as per\nrequirements. It is not restricted to putting data in a table but deals with different datasets\nand how well they are aligned.\nThe aim is to ensure that data can be organised and accessed efficiently. Data organisation\ndetermines how a program performs. Moreover, data dependency and relationships between\ntwo or more datasets play a crucial role in data structures.\nWhile designing code, we need to pay utmost attention to how data is structured because\nincorrectly structured or inefficiently stored data can hamper the overall performance of the\ncode.\n\n2. What are the applications of data structures?\nData structures are applied across multiple industries and domains as algorithms are the\nprimary 

In [129]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load ans.txt (the user's answers), decoding it as UTF-8
loader = TextLoader("ans.txt", encoding="utf-8")
text_documents = loader.load()

# Show what the loader returned
print(text_documents)

# Split the answers with the same settings (chunk_size=500, chunk_overlap=20)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=20,
)
documents_ans = text_splitter.split_documents(text_documents)

[Document(metadata={'source': 'ans.txt'}, page_content='\n\n9. What is a linked list data structure?\nnoooooooooooooo\n\n')]


In [None]:
# import pinecone
# from langchain.vectorstores import Pinecone
# from langchain.embeddings import HuggingFaceEmbeddings
# hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
# embeddings = hf_embeddings.embed_documents([doc.page_content for doc in documents])
# # Initialize Pinecone
# pc = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"], environment="us-east-1")

# index_name = "qns-rag"
# if index_name not in pc.list_indexes().names():
#     pc.create_index(name=index_name, metric="cosine", dimension=len(embeddings[0]))

# index = pc.Index(index_name)

# # Upsert embeddings into Pinecone
# vectors = [(str(i), emb, {"text": documents[i].page_content}) for i, emb in enumerate(embeddings)]
# index.upsert(vectors=vectors)

# # Correct Pinecone VectorStore initialization
# pinecone_vectorstore = Pinecone(index=index, embedding=hf_embeddings, text_key="text")

# print(f"Documents upserted to Pinecone index '{index_name}' successfully.")


Documents upserted to Pinecone index 'qns-rag' successfully.


In [None]:
# from langchain_core.runnables import RunnableParallel, RunnablePassthrough
# chain = (
#     {"context": pinecone_vectorstore.as_retriever(), "question": RunnablePassthrough()}
#     | prompt
#     | model
#     | parser
# )

# # Invoke the chain with the query
# chain.invoke("Are linked lists linear or non-linear data structures?")

'Linked lists can be both linear and non-linear depending on their usage.  If used for storage, they are non-linear; if used for access or retrieval, they are linear.'

In [130]:
import pinecone
from langchain.vectorstores import Pinecone
from langchain.embeddings import HuggingFaceEmbeddings
# Initialize HuggingFace embeddings
hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Embed both the reference (context) chunks and the user-answer chunks
context_embeddings = hf_embeddings.embed_documents([doc.page_content for doc in documents])
user_embeddings = hf_embeddings.embed_documents([doc.page_content for doc in documents_ans])

# Initialize Pinecone.
# The API key is read from the PINECONE_API_KEY environment variable instead of
# being embedded in the notebook; a missing variable fails fast with a KeyError.
# NOTE: the key that used to be hard-coded here must be treated as exposed and
# rotated.
pc = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"], environment="us-east-1")


# Create the index if it does not exist yet; its dimension is taken from the
# length of the first context embedding.
index_name = "qns-rag"
if index_name not in pc.list_indexes().names():
    # NOTE(review): no `spec` argument is passed here - confirm the installed
    # Pinecone client accepts create_index() without one.
    pc.create_index(name=index_name, metric="cosine", dimension=len(context_embeddings[0]))

index = pc.Index(index_name)

# Upsert the reference chunks under the 'context' namespace. Each vector is
# (id, embedding, metadata) and keeps its chunk text under the "text" key.
context_vectors = [
    (f"context_{i}", emb, {"text": doc.page_content})
    for i, (doc, emb) in enumerate(zip(documents, context_embeddings))
]
index.upsert(vectors=context_vectors, namespace="context")

# Upsert the user answers under the 'user' namespace, same vector layout
user_vectors = [
    (f"user_{i}", emb, {"text": doc.page_content})
    for i, (doc, emb) in enumerate(zip(documents_ans, user_embeddings))
]
index.upsert(vectors=user_vectors, namespace="user")

print("Documents upserted to Pinecone successfully!")


Documents upserted to Pinecone successfully!


In [132]:
from langchain.vectorstores import Pinecone

# Retrieve top matches from context for each user answer
def find_incorrect_answers(documents_ans, user_embeddings, index, threshold=0.85):
    """Return the user answers that are not close enough to any reference chunk.

    For every user embedding the single nearest vector in the ``"context"``
    namespace of ``index`` is queried. The answer is reported as incorrect
    when the best similarity score is below ``threshold``, or when the query
    returns no match at all. An unmatched answer cannot be verified, so it
    must not be silently counted as correct.

    Args:
        documents_ans: Sequence of objects with a ``page_content`` attribute,
            aligned by position with ``user_embeddings``.
        user_embeddings: One embedding vector per entry of ``documents_ans``.
        index: Object whose ``query(vector=..., top_k=..., namespace=...,
            include_metadata=...)`` returns a mapping with a ``"matches"``
            list. Each match exposes ``"score"`` and ``"metadata"["text"]``.
        threshold: Minimum similarity score for an answer to be accepted.

    Returns:
        A list of dicts with keys ``"user_answer"``, ``"correct_answer"`` and
        ``"similarity"``, in input order. For an answer without any match,
        ``"correct_answer"`` is ``None`` and ``"similarity"`` is ``0.0``.
    """
    incorrect_answers = []

    for i, emb in enumerate(user_embeddings):
        # Search for the most similar chunk in context
        results = index.query(vector=emb, top_k=1, namespace="context", include_metadata=True)
        matches = results["matches"]

        if not matches:
            # Nothing to compare against: flag the answer rather than drop it
            incorrect_answers.append({
                "user_answer": documents_ans[i].page_content,
                "correct_answer": None,
                "similarity": 0.0
            })
            continue

        best_match = matches[0]
        score = best_match["score"]

        # If similarity is below the threshold, mark as incorrect
        if score < threshold:
            incorrect_answers.append({
                "user_answer": documents_ans[i].page_content,
                "correct_answer": best_match["metadata"]["text"],
                "similarity": score
            })

    return incorrect_answers

# Run the similarity check over every user answer
incorrect_answers = find_incorrect_answers(documents_ans, user_embeddings, index)

# Report: either the all-clear message, or each flagged answer together with
# its closest reference text and similarity score
if not incorrect_answers:
    print("All answers are correct!")
else:
    print("Incorrect Answers Found:")
    for entry in incorrect_answers:
        print(
            f"\nUser Answer: {entry['user_answer']}",
            f"Correct Answer: {entry['correct_answer']}",
            f"Similarity Score: {entry['similarity']:.2f}",
            sep="\n",
        )



Incorrect Answers Found:

User Answer: 9. What is a linked list data structure?
noooooooooooooo
Correct Answer: 10. Are linked lists linear or non-linear data structures?
Linked lists can be linear and non-linear data structures based on their usage. If it’s used for
storage, it’s non-linear, but if it’s used for access or retrieval strategies, it’s considered a
linear data structure.

11. What are the pros of a linked list over an array? In which
scenarios do we use a linked list and when array?
The benefits of a linked list over an array are:
Similarity Score: 0.76


In [133]:
from langchain.vectorstores import Pinecone

# Retrieve matches and classify answers
def classify_answers(documents_ans, user_embeddings, index, threshold=0.85):
    """Split user answers into correct and incorrect by similarity to the context.

    For every user embedding the single nearest vector in the ``"context"``
    namespace of ``index`` is queried. A score of at least ``threshold`` marks
    the answer correct; a lower score marks it incorrect. An answer whose
    query returns no match at all is also classified as incorrect, because it
    cannot be verified and must not vanish from both lists.

    Args:
        documents_ans: Sequence of objects with a ``page_content`` attribute,
            aligned by position with ``user_embeddings``.
        user_embeddings: One embedding vector per entry of ``documents_ans``.
        index: Object whose ``query(vector=..., top_k=..., namespace=...,
            include_metadata=...)`` returns a mapping with a ``"matches"``
            list. Each match exposes ``"score"`` and ``"metadata"["text"]``.
        threshold: Minimum similarity score for an answer to count as correct.

    Returns:
        A ``(correct_answers, incorrect_answers)`` tuple of lists. Every item
        is a dict with keys ``"user_answer"``, ``"correct_answer"`` and
        ``"similarity"``. For an answer without any match,
        ``"correct_answer"`` is ``None`` and ``"similarity"`` is ``0.0``.
    """
    correct_answers = []
    incorrect_answers = []

    for i, emb in enumerate(user_embeddings):
        # Search for the most similar chunk in context
        results = index.query(vector=emb, top_k=1, namespace="context", include_metadata=True)
        matches = results["matches"]

        if not matches:
            # Nothing to compare against: classify as incorrect, do not drop
            incorrect_answers.append({
                "user_answer": documents_ans[i].page_content,
                "correct_answer": None,
                "similarity": 0.0
            })
            continue

        best_match = matches[0]
        score = best_match["score"]
        entry = {
            "user_answer": documents_ans[i].page_content,
            "correct_answer": best_match["metadata"]["text"],
            "similarity": score
        }

        # A score equal to the threshold counts as correct
        if score >= threshold:
            correct_answers.append(entry)
        else:
            incorrect_answers.append(entry)

    return correct_answers, incorrect_answers


# Sort the user answers into those that pass and those that fail the check
correct_answers, incorrect_answers = classify_answers(documents_ans, user_embeddings, index)

# Print each non-empty group under its own heading, correct answers first
for heading, group in (
    ("✅ Correct Answers:", correct_answers),
    ("\n❌ Incorrect Answers:", incorrect_answers),
):
    if group:
        print(heading)
        for ans in group:
            print(f"\nUser Answer: {ans['user_answer']}")
            print(f"Correct Answer: {ans['correct_answer']}")
            print(f"Similarity Score: {ans['similarity']:.2f}")

# Final summary; a mix of correct and incorrect answers prints no summary line
if not (correct_answers or incorrect_answers):
    print("No answers found!")
elif not incorrect_answers:
    print("\n🎉 All answers are correct!")
elif not correct_answers:
    print("\n⚠️ All answers are incorrect!")



❌ Incorrect Answers:

User Answer: 9. What is a linked list data structure?
noooooooooooooo
Correct Answer: 10. Are linked lists linear or non-linear data structures?
Linked lists can be linear and non-linear data structures based on their usage. If it’s used for
storage, it’s non-linear, but if it’s used for access or retrieval strategies, it’s considered a
linear data structure.

11. What are the pros of a linked list over an array? In which
scenarios do we use a linked list and when array?
The benefits of a linked list over an array are:
Similarity Score: 0.76

⚠️ All answers are incorrect!


In [127]:
import pinecone

# Initialize Pinecone.
# The API key is read from the PINECONE_API_KEY environment variable instead of
# being embedded in the notebook; a missing variable fails fast with a KeyError.
# NOTE: the key that used to be hard-coded here must be treated as exposed and
# rotated.
pc = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"], environment="us-east-1")

# Name of your index
index_name = "qns-rag"

# Connect to the index
index = pc.Index(index_name)

# Delete every vector in the "context" and "user" namespaces; the delete for
# the default ("") namespace is left disabled
index.delete(delete_all=True, namespace="context")
index.delete(delete_all=True, namespace="user")
# index.delete(delete_all=True, namespace="")
print(f"All data from index '{index_name}' has been deleted successfully!")

All data from index 'qns-rag' has been deleted successfully!
