In [2]:
# imports
from sentence_transformers import SentenceTransformer
import faiss
from transformers import pipeline

In [3]:
documents = [
    "Deep learning models are solving complex problems.",
    "Generative AI can create lifelike images and videos.",
    "AI models need optimization to reduce biases.",
    "Natural language processing enables better human-computer interaction.",
    "Computer vision algorithms can detect objects in real-time.",
    "Reinforcement learning helps agents learn optimal strategies.",
    "Transfer learning accelerates model training on new tasks.",
    "Attention mechanisms have revolutionized sequence modeling."
]

In [5]:
# Create Embeddings and Index

# 1. Load embedding model

model = SentenceTransformer("all-MiniLM-L6-v2")   # 384‑dim embeddings

# 2. Generate embeddings

embeddings = model.encode(documents, convert_to_numpy=True)

print("Embedding shape:", embeddings.shape)   # should be (num_docs, 384)

# 3. Create FAISS index

embedding_dim = embeddings.shape[1]           # must match model output
index = faiss.IndexFlatL2(embedding_dim)      # simple L2 similarity index

# 4. Add embeddings to index

index.add(embeddings)

# 5. Verify index size

print("Number of vectors in index:", index.ntotal)

Embedding shape: (8, 384)
Number of vectors in index: 8


In [6]:
# Task 3

import numpy as np

# 1. Define a simple query

query = "How do attention mechanisms improve AI models?"

# Embed the query
query_embedding = model.encode([query], convert_to_numpy=True)

# 2. Retrieve top-k documents

k = 3
distances, indices = index.search(query_embedding, k)

retrieved_docs = [documents[i] for i in indices[0]]

print("Retrieved Documents:")
for doc in retrieved_docs:
    print("-", doc)

# 3. Create enhanced prompt

context_block = "\n".join(f"- {doc}" for doc in retrieved_docs)

enhanced_prompt = f"""
You are an AI assistant. Use the context below to answer the question.

Context:
{context_block}

Question:
{query}

Answer:
"""

# 4. Baseline response (no retrieval)

def mock_llm(prompt):
    # Replace this with your real LLM call later
    return "Attention mechanisms help models focus on important parts of the input sequence."

baseline_response = mock_llm(query)

# 5. Enhanced RAG response
# -----------------------------
rag_response = mock_llm(enhanced_prompt)

# 6. Compare responses
# -----------------------------
print("\n--- Baseline Response ---")
print(baseline_response)

print("\n--- RAG-Enhanced Response ---")
print(rag_response)

Retrieved Documents:
- Attention mechanisms have revolutionized sequence modeling.
- AI models need optimization to reduce biases.
- Reinforcement learning helps agents learn optimal strategies.

--- Baseline Response ---
Attention mechanisms help models focus on important parts of the input sequence.

--- RAG-Enhanced Response ---
Attention mechanisms help models focus on important parts of the input sequence.


In [7]:
def mock_llm(prompt):
    """
    A simple simulated LLM that uses retrieved context.
    It looks for the 'Context:' block and incorporates it into the answer.
    """
    if "Context:" in prompt:
        context = prompt.split("Context:")[1].split("Question:")[0].strip()
        return f"Based on the retrieved context, here is the answer:\n\n{context}\n\nIn summary, attention mechanisms improve sequence modeling by allowing models to focus on the most relevant parts of the input."
    else:
        return "Attention mechanisms help models focus on important parts of the input sequence."

In [8]:
baseline_response = mock_llm(query)

# 5. Enhanced RAG response
# -----------------------------
rag_response = mock_llm(enhanced_prompt)

# 6. Compare responses
# -----------------------------
print("\n--- Baseline Response ---")
print(baseline_response)

print("\n--- RAG-Enhanced Response ---")
print(rag_response)


--- Baseline Response ---
Attention mechanisms help models focus on important parts of the input sequence.

--- RAG-Enhanced Response ---
Based on the retrieved context, here is the answer:

- Attention mechanisms have revolutionized sequence modeling.
- AI models need optimization to reduce biases.
- Reinforcement learning helps agents learn optimal strategies.

In summary, attention mechanisms improve sequence modeling by allowing models to focus on the most relevant parts of the input.


In [9]:
!pip install transformers accelerate sentencepiece

Defaulting to user installation because normal site-packages is not writeable
Collecting sentencepiece
  Downloading sentencepiece-0.2.1-cp312-cp312-win_amd64.whl.metadata (10 kB)
Downloading sentencepiece-0.2.1-cp312-cp312-win_amd64.whl (1.1 MB)
   ---------------------------------------- 0.0/1.1 MB ? eta -:--:--
   ---------------------------------------- 1.1/1.1 MB 7.3 MB/s eta 0:00:00
Installing collected packages: sentencepiece
Successfully installed sentencepiece-0.2.1



[notice] A new release of pip is available: 25.0.1 -> 25.3
[notice] To update, run: C:\Users\Kanan\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [11]:
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

In [12]:
# Load the model
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # public, no token needed

tokenizer = AutoTokenizer.from_pretrained(model_name)
llama_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto"
)

tokenizer_config.json: 0.00B [00:00, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Some parameters are on the meta device because they were offloaded to the disk and cpu.


In [13]:
# Use model in RAG_pipeline 

def llama_generate(prompt, max_tokens=200):
    inputs = tokenizer(prompt, return_tensors="pt").to(llama_model.device)
    outputs = llama_model.generate(
        **inputs,
        max_new_tokens=max_tokens,
        temperature=0.2,
        do_sample=True
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [14]:
# Compare Baseline vs RAG‑Enhanced Responses

baseline_response = mock_llm(query)

# 5. Enhanced RAG response
# -----------------------------
rag_response = llama_generate(enhanced_prompt)

# 6. Compare responses
# -----------------------------
print("\n--- Baseline Response ---")
print(baseline_response)

print("\n--- RAG-Enhanced Response ---")
print(rag_response)


--- Baseline Response ---
Attention mechanisms help models focus on important parts of the input sequence.

--- RAG-Enhanced Response ---

You are an AI assistant. Use the context below to answer the question.

Context:
- Attention mechanisms have revolutionized sequence modeling.
- AI models need optimization to reduce biases.
- Reinforcement learning helps agents learn optimal strategies.

Question:
How do attention mechanisms improve AI models?

Answer:
Attention mechanisms help AI models focus on specific parts of the input sequence. This helps reduce the impact of noise and distracting information. By focusing on the most important parts of the sequence, AI models can learn optimal strategies for solving problems.
