In [1]:
import os
import json

# Directory where your articles are stored
DATA_DIR = "data"

def load_text_files(directory):
    texts = []
    for filename in os.listdir(directory):
        if filename.endswith(".txt"):
            with open(os.path.join(directory, filename), "r", encoding="utf-8") as f:
                texts.append(f.read())
    return texts

def clean_text(text):
    text = text.replace("\n", " ")
    text = " ".join(text.split())  # remove multiple spaces
    return text

def chunk_text(text, chunk_size=500, overlap=100):
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunk = text[start:end]
        chunks.append(chunk)
        start = end - overlap
    return chunks

# Load and clean articles
articles = load_text_files(DATA_DIR)
cleaned_articles = [clean_text(t) for t in articles]

# Chunk all articles
all_chunks = []
chunk_id = 0

for idx, text in enumerate(cleaned_articles):
    chunks = chunk_text(text)
    for c in chunks:
        all_chunks.append({
            "chunk_id": chunk_id,
            "article_id": idx,
            "text": c
        })
        chunk_id += 1

# Save chunks to JSON
with open("chunks.json", "w", encoding="utf-8") as f:
    json.dump(all_chunks, f, indent=4)

len(all_chunks)


8

In [2]:
!pip install sentence-transformers faiss-cpu


Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m57.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.12.0


In [3]:
from sentence_transformers import SentenceTransformer
import numpy as np
import json

# Load chunks from Step-2
with open("chunks.json", "r", encoding="utf-8") as f:
    chunks = json.load(f)

# Extract only the text part
texts = [c["text"] for c in chunks]

# Load embedding model
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Create embeddings
embeddings = model.encode(texts, show_progress_bar=True)

# Save embeddings
np.save("embeddings.npy", embeddings)

# Print shape
embeddings.shape

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

(8, 384)

In [4]:
import faiss
import numpy as np
import json

# Load embeddings
embeddings = np.load("embeddings.npy")

# Dimension of embeddings
d = embeddings.shape[1]

# Create FAISS index (Cosine Similarity)
index = faiss.IndexFlatIP(d)

# Normalize embeddings for cosine similarity
embeddings_norm = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)

# Add to FAISS index
index.add(embeddings_norm)

print("FAISS index created with:", index.ntotal, "vectors")

# Load chunks metadata
with open("chunks.json", "r", encoding="utf-8") as f:
    chunks = json.load(f)

# Function for query → top-k retrieval
def retrieve_similar_chunks(query, k=3):
    # Embed query using same model
    query_embedding = model.encode([query])
    query_embedding = query_embedding / np.linalg.norm(query_embedding)

    # FAISS search
    scores, ids = index.search(query_embedding.astype(np.float32), k)

    # Prepare results
    results = []
    for i, idx in enumerate(ids[0]):
        results.append({
            "chunk_id": int(idx),
            "score": float(scores[0][i]),
            "text": chunks[idx]["text"]
        })

    return results

# Test retrieval
test_results = retrieve_similar_chunks("What is machine learning?")
test_results


FAISS index created with: 8 vectors


[{'chunk_id': 4,
  'score': 0.7513055801391602,
  'text': 'Machine learning is a subfield of artificial intelligence that focuses on building systems capable of learning from data. Instead of writing explicit rules for every task, machine learning models identify patterns in data and make predictions or decisions based on those patterns. Machine learning is widely used in applications such as recommendation systems, fraud detection, medical diagnosis, and speech recognition. There are three main types of machine learning: supervised learning, unsupervis'},
 {'chunk_id': 5,
  'score': 0.6167935729026794,
  'text': ' speech recognition. There are three main types of machine learning: supervised learning, unsupervised learning, and reinforcement learning. In supervised learning, the model learns from labeled examples to predict outputs for unseen data. Common algorithms include linear regression, decision trees, random forests, and support vector machines. Unsupervised learning involves d

In [5]:
!pip install huggingface-hub
from huggingface_hub import InferenceClient
import os




In [6]:
from google.colab import userdata

HUGGINGFACE_TOKEN = userdata.get('HUGGINGFACE_TOKEN')
print("Your Token starts with:", HUGGINGFACE_TOKEN[:10])


Your Token starts with: hf_uBqKghb


In [7]:
client = InferenceClient(
    "meta-llama/Llama-3.2-3B-Instruct",
    token=HUGGINGFACE_TOKEN
)

print("Chat model ready!")


Chat model ready!


In [8]:
response = client.chat.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=50
)

print(response.choices[0].message["content"])


Hello!


In [9]:
def build_prompt(query, retrieved_chunks):
    context = ""
    for i, chunk in enumerate(retrieved_chunks):
        context += f"[{i}] {chunk['text']}\n\n"

    prompt = f"""
You are an AI assistant. Answer the user's question using ONLY the context below.
If the answer is not present, reply "I don't know".

Context:
{context}

Question: {query}

Answer with correct citations like [0], [1], etc.
    """

    return prompt


In [10]:
def rag_answer(query, k=3):
    # retrieve chunks
    retrieved = retrieve_similar_chunks(query, k)

    # build prompt
    prompt = build_prompt(query, retrieved)

    # LLM chat call
    response = client.chat.completions.create(
        model="meta-llama/Llama-3.2-3B-Instruct",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=300
    )

    answer = response.choices[0].message["content"]

    return {
        "query": query,
        "retrieved_chunks": retrieved,
        "answer": answer
    }


In [11]:
result = rag_answer("What are the types of machine learning?")
result


{'query': 'What are the types of machine learning?',
 'retrieved_chunks': [{'chunk_id': 4,
   'score': 0.7774506211280823,
   'text': 'Machine learning is a subfield of artificial intelligence that focuses on building systems capable of learning from data. Instead of writing explicit rules for every task, machine learning models identify patterns in data and make predictions or decisions based on those patterns. Machine learning is widely used in applications such as recommendation systems, fraud detection, medical diagnosis, and speech recognition. There are three main types of machine learning: supervised learning, unsupervis'},
  {'chunk_id': 5,
   'score': 0.7587686777114868,
   'text': ' speech recognition. There are three main types of machine learning: supervised learning, unsupervised learning, and reinforcement learning. In supervised learning, the model learns from labeled examples to predict outputs for unseen data. Common algorithms include linear regression, decision trees