Code reference: Hugging Face, https://huggingface.co/ngxson/demo_simple_rag_py/blob/main/demo.py

Text file source (smartphones.txt): Wikipedia, https://en.wikipedia.org/wiki/Smartphone

In [1]:
from sentence_transformers import SentenceTransformer
from transformers import T5ForConditionalGeneration, T5Tokenizer
import torch
import numpy as np

In [2]:
# Load the knowledge base: every non-blank line of the text file becomes one chunk.
dataset = []
with open('smartphones.txt', 'r', encoding='utf-8') as file:
    # Trim surrounding whitespace from each line, then drop lines that end up empty.
    dataset = list(filter(None, map(str.strip, file)))
print(f"Loaded {len(dataset)} entries")

Loaded 20 entries


In [3]:
# Display the loaded chunks to sanity-check the parsing.
# NOTE(review): this echoes every entry; a slice such as dataset[:5] would keep the output short.
dataset

['A smartphone is a mobile device that combines the functionality of a traditional mobile phone with advanced computing capabilities.',
 'It typically has a touchscreen interface, allowing users to access a wide range of applications and services, such as web browsing, email, and social media, as well as multimedia playback and streaming.',
 'Smartphones have built-in cameras, GPS navigation, and support for various communication methods, including voice calls, text messaging, and internet-based messaging apps.',
 'Smartphones are distinguished from older-design feature phones by their more advanced hardware capabilities and extensive mobile operating systems, access to the internet, business applications, mobile payments, and multimedia functionality, including music, video, gaming, radio, and television.',
 'Smartphones have central processing units (CPUs), similar to those in computers, but optimised to operate in low power environments.',
 'In smartphones, the CPU is typically inte

In [18]:
# Hugging Face model identifiers: EMBEDDING_MODEL is used to embed text,
# LANGUAGE_MODEL is the T5 checkpoint used to generate the answer.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
LANGUAGE_MODEL = "google/flan-t5-base"

embedder = SentenceTransformer(EMBEDDING_MODEL) # sentence embedding model; its .encode() turns text into a vector
# The tokenizer and the generation model are both loaded from LANGUAGE_MODEL.
tokenizer = T5Tokenizer.from_pretrained(LANGUAGE_MODEL)
t5_model = T5ForConditionalGeneration.from_pretrained(LANGUAGE_MODEL)

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [19]:
VECTOR_DB = []  # in-memory store of (chunk, embedding) pairs

def add_chunk_to_db(chunk):
    """Encode `chunk` with the embedder and append (chunk, embedding) to VECTOR_DB."""
    VECTOR_DB.append((chunk, embedder.encode(chunk)))

# Index every chunk of the dataset, reporting progress after each insertion.
for i, chunk in enumerate(dataset):
    add_chunk_to_db(chunk)
    print(f"Added chunk {i+1}/{len(dataset)} to database")

Added chunk 1/20 to database
Added chunk 2/20 to database
Added chunk 3/20 to database
Added chunk 4/20 to database
Added chunk 5/20 to database
Added chunk 6/20 to database
Added chunk 7/20 to database
Added chunk 8/20 to database
Added chunk 9/20 to database
Added chunk 10/20 to database
Added chunk 11/20 to database
Added chunk 12/20 to database
Added chunk 13/20 to database
Added chunk 14/20 to database
Added chunk 15/20 to database
Added chunk 16/20 to database
Added chunk 17/20 to database
Added chunk 18/20 to database
Added chunk 19/20 to database
Added chunk 20/20 to database


In [20]:
# Inspect the first stored record: a (chunk text, embedding vector) tuple.
VECTOR_DB[0]

('A smartphone is a mobile device that combines the functionality of a traditional mobile phone with advanced computing capabilities.',
 array([-6.82483464e-02,  8.44883695e-02,  7.87148252e-02, -4.18548472e-02,
        -2.89359838e-02, -6.14507832e-02,  1.20281950e-02,  7.29153231e-02,
         4.53183539e-02,  1.03734359e-02,  2.95627508e-02, -1.14304004e-02,
         8.53074118e-02, -5.51087521e-02,  5.73070757e-02, -3.70669775e-02,
        -3.24192159e-02, -5.73755763e-02, -9.15244222e-02,  1.72292944e-02,
        -1.19619966e-02,  8.78940597e-02, -5.59665188e-02, -1.64578063e-03,
         4.37307358e-02, -1.12780416e-02, -7.64900222e-02,  9.65138245e-03,
         2.92031523e-02,  7.11746737e-02,  1.65686402e-02,  1.21953838e-01,
         4.31710184e-02,  1.14683881e-01, -1.36459574e-01, -2.83661745e-02,
        -5.36804050e-02, -4.05281270e-03, -1.19196088e-03, -3.17890048e-02,
        -2.58791428e-02, -2.38496549e-02,  3.34625994e-03,  8.94277182e-04,
         2.84505971e-02,  1.

In [21]:
# Shape (dimensionality) of one stored embedding vector.
VECTOR_DB[0][1].shape

(384,)

In [22]:
def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors `a` and `b`.

    Args:
        a: 1-D array-like of numbers.
        b: 1-D array-like of numbers with the same length as `a`.

    Returns:
        dot(a, b) / (||a|| * ||b||), a value in [-1, 1]. If either vector has
        zero magnitude the similarity is undefined; 0.0 is returned in that
        case so callers never receive NaN.
    """
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    # Guard against 0/0: a NaN score would emit a RuntimeWarning and make any
    # sort over similarity scores unreliable.
    if norm_product == 0:
        return 0.0
    return np.dot(a, b) / norm_product

In [23]:
# retrieval
def retrieve(query, top_n=3):
    """Return the stored chunks most similar to `query`.

    The query is encoded with the embedder and compared, via cosine
    similarity, against every (chunk, embedding) pair in VECTOR_DB.

    Args:
        query: text to search for.
        top_n: number of leading results to keep after ranking.

    Returns:
        A list of (chunk, similarity) tuples ordered from most to least
        similar, sliced to the first `top_n` items.
    """
    query_vector = embedder.encode(query)
    scored = []
    for text, vector in VECTOR_DB:
        scored.append((text, cosine_similarity(query_vector, vector)))
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]

In [49]:
# Read the question interactively, then fetch the most similar chunks
# (retrieve is called with its default top_n).
# NOTE(review): every later cell depends on what is typed here, so a Run All
# needs manual input; consider a hard-coded example query for reproducible runs.
query = input("Ask me a question: ")
retrieved = retrieve(query)

Ask me a question: do smartphones have a battery


In [50]:
# Print a header followed by one line per retrieved chunk with its similarity score.
print(
    "\nRetrieved knowledge:",
    *(f" - (similarity: {sim:.2f}) {chunk}" for chunk, sim in retrieved),
    sep="\n",
)


Retrieved knowledge:
 - (similarity: 0.67) Smartphones typically feature lithium-ion or lithium-polymer batteries due to their high energy densities.
 - (similarity: 0.62) Smartphones are typically equipped with a power button and volume buttons.
 - (similarity: 0.56) A smartphone is a mobile device that combines the functionality of a traditional mobile phone with advanced computing capabilities.


In [51]:
# augmentation
# Each retrieved chunk becomes one "- " bullet; the similarity scores are not put in the prompt.
context = "\n".join(f"- {chunk}" for chunk, _score in retrieved)
# The template holds the instructions, the bulleted context, and the user's question.
prompt = f"""
You are a helpful chatbot. Use only the following information to answer the question.
Write it in a good detailed paragraph.

Context:
{context}

Question: {query}
Answer:
"""

In [52]:
# generation
# Sampling is stochastic: seed torch's global RNG so re-running this cell
# with the same prompt yields the same answer.
GENERATION_SEED = 42
torch.manual_seed(GENERATION_SEED)

# Tokenize the prompt into PyTorch tensors, truncating it to at most 512 tokens.
inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
outputs = t5_model.generate(
    **inputs,
    max_new_tokens=128,  # upper bound on the length of the generated answer
    # Nucleus sampling. No `early_stopping` here: that flag only applies to
    # beam search (num_beams > 1) and merely triggers a warning when sampling
    # with a single beam.
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
)

print("\nChatbot response:")
# Decode the first (only) generated sequence, dropping special tokens.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


Chatbot response:
yes
