In [1]:
import os
from dotenv import load_dotenv
from huggingface_hub import login, InferenceClient
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.docstore.document import Document
from datasets import load_dataset



In [2]:
# Configuration , you can change the model and other parameters
CONFIG = {
    "model_name": "meta-llama/Llama-3.2-3B",
    "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
    "dataset": "Falah/story44kids_1_prompts"
}

In [3]:
# Create a token Huggin Face and save it in your own .env.local file

load_dotenv()
token = os.getenv('HF_TOKEN')
login(token=token)

client = InferenceClient(model=CONFIG["model_name"])

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /Users/sina/.cache/huggingface/token
Login successful


In [4]:
# Load the dataset from Hugging Face

ds = load_dataset(CONFIG["dataset"]) # You can replace this with any compatible dataset

In [5]:
# Print the first few examples

print(ds['train'][:5])

{'prompts': ['Once upon a time, in a small village nestled on the outskirts of a mystical forest, there lived a poor but content farmer named Ethan. He had a modest cottage and a small plot of land where he grew vegetables to sustain himself. Despite the hardships that came his way, he always wore a smile and greeted everyone with warmth.', "One sunny morning, as Ethan was tending to his crops, he heard a rustling in the bushes nearby. Curiosity piqued, he cautiously approached the sound and discovered a beautiful fox trapped in a hunter's snare. The fox looked at Ethan with pleading eyes, silently asking for help.", 'Without a second thought, Ethan rushed over to free the fox. Using his trusted pocket knife, he carefully cut through the tangled mess until the fox was liberated. Grateful for being saved, the fox introduced herself as Fiona. She explained that she had gotten lost while exploring the depths of the mysterious forest.', "Ethan, being a gentle soul, couldn't leave Fiona alo

In [6]:
# Wrap the text in Document objects

train_texts = [item["prompts"] for item in ds['train']]
documents = [Document(page_content=f"Prompt: {text}") for text in train_texts]


In [7]:
# Split large documents into chunks

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_documents = text_splitter.split_documents(documents)

In [8]:
# Verify the number of document chunks

num_chunks = len(split_documents)
print(f"Number of document chunks: {num_chunks}")

# Print content of the chunks 
for i, doc in enumerate(split_documents):
    print(f"Chunk {i+1}:\n{doc.page_content}\n{'-'*50}")


Number of document chunks: 10
Chunk 1:
Prompt: Once upon a time, in a small village nestled on the outskirts of a mystical forest, there lived a poor but content farmer named Ethan. He had a modest cottage and a small plot of land where he grew vegetables to sustain himself. Despite the hardships that came his way, he always wore a smile and greeted everyone with warmth.
--------------------------------------------------
Chunk 2:
Prompt: One sunny morning, as Ethan was tending to his crops, he heard a rustling in the bushes nearby. Curiosity piqued, he cautiously approached the sound and discovered a beautiful fox trapped in a hunter's snare. The fox looked at Ethan with pleading eyes, silently asking for help.
--------------------------------------------------
Chunk 3:
Prompt: Without a second thought, Ethan rushed over to free the fox. Using his trusted pocket knife, he carefully cut through the tangled mess until the fox was liberated. Grateful for being saved, the fox introduced he

In [9]:
# Embed the documents and initialize Chroma vector store

embedding_model = HuggingFaceEmbeddings(model_name=CONFIG["embedding_model"])
vector_store = Chroma(embedding_function=embedding_model, persist_directory="./vector_base")
vector_store.add_documents(split_documents)

  embedding_model = HuggingFaceEmbeddings(model_name=CONFIG["embedding_model"])
  vector_store = Chroma(embedding_function=embedding_model, persist_directory="./vector_base")


['a328e838-c0ac-4113-9b0a-ee5e3ab04cd3',
 'f1630594-2ae2-4329-a35b-d4aaccf88d9b',
 '1b5fd596-a71a-42d8-8280-83e8366ec041',
 'fd301085-a6cf-41fe-ac81-02cc15ecd0a1',
 '7d50fcc7-371b-444b-85bc-2f5d5e5fe902',
 'a4db7317-6f39-4da5-901e-a59445421915',
 '645a825d-8f9c-4929-b925-43bdefd1782d',
 '61f436d1-f1f2-46be-89e4-dbc7c5b4e292',
 'db3f1f5d-1297-40f9-98b7-5e81a106c6c1',
 '278f0047-cef1-4c6b-9666-b6da6331b0f7']

In [10]:
# Check the number of documents stored in the vector store

stored_embeddings = vector_store._collection.count()
print(f"Number of embeddings in the vector store: {stored_embeddings}")


Number of embeddings in the vector store: 140


In [11]:
# Choose the the number of documents you want to retrieve based on your query
def retrieve_documents(query, num_docs=2): # num_docs specifies how many results to return
    retrieved_docs = vector_store.similarity_search(query, k=num_docs)
    retrieved_texts = [doc.page_content for doc in retrieved_docs]
    # Remove duplicate documents
    retrieved_texts = list(dict.fromkeys(retrieved_texts))
    print("Retrieved Documents:", retrieved_texts)
    return retrieved_texts

In [14]:
class HFInferenceStreamer(StreamingStdOutCallbackHandler):
    def __init__(self):
        self.text = ""
        
    def on_llm_new_token(self, token: str, **kwargs) -> None:
        self.text += token
        print(token, end="", flush=True)

def ask_query(query):
    retrieved_docs = retrieve_documents(query)
    print("Retrieved documents:")
    for i, doc in enumerate(retrieved_docs):
        print(f"Document {i+1}:\n{doc}\n{'='*50}")
    
    combined_input = (
        "You are a helpful AI assistant. Answer ONLY the following question based on the context provided below. "
        "Do not generate or answer any other questions. "
        "If the answer cannot be found in the context, state 'I don't have enough information to answer that question.' "
        "Do not make up or infer any information that is not directly stated in the context. "
        "If you're unsure, say 'I'm not sure.' "
        "Provide a concise answer.\n\n"
        f"Context:\n{' '.join(retrieved_docs)}\n\n"
        f"Question: {query}\n"
        "Answer: "
    )
    
    print("\nFull prompt sent to the model:")
    print(combined_input)
    print("="*50)

    streamer = HFInferenceStreamer()
    response = client.text_generation(
        combined_input, 
        max_new_tokens=200, 
        temperature=0.2,  # Low temperature for more focused responses
        top_p=0.9,  # Added top_p to further control randomness
        do_sample=False,  # Disable sampling for more deterministic output
        stream=True
    )
    print("\nModel response:")
    for token in response:
        streamer.on_llm_new_token(token)
    
    return streamer.text.strip()

# Usage
query = "Why did Fiona thank Ethan?"
response = ask_query(query)
print("\nFull response:", response)

Retrieved Documents: ['Prompt: Overwhelmed with joy, Fiona thanked Ethan for his unwavering support and promised to always cherish their friendship. With newfound confidence and a sense of purpose, she bid farewell to Ethan, disappearing into the embrace of her family.']
Retrieved documents:
Document 1:
Prompt: Overwhelmed with joy, Fiona thanked Ethan for his unwavering support and promised to always cherish their friendship. With newfound confidence and a sense of purpose, she bid farewell to Ethan, disappearing into the embrace of her family.

Full prompt sent to the model:
You are a helpful AI assistant. Answer ONLY the following question based on the context provided below. Do not generate or answer any other questions. If the answer cannot be found in the context, state 'I don't have enough information to answer that question.' Do not make up or infer any information that is not directly stated in the context. If you're unsure, say 'I'm not sure.' Provide a concise answer.

Conte