In [4]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch, os

In [5]:
# ------------------------------------------------------------
# 1️⃣  Sample Knowledge Source
# ------------------------------------------------------------
sample_text = """
The University of Miami offers a one-year Master’s in Data Science program.
The curriculum emphasizes machine learning, deep learning, and statistics.
An internship is required for graduation.
Applications open each August and close each January.
"""

with open("sample_data.txt", "w") as f:
    f.write(sample_text)

loader = TextLoader("sample_data.txt")
docs = loader.load()

In [6]:
# ------------------------------------------------------------
# 2️⃣  Split Text into Chunks
# ------------------------------------------------------------
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
splits = splitter.split_documents(docs)
print(f"Chunks created: {len(splits)}")

Chunks created: 1


In [7]:
# ------------------------------------------------------------
# 3️⃣  Create Embeddings (open model)
# ------------------------------------------------------------
embedding_fn = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Build Chroma vector DB
vectordb = Chroma.from_documents(splits, embedding_fn)
retriever = vectordb.as_retriever(search_kwargs={"k": 3})

  embedding_fn = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [8]:
# ------------------------------------------------------------
# 4️⃣  Load an Open LLM (Zephyr-7B-Beta)
# ------------------------------------------------------------
model_id = "HuggingFaceH4/zephyr-7b-beta"  # ✅ fully open, no login needed

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)

generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    temperature=0.3,
)

llm = HuggingFacePipeline(pipeline=generator)

tokenizer_config.json: 0.00B [00:01, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/638 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s]

model-00001-of-00008.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

model-00006-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00008-of-00008.safetensors:   0%|          | 0.00/816M [00:00<?, ?B/s]

model-00005-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00004-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00007-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00002-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00003-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Device set to use cpu
  llm = HuggingFacePipeline(pipeline=generator)


In [10]:
# ------------------------------------------------------------ # 5️⃣ Create RAG Chain # ------------------------------------------------------------
qa_chain = RetrievalQA.from_chain_type( llm=llm, chain_type="stuff",
                                       retriever=retriever,
                                        return_source_documents=True, )

In [13]:
# ------------------------------------------------------------ # 6️⃣ Ask Questions # ------------------------------------------------------------
query = "When do applications open for the MS in Data Science program?"
result = qa_chain(query)
print("🤖 Answer:")
print(result["result"])
print("\n📚 Source Docs:")
for s in result["source_documents"]: print("-", s.page_content[:100], "...")

  result = qa_chain(query)


🤖 Answer:
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

The University of Miami offers a one-year Master’s in Data Science program.
The curriculum emphasizes machine learning, deep learning, and statistics.
An internship is required for graduation.
Applications open each August and close each January.

Question: When do applications open for the MS in Data Science program?
Helpful Answer: Applications for the University of Miami's MS in Data Science program typically open in August. Is there any information provided about the application deadline for this program?

📚 Source Docs:
- The University of Miami offers a one-year Master’s in Data Science program.
The curriculum emphasize ...


In [14]:
# ------------------------------------------------------------ # 7️⃣ Optional — Inspect Retrieved Chunks # ------------------------------------------------------------
relevant = retriever.get_relevant_documents(query)
print("\n🔎 Retrieved Chunks:")
for i, doc in enumerate(relevant, start=1):
  print(f"Chunk {i}:\n{doc.page_content}\n{'-'*50}")

  relevant = retriever.get_relevant_documents(query)



🔎 Retrieved Chunks:
Chunk 1:
The University of Miami offers a one-year Master’s in Data Science program.
The curriculum emphasizes machine learning, deep learning, and statistics.
An internship is required for graduation.
Applications open each August and close each January.
--------------------------------------------------
