In [11]:
# ================================
# 1. Setup & Imports
# ================================
import os
from dotenv import load_dotenv
from pathlib import Path
import sys
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFacePipeline

In [3]:

# ================================
# 2. Load Environment & Login
# ================================
load_dotenv()
hf_token = os.getenv("HF_TOKEN")
login(hf_token)

models_root = Path("models")


Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [None]:

# ================================
# 3. Load Embedding Model
# ================================
embedding_model = HuggingFaceEmbeddings(
    model_name="thenlper/gte-large",
    cache_folder=str(models_root),
    model_kwargs={"device": "auto"}  # force CPU
)


In [None]:

# ================================
# 4. Load LLM (Gemma-3-4B-IT)
# ================================
model_id = "google/gemma-3-4b-it"

tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    dtype="auto",
    token=hf_token
)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=128,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    top_k=50
)

llm_wrapper = HuggingFacePipeline(pipeline=pipe)


Device set to use cpu


In [17]:
# ================================
# 5. Prepare Document Store (example PDF)
# ================================
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("/home/TCS_GenAI_Hackaton/SampathKovvaliResume.pdf")
documents = loader.load()

# Split into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = text_splitter.split_documents(documents)

# Build FAISS vector DB
db = FAISS.from_documents(docs, embedding_model)
retriever = db.as_retriever()

In [18]:

# ================================
# 6. Build RetrievalQA
# ================================
qa = RetrievalQA.from_chain_type(
    llm=llm_wrapper,
    retriever=retriever,
    chain_type="stuff"
)


In [21]:

# ================================
# 7. Ask Questions
# ================================
query = "What document is it?"
result = qa.invoke(query)


In [22]:
print("\n--- Answer ---\n", result["result"])


--- Answer ---
 Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

:
:
:
:
:
SAMPA TH KOVV ALI
Data Scientist
 91 9441682374 sampathkovvali@gmail.com github.com/sampath017 India
Summary
I am a Data Scientist with a firm background in Electrical and Electronics Engineering, equipped with robust programming and analytical 
skills. My current role at TCS involves developing AI solutions that enhance workflow efficiency and automate processes. I thrive on 
challenges and enjoy collaborating with teams to push innovation forward, building advanced applications that drive tangible results.
Experience
Tata Consultancy Services  TCS  India
Data Scientist 01/2023   Present
Leading IT services and consulting company.
Built a RAG application using LLaMA 3.2 11B Vision model for incident classification & resolution.
Implemented an LLM-based MOM generator for Teams meetings.
Automated 5