In [1]:
# Default packages
import os
from pathlib import Path
import sys

# Installed packages
from dotenv import load_dotenv
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from transformers import StoppingCriteria, StoppingCriteriaList

from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain_huggingface import HuggingFacePipeline, HuggingFaceEmbeddings

# Custome modules
sys.path.append(Path("../src").resolve().as_posix())
import settings as s
from indexeing import get_db

In [2]:
load_dotenv()
hf_token = os.getenv("HF_TOKEN")
login(hf_token)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [3]:
embedding_model = HuggingFaceEmbeddings(
    model_name="thenlper/gte-large",
    cache_folder=str(s.models_root_path),
)

embedding_model

HuggingFaceEmbeddings(model_name='thenlper/gte-large', cache_folder='/home/TCS_GenAI_Hackaton/models', model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [4]:
model_id = "google/gemma-3-4b-it"

tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=s.models_root_path)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    dtype="auto",
    cache_dir=s.models_root_path
)
model

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Gemma3ForConditionalGeneration(
  (model): Gemma3Model(
    (vision_tower): SiglipVisionModel(
      (vision_model): SiglipVisionTransformer(
        (embeddings): SiglipVisionEmbeddings(
          (patch_embedding): Conv2d(3, 1152, kernel_size=(14, 14), stride=(14, 14), padding=valid)
          (position_embedding): Embedding(4096, 1152)
        )
        (encoder): SiglipEncoder(
          (layers): ModuleList(
            (0-26): 27 x SiglipEncoderLayer(
              (layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
              (self_attn): SiglipAttention(
                (k_proj): Linear(in_features=1152, out_features=1152, bias=True)
                (v_proj): Linear(in_features=1152, out_features=1152, bias=True)
                (q_proj): Linear(in_features=1152, out_features=1152, bias=True)
                (out_proj): Linear(in_features=1152, out_features=1152, bias=True)
              )
              (layer_norm2): LayerNorm((1152,), eps=1e-06, elementwi

In [5]:
# class StopOnNewline(StoppingCriteria):
#     def __call__(self, input_ids, scores, **kwargs):
#         return input_ids[0][-1] == tokenizer.eos_token_id


# stopping_criteria = StoppingCriteriaList([StopOnNewline()])

# output = model.generate(
#     **input_tokens,
#     temperature=0.7,   # creativity control: 0 = deterministic, higher = more creative
#     top_p=0.9,         # nucleus sampling
#     do_sample=True,
#     stopping_criteria=stopping_criteria,
# )

# generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
# print(generated_text)

In [12]:
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # max_new_tokens=128,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    top_k=50
)

llm_wrapper = HuggingFacePipeline(pipeline=pipe)
llm_wrapper

Device set to use cuda:0


HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7fdc04bef560>, model_id='google/gemma-3-4b-it')

In [13]:
db = get_db(embedding_model)
db

File processed and indexed: bill_of_materials.csv
File processed and indexed: finished_goods_inventory.csv
File processed and indexed: production_schedule.csv
File processed and indexed: purchases.csv
File processed and indexed: raw_material_inventory.csv


<langchain_chroma.vectorstores.Chroma at 0x7fdc04bedd00>

In [15]:
retriever = db.as_retriever(search_kwargs={"k": 3})

conversational_qa = ConversationalRetrievalChain.from_llm(
    llm=llm_wrapper,
    retriever=retriever,
    return_source_documents=True  # optional: see where answer came from
)

chat_history = []  # stores dialog

query1 = "What products are available in finished goods inventory search in the file finished_goods_inventory.csv?"
result1 = conversational_qa.invoke({"question": query1, "chat_history": chat_history})
print("Q:", query1)
print("A:", result1["answer"])

# # Add to history
# chat_history.append((query1, result1["answer"]))

# query2 = "And how many raw materials do we have?"
# result2 = conversational_qa.invoke({"question": query2, "chat_history": chat_history})
# print("Q:", query2)
# print("A:", result2["answer"])


Q: What products are available in finished goods inventory search in the file finished_goods_inventory.csv?
A: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

item_id: 25
item_name: Acetone
category: Raw Material
stock_level: 579
reorder_point: 640
lead_time_days: 7
supplier: SteelCorp

item_id: 25
item_name: Acetone
category: Raw Material
stock_level: 579
reorder_point: 640
lead_time_days: 7
supplier: SteelCorp

item_id: 13
item_name: PVC Pipes
category: Raw Material
stock_level: 668
reorder_point: 997
lead_time_days: 9
supplier: BoxMakers

Question: What products are available in finished goods inventory search in the file finished_goods_inventory.csv?
Helpful Answer: The file contains the following information: item_id, item_name, category, stock_level, and finished_goods_flag. The question asks for products available in finished goods inventory. Thus, we need to filte