In [1]:
# Default packages
import os
from pathlib import Path
import sys

# Installed packages
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain_huggingface import HuggingFacePipeline, HuggingFaceEmbeddings
from langchain_community.utilities import SQLDatabase
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain.agents import create_sql_agent

import pandas as pd
from dotenv import load_dotenv

# Custome modules
sys.path.append(Path("../src").resolve().as_posix())
import settings as s
from indexeing import get_db

In [2]:
load_dotenv()
login(os.getenv("HF_TOKEN"))

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [3]:
embedding_model = HuggingFaceEmbeddings(
    model_name="thenlper/gte-large",
    cache_folder=str(s.models_root_path),
)

embedding_model

HuggingFaceEmbeddings(model_name='thenlper/gte-large', cache_folder='C:\\Users\\sampath\\Dev\\TCS_GenAI_Hackaton\\models', model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [4]:
model_id = "google/gemma-3-4b-it"

tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=s.models_root_path)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    dtype="auto",
    cache_dir=s.models_root_path
)
model

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu and disk.


Gemma3ForConditionalGeneration(
  (model): Gemma3Model(
    (vision_tower): SiglipVisionModel(
      (vision_model): SiglipVisionTransformer(
        (embeddings): SiglipVisionEmbeddings(
          (patch_embedding): Conv2d(3, 1152, kernel_size=(14, 14), stride=(14, 14), padding=valid)
          (position_embedding): Embedding(4096, 1152)
        )
        (encoder): SiglipEncoder(
          (layers): ModuleList(
            (0-26): 27 x SiglipEncoderLayer(
              (layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
              (self_attn): SiglipAttention(
                (k_proj): Linear(in_features=1152, out_features=1152, bias=True)
                (v_proj): Linear(in_features=1152, out_features=1152, bias=True)
                (q_proj): Linear(in_features=1152, out_features=1152, bias=True)
                (out_proj): Linear(in_features=1152, out_features=1152, bias=True)
              )
              (layer_norm2): LayerNorm((1152,), eps=1e-06, elementwi

In [6]:
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
)

llm = HuggingFacePipeline(pipeline=pipe)
llm

Device set to use cpu


HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x0000025730283E30>, model_id='google/gemma-3-4b-it')

In [None]:
db = SQLDatabase.from_uri(f"sqlite:///{s.db_path}")

# Create toolkit
toolkit = SQLDatabaseToolkit(db=db, llm=llm)

# Create SQL Agent
agent_executor = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    verbose=True
)

In [None]:
query = "What is the total quantity of finished goods?"
agent_executor.invoke(query)

In [6]:
from langchain_experimental.sql import SQLDatabaseChain
from langchain_community.utilities import SQLDatabase
from langchain.agents import initialize_agent, Tool
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from transformers import pipeline

# -----------------------
# 1. Load CSV into SQLite
# -----------------------
db = SQLDatabase.from_uri(f"sqlite:///{s.db_path}")

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0,  # GPU
    max_new_tokens=512,
    temperature=0
)

llm = HuggingFacePipeline(pipeline=pipe)

# -----------------------
# 4. Define Tools
# -----------------------
def run_sql(query: str):
    return db.run(query)

tools = [
    Tool(
        name="sql_db_query",
        func=run_sql,
        description="Run SQL queries on the inventory database"
    )
]

# -----------------------
# 5. Agent with Tool Calling
# -----------------------
agent_executor = initialize_agent(
    tools=tools,
    llm=llm,
    agent="zero-shot-react-description",  # ReAct agent with tool use
    verbose=True,
    handle_parsing_errors=True
)

# -----------------------
# 6. Query Example
# -----------------------
query = "What is the total quantity of finished goods?"
response = agent_executor.invoke({"input": query})
print("Answer:", response["output"])


ValueError: The model has been loaded with `accelerate` and therefore cannot be moved to a specific device. Please discard the `device` argument when creating your pipeline object.