In [1]:
# Downgrade numpy to a compatible version that works with faiss and others
!pip install -q numpy==1.26.4
# Now install your main packages
!pip install -q bitsandbytes accelerate peft transformers datasets sentence-transformers faiss-cpu gradio langchain langchain-community


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 MB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m60.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m63.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.5/65.5 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.2/45.2 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m92.6 MB/s[0m eta

In [2]:
pip install -U bitsandbytes

Note: you may need to restart the kernel to use updated packages.


In [None]:
# ✅ Complete Kaggle Notebook Script: Fine-Tune Mistral-7B + LangChain RAG + Gradio Chatbot



# STEP 1: IMPORTS
import json
import torch
import os
import numpy as np
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType
from sentence_transformers import SentenceTransformer
import faiss
import gradio as gr

# LangChain imports
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# STEP 2: CONFIGURATION
model_id = "mistralai/Mistral-7B-v0.1"
hf_token = "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"  # Paste your token here
input_path = "/kaggle/input/sbi-faq/sbi_data.json"

# STEP 3: LOAD BASE MODEL WITH 8-BIT AND LoRA
bnb_config = {"load_in_8bit": True, "device_map": "auto", "use_auth_token": hf_token}
tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_token)



# ✅ Fix: Set padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token





model = AutoModelForCausalLM.from_pretrained(model_id, **bnb_config)
model = prepare_model_for_kbit_training(model)

# Apply LoRA
lora_config = LoraConfig(task_type=TaskType.CAUSAL_LM, r=4, lora_alpha=16, lora_dropout=0.1, bias="none")
model = get_peft_model(model, lora_config)

# STEP 4: LOAD DATASET
with open(input_path, 'r') as f:
    data = json.load(f)

samples = [{"text": f"### Question: {q}\n### Answer: {a}"} for q, a in data.items()]
dataset = Dataset.from_list(samples)

def tokenize(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)

tok_dataset = dataset.map(tokenize, batched=True)

# STEP 5: TRAINING ARGUMENTS
args = TrainingArguments(
    output_dir="./mistral-sbi-lora",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    logging_steps=10,
    num_train_epochs=1,
    save_strategy="epoch",
    learning_rate=2e-4,
    bf16=False,
    fp16=True
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tok_dataset,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

trainer.train()
model.save_pretrained("/kaggle/working/sbi-lora")

# STEP 6: BUILD EMBEDDINGS + VECTORSTORE (LangChain RAG)
docs = list(data.items())
texts = [f"Question: {q}\nAnswer: {a}" for q, a in docs]
metadatas = [{"source": q} for q, _ in docs]

embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
db = FAISS.from_texts(texts, embedding_model, metadatas=metadatas)

# STEP 7: WRAP FINETUNED MODEL AS LANGCHAIN LLM
from transformers import pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto", max_new_tokens=256)
llm = HuggingFacePipeline(pipeline=pipe)

# STEP 8: CREATE LangChain RAG CHAIN
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template="""Use the following context to answer the question. If you don't know the answer, just say so.
Context:
{context}

Question: {question}
Answer:"""
)

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(search_type="similarity", search_kwargs={"k": 3}),
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt_template}
)

# STEP 9: GRADIO CHAT UI

def chat_interface(user_query):
    return qa_chain.run(user_query)

gr.Interface(
    fn=chat_interface,
    inputs=gr.Textbox(lines=2, placeholder="Ask about SBI..."),
    outputs="text",
    title="SBI Chatbot (LangChain RAG + Fine-Tuned Mistral)",
    description="Ask anything about SBI banking services",
    theme="default"
).launch(share=True)


tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Map:   0%|          | 0/822 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


<IPython.core.display.Javascript object>