In [None]:
# Upgrade bitsandbytes (needed for the 4-bit BitsAndBytesConfig used below) and accelerate.
# NOTE(review): versions are unpinned, so a fresh run may pull different releases — consider pinning.
!pip install -U bitsandbytes accelerate
# Quietly upgrade transformers; the version actually in use is printed by the following cells.
# NOTE(review): `peft` and `datasets` are imported below but not installed here —
# presumably they are preinstalled in the image; confirm.
!pip install -q --upgrade transformers

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
# ✅ Set GPU visibility FIRST
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Must be first

# Then import everything else
import torch
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support, confusion_matrix
from datasets import Dataset
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, TaskType


import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


import pandas as pd
import torch
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (
    accuracy_score, f1_score,
    precision_recall_fscore_support,
    confusion_matrix
)

from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
    BitsAndBytesConfig,
    pipeline
)
from peft import LoraConfig, get_peft_model, TaskType




# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found beneath it.
for folder, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Confirm which transformers release is active after the upgrade in the first cell.
import transformers, sys; print(transformers.__version__)

In [None]:
import transformers, inspect, sys, os
# Show the installed transformers version (a previous run recorded 4.51.3).
print(transformers.__version__)
# Dump the TrainingArguments constructor signature to see which keyword names this version accepts.
# NOTE(review): the earlier note here said it "shows evaluation_strategy", but the training cell
# passes `eval_strategy` — confirm which spelling the installed version exposes.
print(inspect.signature(transformers.TrainingArguments))

In [None]:
#!/usr/bin/env python

# 1. Setup device

# Use the GPU when one is visible to torch, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# 2. Load and preprocess data
# Read the complaints CSV and drop the "Unnamed: 0" column
# (presumably a leftover saved index — TODO confirm).
df = pd.read_csv("/kaggle/input/bank-customer-complaint-analysis/complaints.csv").drop(columns=["Unnamed: 0"])
# Encode the string "product" column as integer class ids in a new "product_label" column.
label_encoder = LabelEncoder()
df["product_label"] = label_encoder.fit_transform(df["product"])
# Class name -> integer id lookup, printed for reference when reading metrics.
label_mappings = dict(zip(label_encoder.classes_,
                          label_encoder.transform(label_encoder.classes_)))
print("Label mappings:", label_mappings)

# 3. Sample 10% and stratified splits
# The 10% sample itself is drawn uniformly (not stratified); only the splits below are stratified.
sampled = df.sample(frac=0.1, random_state=42)
# First split: 80% train, 20% held out, stratified on the label.
train_df, temp_df = train_test_split(
    sampled, test_size=0.2, stratify=sampled["product_label"], random_state=42
)
# Second split: halve the held-out part into validation and test (10% of the sample each).
val_df, test_df = train_test_split(
    temp_df, test_size=0.5, stratify=temp_df["product_label"], random_state=42
)

# def to_hf(ds):
#     return Dataset.from_dict({
#         "text": ds["narrative"].tolist(),
#         "labels": ds["product_label"].tolist()
#     })
def to_hf(ds):
    """Convert a complaints DataFrame into a Hugging Face ``Dataset``.

    Missing narratives become empty strings and every narrative is cast to
    ``str``; labels are cast to plain ``int``. The result has exactly two
    columns: ``"text"`` (from ``narrative``) and ``"labels"`` (from
    ``product_label``).
    """
    texts = ds["narrative"].fillna("").astype(str).tolist()
    targets = ds["product_label"].astype(int).tolist()
    return Dataset.from_dict({"text": texts, "labels": targets})


# Convert each split into a Hugging Face Dataset with "text" and "labels" columns.
train_ds = to_hf(train_df)
val_ds   = to_hf(val_df)
test_ds  = to_hf(test_df)

# 4. Load tokenizer & quantization config
model_id   = "bert-base-uncased"
# 4-bit NF4 quantization settings passed to the classifier's from_pretrained call below.
# NOTE(review): compute dtype is bfloat16 here while TrainingArguments later sets fp16=True —
# confirm the mixed precision choice is intended for the target GPU.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
# Tokenizer matching the base checkpoint; also handed to the Trainer and saved with the model.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# 5. Tokenize datasets
def tokenize_fn(ex):
    """Tokenize the ``"text"`` field of a batch with the module-level ``tokenizer``.

    Every sequence is truncated and padded to a fixed length of 128 tokens.
    Returns whatever the tokenizer call returns.
    """
    encoded = tokenizer(
        ex["text"],
        max_length=128,
        truncation=True,
        padding="max_length",
    )
    return encoded

# Tokenize every split in batches; the token columns are added alongside "text" and "labels".
# NOTE(review): the datasets are reassigned under the same names, so re-running only this part
# maps over already-tokenized datasets.
train_ds = train_ds.map(tokenize_fn, batched=True)
val_ds   = val_ds.map(tokenize_fn, batched=True)
test_ds  = test_ds.map(tokenize_fn, batched=True)

# 6. Load BERT classifier + LoRA
# Base classifier loaded with the 4-bit quantization config; the number of output labels
# comes from the fitted label encoder.
clf = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    low_cpu_mem_usage=True,
    num_labels=len(label_encoder.classes_),
    torch_dtype=torch.float32,
)
# LoRA adapter settings: rank 4, alpha 32, applied to modules whose names match the
# entries in target_modules, for a sequence-classification task.
lora_cfg = LoraConfig(
    r=4,
    lora_alpha=32,
    target_modules=["query", "key", "value", "dense"],
    task_type=TaskType.SEQ_CLS,
)
# NOTE(review): PEFT's quantization guide recommends calling prepare_model_for_kbit_training
# on a k-bit model before get_peft_model — confirm the omission is intentional.
clf = get_peft_model(clf, lora_cfg)
# NOTE(review): likely redundant — the Trainer handles device placement and a quantized
# model is presumably already on the GPU after loading; confirm before removing.
clf.to(device)

# 7. Define metrics
def compute_metrics(pred):
    """Turn a ``(logits, labels)`` pair into a dict of classification metrics.

    Predictions are the argmax over the last axis of the logits. Precision,
    recall and F1 use weighted averaging. The returned dict has the keys
    ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    scores, y_true = pred
    y_hat = scores.argmax(axis=-1)
    precision, recall, f1, _support = precision_recall_fscore_support(
        y_true, y_hat, average="weighted"
    )
    return {
        "accuracy": accuracy_score(y_true, y_hat),
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }

# 8. Training arguments & Trainer
# Training configuration: evaluate and checkpoint once per epoch (the two strategies are
# kept identical because load_best_model_at_end=True restores a saved checkpoint), keep
# at most two checkpoints on disk, and disable external experiment reporting.
# NOTE(review): fp16=True is combined with a bfloat16 bnb compute dtype set earlier —
# confirm the mix is intended for the target GPU.
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    weight_decay=0.01,
    logging_dir="./logs",
    save_total_limit=2,
    load_best_model_at_end=True,
    fp16=True,
    optim="paged_adamw_8bit",
    report_to="none",
)

# The tokenizer is passed as `processing_class`: the older `tokenizer=` keyword is
# deprecated in recent transformers releases, and this notebook installs transformers
# unpinned, so the deprecated spelling can stop working on a fresh run.
trainer = Trainer(
    model=clf,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

# 9. Train & evaluate
# Fine-tune, then report the metrics on the validation split.
trainer.train()
val_metrics = trainer.evaluate()
print("Validation:", val_metrics)

# Score the held-out test split; the predicted class is the argmax over the logits.
test_output = trainer.predict(test_ds)
raw_preds, labels, _ = test_output
preds = raw_preds.argmax(axis=-1)

test_acc = accuracy_score(labels, preds)
test_f1 = f1_score(labels, preds, average='weighted')
print(f"Test Accuracy: {test_acc:.4f}")
print(f"Test F1 Score: {test_f1:.4f}")

# 10. Confusion matrix
# Rows are true classes, columns are predicted classes, both labelled with the class names.
cm = confusion_matrix(labels, preds)
class_names = label_encoder.classes_
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix")
plt.show()
# Persist the fine-tuned artifacts so inference can reload them without retraining.
# joblib is a hard dependency of scikit-learn (already imported at the top of the
# notebook), so no mid-cell `pip install` is needed before importing it.
# NOTE(review): the next cell repeats this same save; one of the two can be dropped.
import joblib

save_dir = "./bert_lora_complaints"

# Save the PEFT (LoRA) model files and the tokenizer files into save_dir.
# NOTE(review): PEFT's save_pretrained presumably stores adapter weights/config rather
# than a full copy of the base model — reloading would then need `model_id` again; confirm.
clf.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

# Optional: save label encoder for use during inference
joblib.dump(label_encoder, f"{save_dir}/label_encoder.pkl")

print(f"Model and tokenizer saved to {save_dir}")


# # ────────────────────────────────────────────────────────────────────────────────
# # 11. Tiny LLM for email response generation

In [None]:
# Persist the fine-tuned artifacts so inference can reload them without retraining.
# joblib is a hard dependency of scikit-learn (already imported at the top of the
# notebook), so no `pip install` is needed before importing it.
import joblib

save_dir = "./bert_lora_complaints"

# Save the PEFT (LoRA) model files and the tokenizer files into save_dir.
# NOTE(review): PEFT's save_pretrained presumably stores adapter weights/config rather
# than a full copy of the base model — reloading would then need `model_id` again; confirm.
clf.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

# Optional: save label encoder for use during inference
joblib.dump(label_encoder, f"{save_dir}/label_encoder.pkl")

print(f"Model and tokenizer saved to {save_dir}")

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig

# Inputs for the generation demo: cleaned complaint text from the test split and the
# classifier's predicted category names (decoded from `preds`).
test_texts = test_df["narrative"].fillna("").astype(str).tolist()
test_labels = label_encoder.inverse_transform(preds)

# ─── 12. Load TinyLLaMA for email generation ────────────────────────────────
gen_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# 4-bit NF4 quantization settings for the generator.
bnb_llama = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# 12a. Tokenizer & Model (assign all layers to cuda:0)
llama_tokenizer = AutoTokenizer.from_pretrained(gen_model_id)
llama_model = AutoModelForCausalLM.from_pretrained(
    gen_model_id,
    quantization_config=bnb_llama,
    device_map={"": 0},           # map entire model to GPU 0
    low_cpu_mem_usage=True,
)
# No `llama_model.to(device)` here: `device_map` above already places the whole model on
# GPU 0, and transformers restricts `.to()` on bitsandbytes-quantized models (it raises
# for some quantization modes / bitsandbytes versions).

# 12b. Text-generation pipeline, built once. No explicit `device` argument is passed
# because the model is already placed by `device_map`.
llama_pipe = pipeline(
    "text-generation",
    model=llama_model,
    tokenizer=llama_tokenizer
)

from transformers import pipeline

# ─── Sentiment analysis pipeline ─────────────────────────────────────────────
# Sentiment classifier used to choose the reply tone; runs on GPU index 0.
sentiment_pipe = pipeline(
    task="sentiment-analysis",
    device=0,  # uses cuda:0
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# ─── 12c. Helper to generate empathetic responses ─────────────────────────────
def generate_email_response_llama(category: str, message: str) -> str:
    """Draft a support email reply whose tone follows the message's sentiment.

    The message is scored with the module-level ``sentiment_pipe``; a
    "negative" label selects an apologetic tone instruction, any other label
    a friendly one. Category, message, sentiment (with confidence) and the
    tone instruction are placed in an ``[INST] ... [/INST]`` prompt that the
    module-level ``llama_pipe`` completes.

    Args:
        category: Complaint category name shown in the prompt.
        message: Raw customer complaint text.

    Returns:
        Only the newly generated text (the prompt is excluded), stripped of
        surrounding whitespace.
    """
    # 1) Analyze customer sentiment.
    # truncation=True caps the input at the classifier's maximum sequence length;
    # without it a long complaint narrative makes the sentiment pipeline raise.
    sentiment = sentiment_pipe(message, truncation=True)[0]
    label = sentiment["label"].lower()
    score = sentiment["score"]

    # 2) Choose tone based on sentiment
    if label == "negative":
        tone_instruction = "Use an empathetic, apologetic tone that acknowledges the customer's frustration."
    else:
        tone_instruction = "Use a warm, friendly tone that thanks the customer for their feedback."

    # 3) Build prompt
    # NOTE(review): confirm `[INST]` is the prompt format this chat checkpoint expects;
    # the tokenizer's own chat template may be the better fit.
    prompt = (
        "[INST] You are a helpful customer support assistant.\n\n"
        f"Complaint Category: {category}\n"
        f"Customer Message: {message}\n\n"
        f"Customer Sentiment: {label} (confidence {score:.2f}). {tone_instruction}\n\n"
        "Write a professional email response to the customer. [/INST]\n"
    )

    # 4) Generate, returning only the new text
    out = llama_pipe(
        prompt,
        max_new_tokens=150,
        do_sample=True,
        top_p=0.9,
        temperature=0.7,
        truncation=True,
        return_full_text=False
    )
    return out[0]["generated_text"].strip()

# ─── 13. Generate on first 5 test cases ────────────────────────────────────
# Rebuild the demo inputs: predicted category names and the cleaned complaint text.
test_labels = label_encoder.inverse_transform(preds)
test_texts = test_df["narrative"].fillna("").astype(str).tolist()

# Print complaint, predicted category and generated reply for the first five test rows.
for complaint, category in zip(test_texts[:5], test_labels[:5]):
    print("⟶ Complaint:", complaint)
    print("⟶ Category :", category)
    print("⟶ Response :", generate_email_response_llama(category, complaint))
    print("-" * 80)

# ─── 12c. Helper to generate responses (use max_new_tokens instead of max_length) ────
def generate_email_response_llama(category: str, message: str) -> str:
    """Draft a short, neutral-toned support email reply for one complaint.

    This definition replaces the earlier function of the same name from the
    point where it is executed. The message is scored with the module-level
    ``sentiment_pipe``; a "negative" label selects a restrained,
    resolution-focused instruction, any other label a thank-you instruction.
    The prompt is completed by the module-level ``llama_pipe``.

    Args:
        category: Complaint category name shown in the prompt.
        message: Raw customer complaint text.

    Returns:
        Only the newly generated text (the prompt is excluded), stripped of
        surrounding whitespace.
    """
    # truncation=True caps the input at the classifier's maximum sequence length;
    # without it a long complaint narrative makes the sentiment pipeline raise.
    sentiment = sentiment_pipe(message, truncation=True)[0]
    label = sentiment["label"].lower()

    # Simplified tone suggestion
    if label == "negative":
        tone_instruction = (
            "Acknowledge the customer's concern respectfully and offer a clear resolution path. "
            "Avoid excessive emotion. Be concise and professional."
        )
    else:
        tone_instruction = (
            "Thank the customer for their message and respond in a helpful, clear, and professional tone."
        )

    # NOTE(review): confirm `[INST]` is the prompt format this chat checkpoint expects;
    # the tokenizer's own chat template may be the better fit.
    prompt = (
        "[INST] You are a professional customer support agent. "
        f"Complaint Category: {category}\n"
        f"Customer Message: {message}\n\n"
        f"{tone_instruction}\n\n"
        "Write a short and professional email response. Avoid repeating the prompt. [/INST]\n"
    )

    out = llama_pipe(
        prompt,
        max_new_tokens=120,          # shorter response
        do_sample=True,
        top_p=0.85,                  # reduce randomness slightly
        temperature=0.6,             # lower temperature for less emotional tone
        truncation=True,
        return_full_text=False
    )
    return out[0]["generated_text"].strip()


# ─── 13. Generate on first 5 test cases ────────────────────────────────────
# Rebuild the demo inputs: predicted category names and the cleaned complaint text.
test_labels = label_encoder.inverse_transform(preds)
test_texts = test_df["narrative"].fillna("").astype(str).tolist()

# Print complaint, predicted category and the reply from the function defined just above
# for the first five test rows.
for complaint, category in zip(test_texts[:5], test_labels[:5]):
    print("⟶ Complaint:", complaint)
    print("⟶ Category:", category)
    print("⟶ LLaMA-Response:", generate_email_response_llama(category, complaint))
    print("-" * 80)


# ─── 12. Load TinyLLaMA for email generation ────────────────────────────────
# gen_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# bnb_llama = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16
# )

# # 12a. Tokenizer & Model
# llama_tokenizer = AutoTokenizer.from_pretrained(gen_model_id)
# llama_model = AutoModelForCausalLM.from_pretrained(
#     gen_model_id,
#     quantization_config=bnb_llama,
#     low_cpu_mem_usage=True,
# )
# llama_model.to(device)

# # 12b. Text-generation pipeline
# llama_pipe = pipeline(
#     "text-generation",
#     model=llama_model,
#     tokenizer=llama_tokenizer,
#     device=0 if torch.cuda.is_available() else -1,
# )

# # 12c. Helper to generate responses
# def generate_email_response_llama(category: str, message: str) -> str:
#     prompt = (
#         "[INST] You are a helpful customer support assistant.\n\n"
#         f"Complaint Category: {category}\n"
#         f"Customer Message: {message}\n\n"
#         "Write a professional email response to the customer. [/INST]"
#     )
#     out = llama_pipe(
#         prompt,
#         max_length=200,
#         do_sample=True,
#         top_p=0.9,
#         temperature=0.7,
#     )[0]["generated_text"]
#     # strip off the prompt echo if necessary
#     return out.split("[INST]")[-1].strip()

# # ─── 13. Generate on first 5 test cases ────────────────────────────────────
# # (assuming you still have `test_texts` and `test_labels` from above)
# for txt, cat in zip(test_texts[:5], test_labels[:5]):
#     print("⟶ Complaint:", txt)
#     print("⟶ Category:", cat)
#     print("⟶ LLaMA-Response:", generate_email_response_llama(cat, txt))
#     print("-" * 80)