In [1]:
# --- INSTALLING LIBRARIES ---
# The same dependencies as RoBERTa are needed, plus DeBERTa may require `SentencePiece`
!pip install --upgrade transformers accelerate datasets scikit-learn sentencepiece -q

# --- IMPORTING LIBRARIES ---
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
# CHANGE 1: Import DebertaV2Model (the backbone class used for the DeBERTa-v3-base checkpoint)
from transformers import AutoTokenizer, DebertaV2Model, TrainingArguments, Trainer 
from datasets import Dataset, DatasetDict
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler, MultiLabelBinarizer, OneHotEncoder
from sklearn.utils.class_weight import compute_class_weight 
import matplotlib.pyplot as plt
import seaborn as sns
import os
from packaging import version
import sklearn

# Seed reused by the stratified split and the cross-validation folds below.
RNG = 42

# --- LOADING THE DATASET ---
FILE_PATH = '/kaggle/input/non-english-removed-2/Non English reviews deleted All_Language_Data - Copy.csv' 
MODEL_CHECKPOINT = "microsoft/deberta-v3-base"  # CHANGE 2: DeBERTa checkpoint

print(f"Loading dataset from: {FILE_PATH}")

try:
    # Keep the untouched frame as raw_df; later cells work on copies of it.
    raw_df = pd.read_csv(FILE_PATH) 
except FileNotFoundError:
    # Print a readable hint, then re-raise so the cell still fails loudly.
    print("\n‚ùå ERROR: File not found!")
    print(f"Please verify the path: {FILE_PATH}")
    raise

# --- SANITY CHECK ---
# Every column consumed by the later cells must be present before continuing.
needed_cols = [
    'title', 'reviewText', 'audienceScore', 'tomatoMeter', 'runtimeMinutes',
    'genre', 'language_encoded', 'director_encoded', 'sentiment_label'
]

missing = [c for c in needed_cols if c not in raw_df.columns]
if missing:
    raise ValueError(f"‚ùå CRITICAL ERROR: Your CSV is missing these columns: {missing}")

print("--- Part 1: Setup and Data Loading Complete ---")
print(f"Dataset loaded with {raw_df.shape[0]} rows.")
# A real newline ("\n") separates the heading from the counts; the previous
# "\\n" printed a literal backslash-n in the cell output.
print(f"Target Label Source (to be binarized/filtered):\n{raw_df['sentiment_label'].value_counts().sort_index()}")

[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m44.0/44.0 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m12.0/12.0 MB[0m [31m98.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m380.9/380.9 kB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m9.7/9.7 MB[0m [31m113.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.4/1.4 MB[0m [31m38.3 MB/s[0m eta [36m0:00:00[0m


2025-11-25 12:19:12.722944: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1764073153.104608      20 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1764073153.207427      20 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

Loading dataset from: /kaggle/input/non-english-removed-2/Non English reviews deleted All_Language_Data - Copy.csv
--- Part 1: Setup and Data Loading Complete ---
Dataset loaded with 194801 rows.
Target Label Source (to be binarized/filtered):\nsentiment_label
0    97036
1    97765
Name: count, dtype: int64


In [2]:
# --- PART 2: PREPROCESSING & FEATURE ENGINEERING (BINARY - ALL FEATURES) ---

if 'raw_df' not in globals():
    raise ValueError("‚ùå 'raw_df' is missing! Please run Part 1 to load the data first.")

# --- CLEANING & BINARIZATION ---
print("Cleaning data (already binary, no filtering needed)...")

# Labels are fixed for the binary task.
class_names = ['Negative', 'Positive']
N_CLASSES = 2

# One chained transform: drop rows without text/label, derive the integer
# label, fill missing title/genre, then build the text fed to the tokenizer.
# raw_df itself is never modified (dropna returns a new frame).
df = (
    raw_df
    .dropna(subset=['reviewText', 'sentiment_label'])
    .assign(
        label=lambda frame: frame['sentiment_label'].astype(int),
        title=lambda frame: frame['title'].fillna(""),
        genre=lambda frame: frame['genre'].fillna("Unknown"),
        combined_text=lambda frame: frame['title'] + " [SEP] " + frame['reviewText'],
    )
)

# --- STRATIFIED SPLIT ---
print("Splitting data (Stratified)...")
train_idx, test_idx = train_test_split(
    df.index,
    test_size=0.2,
    random_state=RNG,
    stratify=df['label'],
)
train_df, test_df = (
    df.loc[split_index].reset_index(drop=True)
    for split_index in (train_idx, test_idx)
)

y_train = train_df['label'].to_numpy().astype(int)

# ==========================================
# CLASS WEIGHTS (computed on the training labels only)
# ==========================================
print("Calculating class weights...")
classes_in_y = np.unique(y_train)
class_weights_np = compute_class_weight(
    class_weight="balanced",
    classes=classes_in_y,
    y=y_train,
)

if len(class_weights_np) == 1 and N_CLASSES == 2:
    # A single observed class yields a single weight; fall back to neutral
    # weights so the loss still receives one weight per class.
    print("[WARNING] Only one class found in y_train. Forcing weights to [1.0, 1.0].")
    class_weights = torch.tensor([1.0, 1.0], dtype=torch.float)
else:
    class_weights = torch.tensor(class_weights_np, dtype=torch.float)

print("\n‚öñÔ∏è Calculated Class Weights (Neg, Pos):")
print(class_weights)

# ==========================================
# FEATURE ENGINEERING (all transformers are fitted on the training split)
# ==========================================
print("Starting Feature Engineering...")

# 1. Numeric: impute with training medians, then fit the scaler.
NUMERIC_COLS = ['tomatoMeter', 'audienceScore', 'runtimeMinutes']
train_medians = train_df[NUMERIC_COLS].median(numeric_only=True)
train_numeric = train_df[NUMERIC_COLS].fillna(train_medians)
scaler = StandardScaler().fit(train_numeric.values)

# 2. Categorical: Genre (comma-separated string -> multi-hot).
train_genre_list = train_df['genre'].fillna("Unknown").str.split(', ')
mlb = MultiLabelBinarizer().fit(train_genre_list)

# 3. Categorical: Language (one-hot).
# NOTE(review): fillna() runs after astype(str), so it presumably never fires
# (missing values are already strings by then) -- confirm before changing, and
# keep it in sync with the identical expression used when transforming.
train_lang = (
    train_df['language_encoded']
    .astype('Int64')
    .astype(str)
    .fillna('unknown')
)
# The keyword for dense output was renamed in scikit-learn 1.2.
dense_flag = (
    "sparse_output"
    if version.parse(sklearn.__version__) >= version.parse("1.2")
    else "sparse"
)
ohe_lang = OneHotEncoder(handle_unknown='ignore', dtype=np.float32, **{dense_flag: False})
ohe_lang.fit(train_lang.to_frame())

# 4. Categorical: Director (Target Encoding - BINARY VERSION)
print("-> Processing Director (Target Encoding - BINARY)...")
def director_features_binary(train_s, y, test_s, n_splits=5):
    """Build frequency and target-encoding features for the director column.

    Parameters
    ----------
    train_s : pandas.Series
        Director identifiers for the training rows.
    y : array-like
        Binary labels for the training rows, in the same row order as
        ``train_s``. Any array-like is accepted (list, ndarray, Series with an
        arbitrary index); it is converted to a positional ndarray.
    test_s : pandas.Series
        Director identifiers for the test rows.
    n_splits : int, default 5
        Number of stratified folds used for the out-of-fold training encoding.

    Returns
    -------
    tuple of four float32 arrays, each with a single column
        ``tr_freq`` / ``te_freq``: ``log1p`` of how often each row's director
        occurs in the training data (0 occurrences for unseen test directors).
        ``tr_te``: out-of-fold mean label of the row's director.
        ``te_te``: mean label of the row's director over the full training
        data. Directors without a mean fall back to the global label mean.
    """
    # All fold bookkeeping below is positional, so drop the caller's index on
    # the training series and force y to a plain ndarray. Without this, a
    # Series y with a non-default index is label-indexed by y[tr_i].
    s_train = train_s.fillna("Unknown").astype(str).reset_index(drop=True)
    s_test = test_s.fillna("Unknown").astype(str)
    y = np.asarray(y)

    # Frequency feature: counts come from the training split only.
    freq = s_train.value_counts()
    tr_freq = np.log1p(s_train.map(freq).fillna(0).values)[:, None].astype(np.float32)
    te_freq = np.log1p(s_test.map(freq).fillna(0).values)[:, None].astype(np.float32)

    global_mean = y.mean()
    tr_te = np.zeros((len(s_train), 1), dtype=np.float32)
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RNG)

    # Out-of-fold encoding: each validation row is encoded with means computed
    # from the other folds, so a row never sees its own label.
    for tr_i, val_i in skf.split(np.zeros(len(y)), y):
        tr_fold_s = s_train.iloc[tr_i]
        fold_means = pd.Series(y[tr_i], index=tr_fold_s.index).groupby(tr_fold_s).mean()
        val_te = s_train.iloc[val_i].map(fold_means)
        tr_te[val_i, 0] = val_te.fillna(global_mean).values.astype(np.float32)

    # Test rows use means from the complete training split.
    full_means = pd.Series(y, index=s_train.index).groupby(s_train).mean()
    te_te = s_test.map(full_means).fillna(global_mean).values[:, None].astype(np.float32)
    return tr_freq, te_freq, tr_te, te_te

# Apply the director encoding; IDs are cast to strings so they are treated
# as categories rather than numbers.
dtr_f, dte_f, dtr_te, dte_te = director_features_binary(
    train_s=train_df['director_encoded'].astype(str),
    y=y_train,
    test_s=test_df['director_encoded'].astype(str),
)

# Two columns per split: [log-frequency, target-encoded mean].
dir_train_block = np.hstack([dtr_f, dtr_te]).astype(np.float32)
dir_test_block = np.hstack([dte_f, dte_te]).astype(np.float32)

print("--- Part 2: Preprocessing & Weights Complete ---")

Cleaning data (already binary, no filtering needed)...
Splitting data (Stratified)...
Calculating class weights...

‚öñÔ∏è Calculated Class Weights (Neg, Pos):
tensor([1.0038, 0.9963])
Starting Feature Engineering...
-> Processing Director (Target Encoding - BINARY)...
--- Part 2: Preprocessing & Weights Complete ---


In [3]:
# --- PART 3: DATASET CREATION & TOKENIZATION (FINAL) ---

# Fail fast when any object produced by Part 2 is absent from the kernel.
required_vars = [
    'train_df', 'test_df',
    'dir_train_block', 'dir_test_block',
    'scaler', 'mlb', 'ohe_lang', 'train_medians',
]
if any(name not in globals() for name in required_vars):
    raise ValueError("‚ùå Missing variables from Part 2. Please run the previous cell first.")

# Numeric columns that are median-imputed and scaled.
NUMERIC_COLS = ['tomatoMeter', 'audienceScore', 'runtimeMinutes']

# Define helper function (Same concatenation logic as RoBERTa notebook)
def build_features(split_df, dir_block):
    """Assemble the model inputs for one data split.

    Relies on the notebook-level ``NUMERIC_COLS``, ``train_medians``,
    ``scaler``, ``mlb`` and ``ohe_lang`` objects.

    Parameters
    ----------
    split_df : pandas.DataFrame
        Rows of one split; must provide ``combined_text``, ``label``,
        ``genre``, ``language_encoded`` and the ``NUMERIC_COLS`` columns.
    dir_block : numpy.ndarray
        Director features for the same rows, one row per row of ``split_df``.

    Returns
    -------
    dict
        ``combined_text`` (list of str), ``label`` (list of int),
        ``numerical_features`` (float32 array) and ``categorical_features``
        (float32 array: genre multi-hot, language one-hot, then ``dir_block``).
    """
    # Numeric block: impute with the training medians, then scale.
    numeric_filled = split_df[NUMERIC_COLS].fillna(train_medians)

    # Genre arrives as a comma-separated string; split it into a list per row.
    genre_tokens = split_df['genre'].fillna("Unknown").str.split(', ')

    # Same string conversion as the one used when the encoder was fitted.
    language_frame = (
        split_df['language_encoded']
        .astype('Int64')
        .astype(str)
        .fillna('unknown')
        .to_frame()
    )

    categorical_parts = [
        mlb.transform(genre_tokens),
        ohe_lang.transform(language_frame),
        dir_block,
    ]

    return {
        'combined_text': split_df['combined_text'].tolist(),
        'label': split_df['label'].astype(int).tolist(),
        'numerical_features': scaler.transform(numeric_filled.values).astype(np.float32),
        'categorical_features': np.concatenate(categorical_parts, axis=1).astype(np.float32),
    }

print("Building features dictionaries...")
train_feats = build_features(train_df, dir_train_block)
test_feats = build_features(test_df, dir_test_block)

# Feature widths needed later to size the classification head.
num_numerical_features = len(NUMERIC_COLS)
num_categorical_features = train_feats['categorical_features'].shape[-1]
print(f"Feature Dims: Numeric={num_numerical_features}, Categorical={num_categorical_features}")

# Wrap both splits as Hugging Face datasets.
print("Converting to Hugging Face Datasets...")
raw_datasets = DatasetDict({
    split_name: Dataset.from_dict(split_feats)
    for split_name, split_feats in (('train', train_feats), ('test', test_feats))
})

# Tokenization
# CHANGE 3: Use the DeBERTa-v3-base checkpoint
MODEL_CHECKPOINT = "microsoft/deberta-v3-base"

print(f"Downloading Tokenizer ({MODEL_CHECKPOINT})...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
except Exception:
    # Print a hint, then let the original exception propagate.
    print("\n‚ùå ERROR: Could not download tokenizer. Check Internet connection.")
    raise

def tokenize_fn(examples):
    """Tokenize a batch's ``combined_text`` entries with the notebook-level tokenizer.

    Sequences are truncated and padded to exactly 256 tokens.
    """
    fixed_length_options = dict(padding="max_length", truncation=True, max_length=256)
    return tokenizer(examples["combined_text"], **fixed_length_options)

print("Tokenizing dataset (this may take a moment)...")
# Tokenize in batches, then drop the raw text column that is no longer needed.
tokenized_datasets = (
    raw_datasets
    .map(tokenize_fn, batched=True)
    .remove_columns(["combined_text"])
)
# Have the datasets return torch tensors when indexed.
tokenized_datasets.set_format("torch")

print("--- Part 3: Dataset Ready & Tokenized ---")

Building features dictionaries...
Feature Dims: Numeric=3, Categorical=48
Converting to Hugging Face Datasets...
Downloading Tokenizer (microsoft/deberta-v3-base)...


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]



Tokenizing dataset (this may take a moment)...


Map:   0%|          | 0/155840 [00:00<?, ? examples/s]

Map:   0%|          | 0/38961 [00:00<?, ? examples/s]

--- Part 3: Dataset Ready & Tokenized ---


In [4]:
# --- PART 4: MODEL SETUP & TRAINING (BINARY DeBERTa) ---

from transformers import Trainer, TrainingArguments, DebertaV2Model 
import torch.nn as nn
import torch
import numpy as np
import os
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# NOTE: Variables N_CLASSES, MODEL_CHECKPOINT, class_weights, 
# num_numerical_features, and num_categorical_features are assumed to be 
# defined in Parts 1, 2, and 3.

# --- MODEL ARCHITECTURE ---
class MultimodalClassifier(nn.Module):
    """DeBERTa text encoder fused with tabular features through one linear head.

    The hidden state at sequence position 0 is passed through dropout,
    concatenated with the numerical and categorical feature vectors, and
    mapped to ``num_labels`` logits.
    """

    def __init__(self, num_labels, num_numerical, num_categorical):
        """Load the backbone from ``MODEL_CHECKPOINT`` and size the head.

        Args:
            num_labels: number of output classes.
            num_numerical: width of the numerical feature vector.
            num_categorical: width of the categorical feature vector.
        """
        super().__init__()
        self.num_labels = num_labels
        # CHANGE 4: DebertaV2Model backbone
        self.deberta = DebertaV2Model.from_pretrained(MODEL_CHECKPOINT)
        self.dropout = nn.Dropout(0.2)
        # Head input = encoder hidden size + both tabular blocks.
        fused_width = self.deberta.config.hidden_size + num_numerical + num_categorical
        self.classifier = nn.Linear(fused_width, num_labels)

    # CHANGE 5: forward pass goes through self.deberta
    def forward(self, input_ids, attention_mask, numerical_features, categorical_features, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when no labels are given."""
        encoder_out = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
        # Position 0 of the last hidden state is the text representation.
        first_token = encoder_out.last_hidden_state[:, 0]
        fused = torch.cat(
            (self.dropout(first_token), numerical_features, categorical_features),
            dim=1,
        )
        logits = self.classifier(fused)

        if labels is None:
            return None, logits

        # Plain (unweighted) cross-entropy; WeightedTrainer computes its own
        # weighted loss from the logits.
        criterion = nn.CrossEntropyLoss()
        loss = criterion(logits.view(-1, self.num_labels), labels.view(-1))
        return loss, logits

# --- CUSTOM TRAINER (Handles weighted loss - IDENTICAL TO RoBERTa) ---
class WeightedTrainer(Trainer):
    """Trainer whose loss is cross-entropy weighted by the notebook-level ``class_weights``."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the class-weighted loss for one batch.

        Args:
            model: model to run; its output is indexed as ``(loss, logits)``.
            inputs: batch mapping that includes a ``"labels"`` entry.
            return_outputs: when true, return ``(loss, outputs)``.
            num_items_in_batch: accepted for signature compatibility; unused.
        """
        outputs = model(**inputs)
        logits = outputs[1]
        targets = inputs.get("labels")

        # Move the weights to wherever the logits live before building the loss.
        weighted_ce = nn.CrossEntropyLoss(weight=class_weights.to(logits.device))
        loss = weighted_ce(
            logits.view(-1, self.model.num_labels),
            targets.view(-1),
        )

        if return_outputs:
            return loss, outputs
        return loss

# --- DATA COLLATOR (IDENTICAL TO RoBERTa) ---
class MultimodalDataCollator:
    """Collate tokenized text plus tabular features into one batch."""

    def __init__(self, tokenizer):
        """Keep the tokenizer whose ``pad`` method batches the text fields."""
        self.tokenizer = tokenizer

    def __call__(self, features):
        """Turn a list of per-example dicts into a batch.

        Text fields go through ``tokenizer.pad``; ``label`` becomes a long
        tensor stored under ``labels``; the two feature vectors are stacked.
        """
        text_fields = ("input_ids", "attention_mask")
        batch = self.tokenizer.pad(
            [{field: example[field] for field in text_fields} for example in features],
            return_tensors="pt",
        )
        batch['labels'] = torch.tensor(
            [example['label'] for example in features],
            dtype=torch.long,
        )
        for feature_key in ('numerical_features', 'categorical_features'):
            batch[feature_key] = torch.stack([example[feature_key] for example in features])
        return batch

# --- METRICS FUNCTION (IDENTICAL TO RoBERTa) ---
def compute_metrics(pred):
    """Compute accuracy and binary precision/recall/F1 from a prediction object.

    ``pred.predictions`` may be the logits array itself or a tuple whose
    first element is the logits; ``pred.label_ids`` holds the true labels.
    """
    raw_predictions = pred.predictions
    scores = raw_predictions[0] if isinstance(raw_predictions, tuple) else raw_predictions
    predicted = scores.argmax(-1)
    gold = pred.label_ids

    precision, recall, f1, _ = precision_recall_fscore_support(
        gold, predicted, average='binary', zero_division=0
    )
    return {
        'accuracy': accuracy_score(gold, predicted),
        'f1': f1,
        'precision': precision,
        'recall': recall,
    }

# --- INIT MODEL ---
# Feature widths come from Part 3 so the head matches the engineered features.
multimodal_model = MultimodalClassifier(
    num_labels=N_CLASSES,
    num_numerical=num_numerical_features,
    num_categorical=num_categorical_features
)

# CHANGE 6: Update output directory name
OUTPUT_DIR = "/kaggle/working/Model_Results_Binary_Weighted_DeBERTa_All_Features" 

multimodal_training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    learning_rate=2e-5,
    per_device_train_batch_size=16, 
    per_device_eval_batch_size=16,
    num_train_epochs=3, 
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy", 
    greater_is_better=True,
    save_total_limit=2,
    report_to="none",
    logging_steps=100,
    # Mixed precision is only requested when a CUDA device is present, so the
    # cell also runs on a CPU-only kernel instead of failing at argument
    # validation. Behaviour on GPU is unchanged.
    fp16=torch.cuda.is_available()
)

# NOTE(review): eval_dataset is the held-out test split, and
# load_best_model_at_end picks the checkpoint by accuracy on it. The test
# metrics reported later are therefore measured on the same data used for
# model selection; consider carving a validation split out of the train set.
multimodal_trainer = WeightedTrainer(
    model=multimodal_model,
    args=multimodal_training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    data_collator=MultimodalDataCollator(tokenizer),
    compute_metrics=compute_metrics,
)

print(f"--- Part 4: DeBERTa Base Weighted Training Setup Complete (3 Epochs) ---")
print(f"Saving checkpoints to: {OUTPUT_DIR}")
print(f"Training Batch Size: {multimodal_training_args.per_device_train_batch_size}")
multimodal_trainer.train() 
# This line runs after train() returns, so it reports completion (the previous
# text claimed that training had not started yet).
print("\n--- Training complete ---")

pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/371M [00:00<?, ?B/s]

You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


--- Part 4: DeBERTa Base Weighted Training Setup Complete (3 Epochs) ---
Saving checkpoints to: /kaggle/working/Model_Results_Binary_Weighted_DeBERTa_All_Features
Training Batch Size: 16




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2266,0.232376,0.911373,0.911396,0.914564,0.908249
2,0.1623,0.241848,0.914453,0.91558,0.906965,0.924359
3,0.1098,0.287504,0.91471,0.915322,0.912138,0.918529





--- Training will start when the cell is executed ---


In [5]:
# --- PART 5: EVALUATION & SAVING (KAGGLE VERSION - BINARY) ---

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import shutil 

# NOTE: The lines below assume training (multimodal_trainer.train()) was executed in Part 4.

# =========================
# EVALUATE ‚Ä¢ REPORT ‚Ä¢ PLOTS ‚Ä¢ SAVE
# =========================
print("\n--- Final Evaluation on Test Set ---")

# Shorter alias for the trainer built in Part 4; the following steps use it.
trainer = multimodal_trainer 

# 1. PLOT LOSS CURVES 
try:
    log_history = trainer.state.log_history
    # Training entries carry 'loss'; evaluation entries carry 'eval_loss'.
    train_logs = [log for log in log_history if ('loss' in log and 'eval_loss' not in log)]
    eval_logs = [log for log in log_history if ('eval_loss' in log)]

    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))
    try:
        # Loss panel: x is the logged epoch, or the 1-based position if absent.
        loss_ax.plot(
            [log.get('epoch', i) for i, log in enumerate(train_logs, 1)],
            [log['loss'] for log in train_logs],
            label='Training Loss',
        )
        loss_ax.plot(
            [log.get('epoch', i) for i, log in enumerate(eval_logs, 1)],
            [log['eval_loss'] for log in eval_logs],
            label='Validation Loss',
        )
        loss_ax.set_xlabel('Epoch')
        loss_ax.set_ylabel('Loss')
        loss_ax.set_title('Training vs Validation Loss')
        loss_ax.legend()

        # Accuracy panel: left empty when no evaluation accuracy was logged.
        if len(eval_logs) > 0 and 'eval_accuracy' in eval_logs[0]:
            acc_ax.plot(
                [log.get('epoch', i) for i, log in enumerate(eval_logs, 1)],
                [log['eval_accuracy'] for log in eval_logs],
                label='Validation Accuracy',
            )
            acc_ax.set_xlabel('Epoch')
            acc_ax.set_ylabel('Accuracy')
            acc_ax.set_title('Validation Accuracy')
            acc_ax.legend()

        fig.tight_layout()
        fig.savefig("loss_and_accuracy_deberta_all_features.png")
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

except AttributeError:
    print("Skipping plots: Training history not available (trainer.train() might be commented out or failed).")


# 2. PREDICTIONS 
print("Generating predictions...")
preds_output = trainer.predict(tokenized_datasets["test"])

# predictions may be the logits array or a tuple whose first item is the logits.
logits = preds_output.predictions[0] if isinstance(preds_output.predictions, tuple) else preds_output.predictions 

pred_labels = np.argmax(logits, axis=-1)

# Use the labels that predict() returned alongside the logits: they belong to
# exactly the rows that were scored. Fall back to the dataset column only
# when predict() did not return labels.
true_labels = preds_output.label_ids
if true_labels is None:
    true_labels = tokenized_datasets["test"]["label"]
true_labels = np.asarray(true_labels)

# 3. CLASSIFICATION REPORT 
print("\n--- Classification Report (BINARY DeBERTa - All Features) ---")
report_text = classification_report(true_labels, pred_labels, target_names=class_names, digits=4, zero_division=0)
print(report_text)

# 4. CONFUSION MATRIX
cm = confusion_matrix(true_labels, pred_labels)
row_sums = cm.sum(axis=1, keepdims=True)
row_sums[row_sums == 0] = 1  # guard against dividing by zero for an empty class
cm_norm = cm.astype('float') / row_sums

# Left panel: raw counts. Right panel: row-normalised values (4 decimals).
fig_cm, axes_cm = plt.subplots(1, 2, figsize=(12, 5))
for ax_cm, matrix, number_format, panel_title in (
    (axes_cm[0], cm, 'd', 'Confusion Matrix (Counts)'),
    (axes_cm[1], cm_norm, '.4f', 'Confusion Matrix (Normalized)'),
):
    sns.heatmap(
        matrix,
        annot=True,
        fmt=number_format,
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
        ax=ax_cm,
    )
    ax_cm.set_title(panel_title)
    ax_cm.set_ylabel('True Label')
    ax_cm.set_xlabel('Predicted Label')

fig_cm.tight_layout()
fig_cm.savefig("confusion_matrix_deberta_all_features.png")
plt.close(fig_cm)

# 5. PER-CLASS BAR CHART
report_dict = classification_report(
    true_labels,
    pred_labels,
    target_names=class_names,
    output_dict=True,
    zero_division=0,
)
report_df = pd.DataFrame(report_dict).T
# Keep only the per-class rows (drops the accuracy / average rows).
report_df_classes = report_df.loc[[cn for cn in class_names if cn in report_df.index]]

ax = report_df_classes[['precision', 'recall', 'f1-score']].plot(kind='bar', figsize=(10, 6))
ax.set_title('Per-Class Metrics (BINARY DeBERTa - All Features)')
ax.set_xlabel('Classes')
ax.set_ylabel('Score')
plt.setp(ax.get_xticklabels(), rotation=0)
ax.grid(axis='y', linestyle='--')
ax.legend(loc='lower right')
ax.figure.savefig("per_class_metrics_deberta_all_features.png")
plt.close(ax.figure)

# 6. SAVE (KAGGLE SPECIFIC)
# CHANGE 7: Update final model directory name
FINAL_MODEL_DIR = "Final_Model_Binary_DeBERTa_All_Features"
FINAL_MODEL_PATH = "/kaggle/working/" + FINAL_MODEL_DIR

print(f"\nüíæ Saving model to {FINAL_MODEL_PATH}...")
trainer.save_model(FINAL_MODEL_PATH)

# --- ZIP FOR DOWNLOAD ---
# The archive is written next to the model directory as "<FINAL_MODEL_DIR>.zip".
print("üì¶ Zipping model for easy download...")
shutil.make_archive(base_name=FINAL_MODEL_PATH, format='zip', root_dir=FINAL_MODEL_PATH)

print(f"‚úÖ DONE! You can now download '{FINAL_MODEL_DIR}.zip' from the 'Output' tab.")


--- Final Evaluation on Test Set ---
Generating predictions...





--- Classification Report (BINARY DeBERTa - All Features) ---
              precision    recall  f1-score   support

    Negative     0.9173    0.9109    0.9141     19408
    Positive     0.9121    0.9185    0.9153     19553

    accuracy                         0.9147     38961
   macro avg     0.9147    0.9147    0.9147     38961
weighted avg     0.9147    0.9147    0.9147     38961


üíæ Saving model to /kaggle/working/Final_Model_Binary_DeBERTa_All_Features...
üì¶ Zipping model for easy download...
‚úÖ DONE! You can now download 'Final_Model_Binary_DeBERTa_All_Features.zip' from the 'Output' tab.
