In [1]:
# Import necessary libraries
import os
import sys
import logging
import torch
from transformers import DistilBertForSequenceClassification, DistilBertTokenizerFast
import matplotlib.pyplot as plt
import pandas as pd

# Make the parent directory importable so the `src.*` packages resolve.
# The guard keeps re-running this cell from appending a duplicate entry.
if '..' not in sys.path:
    sys.path.append('..')  # Adjust if needed to point to your project root

# Import your modules
from src.config import CONFIG
from src.models.distilbert.distilbert_finetuner import DistilBERTFineTuner
from src.data_pipeline.data_loader import DataModule
from src.training.model_trainer import ModelTrainer
from src.training.model_evaluator import ModelEvaluator

# Notebook-level logger plus root logging configuration:
# INFO threshold, records rendered as "timestamp - level - message".
logger = logging.getLogger(__name__)
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

2025-03-08 12:16:48.183425: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
# Build the effective DistilBERT configuration.
# Start from the shared training parameters and overlay the model-specific
# entries, so a key present in both takes its value from
# CONFIG["models"]["distilbert"].
distilbert_config = {**CONFIG["training"], **CONFIG["models"]["distilbert"]}

# Echo every effective setting so the run is self-documenting.
print("Current DistilBERT Configuration:")
for key, value in distilbert_config.items():
    print(f"  {key}: {value}")

Current DistilBERT Configuration:
  batch_size: 16
  max_length: 128
  learning_rate: 2e-05
  epochs: 3
  early_stopping_patience: 2
  device: cpu
  pretrained_model_name: distilbert-base-uncased
  num_labels: 2


In [7]:
# Load the dataset and explore its structure
# NOTE(review): the output saved with this cell shows KeyError: 'dataset_path',
# which does not match the key read below -- the output looks stale; re-run
# the cell on a fresh kernel to confirm the key exists in CONFIG["dataset"].
dataset_path = CONFIG["dataset"]["balanced_dataset_path"]
df = pd.read_csv(dataset_path)

print(f"Dataset shape: {df.shape}")
print("\nColumns:")
for col in df.columns:
    print(f"  {col}")

# Per-label row counts and their share of all rows in the frame.
print("\nLabel distribution:")
label_counts = df["is_toxic"].value_counts()
for label, count in label_counts.items():
    print(f"  Label {label}: {count} samples ({count/len(df)*100:.2f}%)")

# Display a few examples
# The sample size is capped at the number of rows so a tiny dataset does not
# raise, and the fixed seed makes the preview identical on every re-run.
print("\nSample entries:")
df.sample(n=min(5, len(df)), random_state=42)

KeyError: 'dataset_path'

In [None]:
# Initialize the fine-tuner
# It receives the merged configuration dict (shared training parameters
# overlaid with the DistilBERT-specific entries).
fine_tuner = DistilBERTFineTuner(distilbert_config)

# Run the fine-tuning process
# NOTE(review): what run() covers (training, evaluation, saving) and what it
# returns are defined in DistilBERTFineTuner, not in this notebook -- confirm
# there. As the last expression of the cell, its return value is displayed.
fine_tuner.run()

In [None]:
# Load the saved model for testing specific examples
# The merged configuration printed earlier in this notebook lists no
# "model_save_path" entry, so check for it up front and fail with a message
# that says where to add it instead of a bare KeyError.
if "model_save_path" not in distilbert_config:
    raise KeyError(
        "distilbert_config has no 'model_save_path'; add it to "
        "CONFIG['models']['distilbert'] or CONFIG['training'] so the "
        "fine-tuned model can be located."
    )
model_path = distilbert_config["model_save_path"]

# Model weights and tokenizer files are both read from the same location.
model = DistilBertForSequenceClassification.from_pretrained(model_path)
tokenizer = DistilBertTokenizerFast.from_pretrained(model_path)

# Function to predict the toxicity of an individual text
def predict_sentiment(text, model=model, tokenizer=tokenizer):
    """Classify a single text as toxic or non-toxic.

    Despite its name, this scores toxicity: the class at index 1 of the
    model output is treated as "toxic" and index 0 as "non-toxic".

    Args:
        text: The text to classify.
        model: Sequence-classification model to use. Defaults to the model
            loaded in this cell (bound when the function is defined).
        tokenizer: Tokenizer matching ``model``. Defaults to the tokenizer
            loaded in this cell (bound when the function is defined).

    Returns:
        dict with keys ``"text"`` (the input), ``"label"`` (``"Toxic"`` when
        the toxic probability exceeds 0.5, otherwise ``"Non-toxic"``),
        ``"toxic_probability"`` and ``"non_toxic_probability"`` (floats from
        a softmax over the two logits).
    """
    device = distilbert_config["device"]
    model.to(device)
    model.eval()

    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        # Take the sequence length from the same configuration that supplies
        # the device, so inference cannot silently drift from the configured
        # value; 128 is kept as the fallback when the key is absent.
        max_length=distilbert_config.get("max_length", 128),
    ).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.softmax(outputs.logits, dim=1)

    # Only the first row of the output is read.
    non_toxic_prob = predictions[0][0].item()
    toxic_prob = predictions[0][1].item()
    label = "Toxic" if toxic_prob > 0.5 else "Non-toxic"

    return {
        "text": text,
        "label": label,
        "toxic_probability": toxic_prob,
        "non_toxic_probability": non_toxic_prob,
    }

# Hand-written examples to spot-check the classifier
test_texts = [
    "This code looks good, well structured!",
    "This is the worst implementation I've ever seen. Are you stupid?",
    "I think we should refactor this part for better performance",
    "Why would you even try to submit this garbage code?",
]

# map() is lazy, so each text is still scored and printed before the next
# one is processed.
for result in map(predict_sentiment, test_texts):
    print(f"Text: {result['text']}")
    print(f"Prediction: {result['label']} (Toxic prob: {result['toxic_probability']:.4f})")
    print("-" * 80)

In [None]:
# Bar chart of the test metrics.
# The values below are hard-coded placeholders, not read from an evaluation
# run. Alternative: extract these metrics from fine_tuner.evaluate_model()
# directly, or load them from a saved results file.
test_metrics = {
    "accuracy": 0.92,  # Replace with actual metrics
    "precision": 0.89,
    "recall": 0.91,
    "f1": 0.90
}

# Explicit figure/axes interface; one bar per metric on a fixed 0-1 scale.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(list(test_metrics.keys()), list(test_metrics.values()), color='royalblue')
ax.set_ylim(0, 1.0)
ax.set_xlabel('Metrics')
ax.set_ylabel('Score')
ax.set_title('DistilBERT Model Performance')
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [None]:
# Side-by-side comparison of the three models.
# The numbers are hard-coded in this cell; replace them with real results
# once all three models have been trained.
models_metrics = {
    'DistilBERT': {'accuracy': 0.92, 'precision': 0.89, 'recall': 0.91, 'f1': 0.90},
    'CodeBERT': {'accuracy': 0.94, 'precision': 0.92, 'recall': 0.93, 'f1': 0.92},
    'DeBERTa': {'accuracy': 0.95, 'precision': 0.94, 'recall': 0.93, 'f1': 0.93}
}

# One row per model, one column per metric
comparison_df = pd.DataFrame.from_dict(models_metrics, orient="index")
comparison_df