# Check GPU Availability

In [None]:
# Check whether a GPU is available (note: nvidia-smi only reports NVIDIA GPUs)
!nvidia-smi

# Install Required Packages

In [None]:
!pip install nltk==3.8.1 rouge-score bert-score sentence-transformers meteor
!pip install --upgrade nltk==3.8.1

In [None]:
# Import Libraries

In [None]:
import nltk
print("NLTK version:", nltk.__version__)

In [None]:
# Download NLTK Data

In [None]:
# Fetch the NLTK resources used later for tokenization and METEOR scoring
for nltk_resource in ('punkt', 'wordnet', 'stopwords', 'omw-1.4'):
    nltk.download(nltk_resource)

# Import Additional Libraries

In [None]:
import torch
import rouge_score
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import numpy as np
import json
from sklearn.model_selection import train_test_split
from torch.cuda.amp import GradScaler, autocast
import os
import matplotlib.pyplot as plt
import nltk
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
from nltk.translate.meteor_score import meteor_score
from bert_score import score as bert_score
from sentence_transformers import SentenceTransformer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# Load the CONAN Dataset

In [None]:
# Load the CONAN dataset.
# The file is opened explicitly as UTF-8 so that non-ASCII characters decode
# the same way on every platform instead of depending on the locale default.
with open('CONAN.json', 'r', encoding='utf-8') as f:
    conan_data = json.load(f)
# The records are stored under the top-level 'conan' key
conan_data = conan_data['conan']

# One DataFrame row per record
df = pd.DataFrame(conan_data)

# Initialize Tokenizer and Model

In [None]:
# Initialize tokenizer and model from the pretrained 'gpt2-medium' checkpoint.
# NOTE: the training loop further down loads a fresh tokenizer and model for
# every configuration, so these objects only serve the cells before it.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
tokenizer.pad_token = tokenizer.eos_token  # Set pad token to eos_token to avoid errors
model = GPT2LMHeadModel.from_pretrained('gpt2-medium')

# Check for Multiple GPUs

In [None]:
# Pick the compute device: CUDA when it is available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Wrap the model for data-parallel execution when more than one GPU is visible
gpu_count = torch.cuda.device_count()
if gpu_count > 1:
    print(f"Using {gpu_count} GPUs!")
    model = torch.nn.DataParallel(model)

# Move the (possibly wrapped) model onto the chosen device
model.to(device)

# Print Device Information

In [None]:
print(device)

# Define Custom Dataset Class

In [None]:
# Custom dataset class
class CONANDataset(Dataset):
    """Hate-speech / counter-speech pairs tokenized for causal-LM fine-tuning.

    Every item encodes ``hateSpeech + EOS + counterSpeech + EOS``, padded or
    truncated to ``max_length`` tokens. Only the counter-speech tokens and the
    closing EOS contribute to the loss: prompt positions and padding positions
    are set to -100 in ``labels``.

    Args:
        df: DataFrame with string columns ``hateSpeech`` and ``counterSpeech``.
        tokenizer: Callable tokenizer exposing ``eos_token``. It is called with
            ``padding='max_length'``, so it must have a pad token configured.
        max_length: Fixed token length of every returned sequence.
    """

    def __init__(self, df, tokenizer, max_length=256):
        # Re-index from 0 so positional indices from the DataLoader match labels
        self.df = df.reset_index(drop=True)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of hate-speech / counter-speech pairs."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``input_ids``, ``attention_mask`` and ``labels`` for row ``idx``.

        All three values are 1-D tensors of length ``max_length``.
        """
        hate_speech = self.df.at[idx, 'hateSpeech']
        counterspeech = self.df.at[idx, 'counterSpeech']
        eos = self.tokenizer.eos_token

        prompt_text = hate_speech + eos
        # The closing EOS is a real (attended) token, so the model can learn
        # where a counter-speech ends even though padding is masked out below.
        input_text = prompt_text + counterspeech + eos

        encoding = self.tokenizer(
            input_text,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        # squeeze(0) drops only the batch dimension added by return_tensors='pt'
        input_ids = encoding['input_ids'].squeeze(0)
        attention_mask = encoding['attention_mask'].squeeze(0)
        labels = input_ids.clone()

        # Set labels for hate speech tokens (and the separator EOS) to -100
        hate_speech_encoding = self.tokenizer(
            prompt_text,
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        hs_len = hate_speech_encoding['input_ids'].size(1)
        labels[:hs_len] = -100

        # Padding shares its id with EOS, so it has to be masked through the
        # attention mask; otherwise the loss is dominated by pad positions.
        labels[attention_mask == 0] = -100

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': labels
        }

# Define Custom Collate Function

In [None]:
# DataLoader with a custom collate function
def collate_fn(batch):
    """Stack the per-sample tensors of ``batch`` into batch tensors.

    Args:
        batch: Sequence of dicts, each holding equally shaped tensors under
            'input_ids', 'attention_mask' and 'labels'.

    Returns:
        Dict with the same three keys, each tensor stacked along a new
        leading dimension.
    """
    field_names = ('input_ids', 'attention_mask', 'labels')
    return {
        field: torch.stack([sample[field] for sample in batch])
        for field in field_names
    }

# Split Data into Train, Validation, and Test Sets

In [None]:
# Split the DataFrame into train, validation, and test sets.
# First hold out 30% of the rows, then split that hold-out in half, which
# gives a 70% / 15% / 15% train / validation / test split. The fixed
# random_state makes both splits reproducible.
train_df, temp_df = train_test_split(df, test_size=0.3, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Define Training Configurations

In [None]:
# Define configurations.
# Each entry describes one fine-tuning run; the training loop reads
# 'learning_rate', 'batch_size' and 'num_epochs' from it.
configs = [
    {'learning_rate': 5e-5, 'batch_size': 4, 'num_epochs': 3},
    {'learning_rate': 3e-5, 'batch_size': 2, 'num_epochs': 3},
    {'learning_rate': 1e-5, 'batch_size': 2, 'num_epochs': 3},
]

In [None]:
# One summary dict per configuration, appended by the training loop
evaluation_results = []
# Generated and reference texts; evaluate_model appends one list per call
gen_token = []
ref_token = []

# Define Evaluation Function

In [None]:
# Function to compute evaluation metrics
def evaluate_model(model, test_dataset, tokenizer, device, cnt):
    """Generate counterspeech for every test sample and score it.

    Args:
        model: Fine-tuned causal language model. A ``torch.nn.DataParallel``
            wrapper is unwrapped first because it does not expose
            ``generate``.
        test_dataset: Dataset exposing a ``df`` DataFrame with ``hateSpeech``
            and ``counterSpeech`` columns.
        tokenizer: Tokenizer matching ``model``; must expose ``eos_token`` and
            ``eos_token_id``.
        device: Device the prompts are moved to before generation.
        cnt: Index of the current configuration. Unused; kept so existing
            callers keep working.

    Returns:
        Dict mapping 'BLEU', 'METEOR', 'ROUGE-1', 'ROUGE-2', 'ROUGE-L',
        'BERTScore' and 'CosineSimilarity' to the mean score over all
        evaluated samples.

    Side effects:
        Appends the list of generated texts to the module-level ``gen_token``
        and the list of reference texts to ``ref_token``.
    """
    # DataParallel only forwards __call__; generate() lives on the wrapped module
    generator = model.module if isinstance(model, torch.nn.DataParallel) else model
    generator.eval()

    generated_texts = []
    reference_texts = []

    samples = zip(test_dataset.df['hateSpeech'], test_dataset.df['counterSpeech'])
    for hate_speech, reference_counterspeech in samples:
        # Prompts are encoded one at a time and left unpadded: right-padding
        # a decoder-only model would make it continue after the pad tokens.
        encoding = tokenizer(
            hate_speech + tokenizer.eos_token,
            return_tensors='pt',
            truncation=True,
            max_length=256
        ).to(device)
        input_ids = encoding['input_ids']
        attention_mask = encoding['attention_mask']

        # Generate counterspeech
        with torch.no_grad():
            generated_ids = generator.generate(
                input_ids,
                attention_mask=attention_mask,
                max_new_tokens=50,
                num_beams=5,
                no_repeat_ngram_size=2,
                early_stopping=True,
                pad_token_id=tokenizer.eos_token_id
            )

        # The output holds the prompt followed by the continuation; slicing by
        # token position is more reliable than matching the decoded prompt text.
        prompt_length = input_ids.shape[1]
        generated_counterspeech = tokenizer.decode(
            generated_ids[0][prompt_length:],
            skip_special_tokens=True
        ).strip()

        generated_texts.append(generated_counterspeech)
        reference_texts.append(reference_counterspeech)

    # Keep the raw texts available for inspection after the run
    gen_token.append(generated_texts)
    ref_token.append(reference_texts)

    # Per-sample score accumulators
    bleu_scores = []
    meteor_scores = []
    cosine_similarities = []
    rouge1_scores = []
    rouge2_scores = []
    rougeL_scores = []

    # Initialize the ROUGE scorer
    rouge_scorer_obj = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

    # Initialize the sentence transformer model for embeddings
    st_model = SentenceTransformer('all-MiniLM-L6-v2')  # Or any other appropriate model

    # BERTScore is computed for the whole corpus in one call; only F1 is reported
    _, _, F1 = bert_score(
        generated_texts,
        reference_texts,
        model_type='bert-base-uncased',
        lang='en',
        rescale_with_baseline=True,
        batch_size=4
    )

    smoothing_fn = SmoothingFunction().method1
    for gen, ref in zip(generated_texts, reference_texts):
        # BLEU and METEOR share the same lower-cased word tokens
        reference_tokens = nltk.word_tokenize(ref.lower())
        generated_tokens = nltk.word_tokenize(gen.lower())

        # BLEU score
        bleu_scores.append(
            sentence_bleu([reference_tokens], generated_tokens, smoothing_function=smoothing_fn)
        )

        # ROUGE score
        rouge = rouge_scorer_obj.score(ref, gen)
        rouge1_scores.append(rouge['rouge1'].fmeasure)
        rouge2_scores.append(rouge['rouge2'].fmeasure)
        rougeL_scores.append(rouge['rougeL'].fmeasure)

        # METEOR score with tokenized inputs
        meteor_scores.append(meteor_score([reference_tokens], generated_tokens))

        # Cosine similarity between the sentence embeddings
        embeddings = st_model.encode([gen, ref])
        cosine_similarity = np.dot(embeddings[0], embeddings[1]) / (
            np.linalg.norm(embeddings[0]) * np.linalg.norm(embeddings[1])
        )
        cosine_similarities.append(cosine_similarity)

    bert_scores = F1.tolist()  # Convert tensor to list

    # Average every metric over the evaluated samples
    evaluation_scores = {
        'BLEU': np.mean(bleu_scores),
        'METEOR': np.mean(meteor_scores),
        'ROUGE-1': np.mean(rouge1_scores),
        'ROUGE-2': np.mean(rouge2_scores),
        'ROUGE-L': np.mean(rougeL_scores),
        'BERTScore': np.mean(bert_scores),
        'CosineSimilarity': np.mean(cosine_similarities)
    }

    return evaluation_scores

# Clear GPU Cache

In [None]:
torch.cuda.empty_cache()

# Training Loop for Different Configurations

In [None]:
import gc  # used below to drop lingering references before emptying the CUDA cache

cnt = 0
for config in configs:
    print(f"\nTraining with configuration: {config}")

    # Used both as the save-directory name and as the label in the results
    config_label = f"LR_{config['learning_rate']}_BS_{config['batch_size']}_E_{config['num_epochs']}"

    # Start every configuration from the same pretrained weights
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
    tokenizer.pad_token = tokenizer.eos_token  # Set pad token to eos_token to avoid errors
    model = GPT2LMHeadModel.from_pretrained('gpt2-medium')

    # Check if multiple GPUs are available
    if torch.cuda.device_count() > 1:
        print(f"Using {torch.cuda.device_count()} GPUs!")
        model = torch.nn.DataParallel(model)

    # Move the model to the selected device(s)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Instantiate datasets and dataloaders with the appropriate batch_size
    train_dataset = CONANDataset(train_df, tokenizer)
    val_dataset = CONANDataset(val_df, tokenizer)
    test_dataset = CONANDataset(test_df, tokenizer)

    train_dataloader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, collate_fn=collate_fn)
    val_dataloader = DataLoader(val_dataset, batch_size=config['batch_size'], shuffle=False, collate_fn=collate_fn)

    # Initialize optimizer and scaler with the learning rate
    optimizer = AdamW(model.parameters(), lr=config['learning_rate'])
    num_epochs = config['num_epochs']
    scaler = GradScaler()

    train_losses = []
    val_losses = []
    for epoch in range(num_epochs):
        # Training pass
        model.train()
        total_loss = 0
        for batch in train_dataloader:
            optimizer.zero_grad()

            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            with autocast():  # Enable mixed precision
                outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
                # DataParallel gathers one loss per GPU; mean() reduces it to
                # the scalar that backward() and item() need, and is a no-op
                # for the single-device scalar loss.
                loss = outputs.loss.mean()

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_loss += loss.item()

        avg_loss = total_loss / len(train_dataloader)
        train_losses.append(avg_loss)
        print(f"Epoch {epoch + 1}/{num_epochs}, Training Loss: {avg_loss}")

        # Validation step
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for val_batch in val_dataloader:
                val_input_ids = val_batch['input_ids'].to(device)
                val_attention_mask = val_batch['attention_mask'].to(device)
                val_labels = val_batch['labels'].to(device)

                val_outputs = model(val_input_ids, attention_mask=val_attention_mask, labels=val_labels)
                # Same per-GPU reduction as in the training pass
                val_loss += val_outputs.loss.mean().item()

        avg_val_loss = val_loss / len(val_dataloader)
        val_losses.append(avg_val_loss)
        print(f"Epoch {epoch + 1}/{num_epochs}, Validation Loss: {avg_val_loss}")

    # Save the trained model locally; DataParallel keeps the real model in .module
    os.makedirs('saved_models', exist_ok=True)
    model_save_path = os.path.join('saved_models', config_label)
    model_to_save = model.module if isinstance(model, torch.nn.DataParallel) else model
    model_to_save.save_pretrained(model_save_path)
    tokenizer.save_pretrained(model_save_path)
    print(f"Model saved to {model_save_path}")

    # Evaluate the unwrapped model: generation is not available on the
    # DataParallel wrapper.
    evaluation_scores = evaluate_model(model_to_save, test_dataset, tokenizer, device, cnt)
    print(f"Evaluation scores for configuration {config}: {evaluation_scores}")
    cnt += 1

    # Store the results
    evaluation_results.append({
        'config': config,
        'evaluation_scores': evaluation_scores,
        'train_losses': train_losses,
        'val_losses': val_losses,
        'config_label': config_label
    })

    # Drop every reference that keeps parameters, optimizer state or the last
    # batch's outputs alive; empty_cache() can only release unreferenced memory.
    outputs = loss = val_outputs = None
    del model, model_to_save, optimizer, scaler
    gc.collect()
    torch.cuda.empty_cache()
    print(f"Freed GPU memory after processing configuration: {config}")

# Collect the results for all metrics into a DataFrame

In [None]:
# Flatten every run into one row: hyperparameters first, then the averaged
# metric scores, then the run label.
config_keys = ('learning_rate', 'batch_size', 'num_epochs')
score_keys = ('BLEU', 'METEOR', 'ROUGE-1', 'ROUGE-2', 'ROUGE-L', 'BERTScore', 'CosineSimilarity')

result_rows = []
for res in evaluation_results:
    row = {key: res['config'][key] for key in config_keys}
    row.update((key, res['evaluation_scores'][key]) for key in score_keys)
    row['config_label'] = res['config_label']
    result_rows.append(row)

results_df = pd.DataFrame(result_rows)

In [None]:
metrics = ['BLEU', 'METEOR', 'ROUGE-1', 'ROUGE-2', 'ROUGE-L', 'BERTScore', 'CosineSimilarity']

# Create Directory for plots

In [None]:
# Create the output directory for the plots; exist_ok avoids the separate
# existence check and makes re-running the cell safe.
os.makedirs('plots', exist_ok=True)

In [None]:
# Add a 'model_name' column: 'Model-1', 'Model-2', ... in row order.
# The labels are derived from the number of rows so the assignment keeps
# working when the number of evaluated configurations changes.
results_df['model_name'] = [f'Model-{position}' for position in range(1, len(results_df) + 1)]

# Make sure the target directory exists even if this cell is run on its own
os.makedirs('plots', exist_ok=True)

# One bar chart per metric, comparing all trained models
for metric in metrics:
    fig = plt.figure(figsize=(8, 6))
    plt.bar(results_df['model_name'], results_df[metric], color='green')
    plt.title(f'{metric} for Different Models')
    plt.xlabel('Model')
    plt.ylabel(metric)
    plt.xticks(rotation=0)  # No rotation needed for short labels
    plt.tight_layout()
    plot_filename = f'plots/{metric}_scores.png'
    plt.savefig(plot_filename)
    plt.show()
    plt.close(fig)  # release the figure so figures do not accumulate across metrics
    print(f"Plot saved to {plot_filename}")

In [None]:
print(evaluation_results)