## TFIDF

## SBERT

In [2]:
import os
import ast
import logging
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Logging: INFO and above, each record prefixed with a timestamp and its level
logging.basicConfig(
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Output folders; exist_ok=True makes re-running this cell harmless
os.makedirs(name="models", exist_ok=True)
os.makedirs(name="results", exist_ok=True)

# === 1. Helper Functions ===
def safe_parse(lst_str):
    """Turn a label cell into a list of labels.

    Args:
        lst_str: Either an actual ``list`` (returned unchanged) or a value,
            normally a string such as ``"['a', 'b']"``, that is parsed with
            ``ast.literal_eval``.

    Returns:
        list: The parsed labels. A tuple or set literal is converted to a
        list. A value that cannot be parsed, or that parses to something
        that is not a list/tuple/set (e.g. ``"5"`` or ``"'abc'"``), is
        treated as a single label and returned as ``[lst_str]``.
    """
    if isinstance(lst_str, list):
        return lst_str

    try:
        parsed = ast.literal_eval(lst_str)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Only the failures literal_eval is documented to raise on bad input;
        # anything else (e.g. KeyboardInterrupt) must propagate.
        return [lst_str]

    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, (tuple, set)):
        return list(parsed)

    # A scalar result would not be iterable as a label collection downstream
    # (or, for a string, would be split into characters), so keep the raw
    # value as one label instead.
    return [lst_str]

# === 2. Load Embeddings ===
def load_embeddings(embedding_type):
    """Load document embeddings and their labels into a DataFrame.

    The PT file is tried first (``sbert_output_1.pt`` for SBERT,
    ``<embedding_type>_1.pt`` otherwise). It must hold a dict with the keys
    ``'embeddings'`` (a tensor), ``'labels'`` and ``'indices'``. Only TF-IDF
    has a CSV fallback (``tfidf_1.csv``).

    Args:
        embedding_type: 'cbow', 'skipgram', 'tfidf', or 'sbert'

    Returns:
        pandas.DataFrame: Feature columns plus a ``'labels'`` column whose
        cells have been passed through ``safe_parse``. When loaded from the
        PT file the frame also has an ``'original_index'`` column.

    Raises:
        RuntimeError: The PT file could not be loaded and the embedding type
            has no CSV fallback (previously this returned ``None``, which
            only deferred the failure to the caller).
        ValueError: No label column could be found in the loaded data.
    """
    logging.info(f"Loading {embedding_type.upper()} embeddings...")

    # SBERT output uses a different file naming scheme than the other types
    if embedding_type == 'sbert':
        pt_path = f"{embedding_type}_output_1.pt"
    else:
        pt_path = f"{embedding_type}_1.pt"

    try:
        # map_location keeps loading possible on CPU-only machines
        data = torch.load(pt_path, map_location="cpu")
        # detach()/cpu() make the conversion safe for grad-tracking or CUDA tensors
        embeddings = data['embeddings'].detach().cpu().numpy()
        labels = data['labels']
        indices = data['indices']

        # Create DataFrame
        df = pd.DataFrame(embeddings)
        df.insert(0, 'original_index', indices)
        df['labels'] = labels
        logging.info(f"Successfully loaded {embedding_type} embeddings from PT file: {embeddings.shape}")

    except Exception as e:
        logging.info(f"Failed to load from PT file: {e}")

        if embedding_type != 'tfidf':
            # No CSV export exists for this type: fail loudly with the cause attached
            raise RuntimeError(
                f"Could not load {embedding_type} embeddings from {pt_path} "
                f"and no CSV fallback is available"
            ) from e

        logging.info("Trying to load from CSV file...")
        csv_path = "tfidf_1.csv"
        df = pd.read_csv(csv_path)

        # For TF-IDF, check if we need to rename the label column
        if 'CommentClass_en' in df.columns:
            df.rename(columns={'CommentClass_en': 'labels'}, inplace=True)

        logging.info(f"Successfully loaded {embedding_type} embeddings from CSV: {csv_path}")

    # Ensure labels are in the correct format
    if 'labels' in df.columns:
        df['labels'] = df['labels'].apply(safe_parse)
    else:
        # Try to find alternative label column
        label_candidates = ['CommentClass_en', 'label', 'classes', 'class']
        for col in label_candidates:
            if col in df.columns:
                df.rename(columns={col: 'labels'}, inplace=True)
                df['labels'] = df['labels'].apply(safe_parse)
                break
        else:
            logging.error(f"No label column found in {embedding_type} embeddings")
            raise ValueError(f"No label column found in {embedding_type} embeddings")

    return df

# === 3. Neural Network Models ===

class SimpleNN(nn.Module):
    """Single-hidden-layer feed-forward classifier for multi-label targets.

    Layout: Linear -> ReLU -> Dropout -> Linear -> Sigmoid. The final sigmoid
    means each output unit is an independent value in [0, 1].

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of output units (one per label).
        dropout: Dropout probability applied after the hidden activation.
    """
    def __init__(self, input_dim, hidden_dim, output_dim, dropout=0.3):
        super().__init__()
        hidden_block = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
        ]
        output_block = [
            nn.Linear(hidden_dim, output_dim),
            nn.Sigmoid(),
        ]
        # Kept under the attribute name `model` so state_dict keys stay "model.<idx>.*"
        self.model = nn.Sequential(*hidden_block, *output_block)

    def forward(self, x):
        """Return the per-label sigmoid outputs for the batch `x`."""
        network = self.model
        return network(x)


class DeepNN(nn.Module):
    """Feed-forward classifier with a configurable stack of hidden layers.

    Each entry of `hidden_dims` contributes a Linear -> ReLU -> Dropout
    triple; the stack ends with Linear -> Sigmoid so every output unit is an
    independent value in [0, 1] (multi-label).

    Args:
        input_dim: Number of input features.
        hidden_dims: Widths of the hidden layers, in order.
        output_dim: Number of output units (one per label).
        dropout: Dropout probability used after every hidden activation.
    """
    def __init__(self, input_dim, hidden_dims, output_dim, dropout=0.4):
        super().__init__()

        # Consecutive pairs of this list are the (in, out) sizes of the hidden layers
        widths = [input_dim, *hidden_dims]

        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(dropout)]

        # Output head: last hidden width (or input_dim when there are no hidden layers)
        stack += [nn.Linear(widths[-1], output_dim), nn.Sigmoid()]

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Return the per-label sigmoid outputs for the batch `x`."""
        network = self.model
        return network(x)

def prepare_data(df, test_size=0.2, random_state=42):
    """Split an embeddings DataFrame into train/test features and binarized labels.

    Every column except 'original_index', 'labels' and 'Unnamed: 0' is used
    as a feature. The 'labels' column is binarized with MultiLabelBinarizer.

    Args:
        df: DataFrame with feature columns and a 'labels' column.
        test_size: Passed through to train_test_split.
        random_state: Seed passed through to train_test_split.

    Returns:
        tuple: (X_train, X_test, y_train, y_test, classes) where `classes`
        is the binarizer's `classes_` array.
    """
    non_feature_cols = ('original_index', 'labels', 'Unnamed: 0')
    feature_cols = [name for name in df.columns if name not in non_feature_cols]
    features = df[feature_cols].values

    # One indicator column per distinct label
    binarizer = MultiLabelBinarizer()
    targets = binarizer.fit_transform(df['labels'].tolist())

    # No stratification is requested
    split = train_test_split(
        features,
        targets,
        test_size=test_size,
        random_state=random_state,
        stratify=None,
    )
    X_train, X_test, y_train, y_test = split

    logging.info(f"Data shapes: X_train={X_train.shape}, y_train={y_train.shape}")
    logging.info(f"Classes: {binarizer.classes_}")

    return X_train, X_test, y_train, y_test, binarizer.classes_


def _multilabel_metrics(y_true, y_pred):
    """Return accuracy and weighted precision/recall/F1 for binarized predictions."""
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, average='weighted', zero_division=0),
        'recall': recall_score(y_true, y_pred, average='weighted', zero_division=0),
        'f1': f1_score(y_true, y_pred, average='weighted', zero_division=0),
    }


def train_model(model, X_train, y_train, X_test, y_test, model_name, embedding_type,
                batch_size=32, lr=0.001, num_epochs=20, patience=3):
    """Train a sigmoid-output PyTorch model and evaluate it on train and test data.

    Training uses Adam with BCELoss on shuffled mini-batches. After every
    epoch the model is evaluated on the full training and test arrays with
    predictions thresholded at 0.5. The weights with the lowest test-set loss
    are written to ``best_<embedding_type>_<model_name>_classifier.pt`` and
    restored before the final evaluation.

    Note: X_test/y_test drive both early stopping and the choice of the best
    weights, so the reported test metrics are not from data unseen during
    model selection.

    Args:
        model: nn.Module whose outputs are probabilities in [0, 1].
        X_train, y_train: Training features and binary indicator targets.
        X_test, y_test: Evaluation features and binary indicator targets.
        model_name: Used in the checkpoint file name.
        embedding_type: Used in the checkpoint file name.
        batch_size: Mini-batch size for the training DataLoader.
        lr: Adam learning rate.
        num_epochs: Maximum number of epochs.
        patience: Number of consecutive non-improving epochs before stopping.

    Returns:
        tuple: (model, train_metrics, test_metrics, train_losses, val_losses,
        train_metrics_history, test_metrics_history, train_class_report,
        test_class_report). The metrics are dicts with the keys 'accuracy',
        'precision', 'recall' and 'f1'; the histories hold one such dict per
        epoch; the class reports are classification_report dicts.
    """
    import copy  # local import: only needed for the in-memory snapshot of the best weights

    checkpoint_path = f"best_{embedding_type}_{model_name}_classifier.pt"

    # Convert to tensors
    X_train_tensor = torch.FloatTensor(X_train)
    y_train_tensor = torch.FloatTensor(y_train)
    X_test_tensor = torch.FloatTensor(X_test)
    y_test_tensor = torch.FloatTensor(y_test)

    # Create dataset and dataloader
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Loss and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Early-stopping state. The best weights are also kept in memory so the
    # final evaluation never depends on a checkpoint left over from an
    # earlier run when no epoch of this run improved.
    best_loss = float('inf')
    best_state = None
    patience_counter = 0

    # Per-epoch loss and metric histories
    train_losses = []
    val_losses = []
    train_metrics_history = []
    test_metrics_history = []

    for epoch in range(num_epochs):
        # Training
        model.train()
        epoch_loss = 0.0
        for inputs, targets in train_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        avg_train_loss = epoch_loss / len(train_loader)
        train_losses.append(avg_train_loss)

        # Evaluate on both training and test sets
        model.eval()
        with torch.no_grad():
            train_pred_binary = (model(X_train_tensor) > 0.5).float().numpy()

            test_outputs = model(X_test_tensor)
            test_loss = criterion(test_outputs, y_test_tensor).item()
            test_pred_binary = (test_outputs > 0.5).float().numpy()

        val_losses.append(test_loss)

        train_metrics = _multilabel_metrics(y_train, train_pred_binary)
        train_metrics_history.append(train_metrics)

        test_metrics = _multilabel_metrics(y_test, test_pred_binary)
        test_metrics_history.append(test_metrics)

        # Early stopping check
        if test_loss < best_loss:
            best_loss = test_loss
            patience_counter = 0
            # Save best model (on disk for later use, in memory for the reload below)
            torch.save(model.state_dict(), checkpoint_path)
            best_state = copy.deepcopy(model.state_dict())
        else:
            patience_counter += 1

        if (epoch + 1) % 5 == 0 or epoch == 0:
            logging.info(f'Epoch [{epoch+1}/{num_epochs}], '
                       f'Train Loss: {avg_train_loss:.4f}, '
                       f'Val Loss: {test_loss:.4f}')
            logging.info(f'Train Metrics: Acc={train_metrics["accuracy"]:.4f}, '
                       f'F1={train_metrics["f1"]:.4f}')
            logging.info(f'Test Metrics: Acc={test_metrics["accuracy"]:.4f}, '
                       f'F1={test_metrics["f1"]:.4f}')

        # Check if early stopping criteria is met
        if patience_counter >= patience:
            logging.info(f"Early stopping at epoch {epoch+1}")
            break

    # Restore the best weights for the final evaluation
    if best_state is not None:
        model.load_state_dict(best_state)
    else:
        # No epoch ran or the loss never improved (e.g. NaN): evaluate the
        # current weights instead of loading an unrelated checkpoint.
        logging.warning("No improving epoch was recorded; evaluating the current model weights")

    # Final evaluation
    model.eval()
    with torch.no_grad():
        train_pred_binary = (model(X_train_tensor) > 0.5).float().numpy()
        test_pred_binary = (model(X_test_tensor) > 0.5).float().numpy()

    train_metrics = _multilabel_metrics(y_train, train_pred_binary)
    test_metrics = _multilabel_metrics(y_test, test_pred_binary)

    # Log final results
    logging.info("\nFinal Model Performance:")
    logging.info("Training Set Metrics:")
    for metric, value in train_metrics.items():
        logging.info(f"{metric.capitalize()}: {value:.4f}")

    logging.info("\nTest Set Metrics:")
    for metric, value in test_metrics.items():
        logging.info(f"{metric.capitalize()}: {value:.4f}")

    # Get per-class metrics for both sets
    train_class_report = classification_report(y_train, train_pred_binary,
                                               zero_division=0, output_dict=True)
    test_class_report = classification_report(y_test, test_pred_binary,
                                              zero_division=0, output_dict=True)

    return (model, train_metrics, test_metrics, train_losses, val_losses,
            train_metrics_history, test_metrics_history,
            train_class_report, test_class_report)


def plot_training_curves(train_losses, val_losses, train_metrics_history, test_metrics_history, model_name, embedding_type):
    """Save the per-epoch loss and metric curves of one training run as PNG files.

    Writes two images into the current working directory:
    ``<embedding_type>_<model_name>_metrics.png`` (2x2 grid: losses, accuracy,
    training precision/recall, test precision/recall) and
    ``<embedding_type>_<model_name>_f1_curve.png`` (training vs test F1).

    Args:
        train_losses: Training loss per epoch.
        val_losses: Validation loss per epoch.
        train_metrics_history: One metrics dict per epoch for the training set.
        test_metrics_history: One metrics dict per epoch for the test set.
        model_name: Used in titles and file names.
        embedding_type: Used in titles and file names.
    """
    def series(history, key):
        # Pull one metric out of every per-epoch dict
        return [epoch_metrics[key] for epoch_metrics in history]

    def decorate(title, y_label):
        # Shared axis labelling for every chart drawn here
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()
        plt.grid(True)

    def panel(slot, curves, title, y_label):
        # Draw the given (values, label) curves into cell `slot` of the 2x2 grid
        plt.subplot(2, 2, slot)
        for values, label in curves:
            plt.plot(values, label=label)
        decorate(title, y_label)

    # Figure 1: 2x2 overview grid
    plt.figure(figsize=(12, 8))

    panel(
        1,
        [(train_losses, 'Training Loss'), (val_losses, 'Validation Loss')],
        f'Loss Curves for {model_name} ({embedding_type.upper()})',
        'Loss',
    )
    panel(
        2,
        [(series(train_metrics_history, 'accuracy'), 'Training Accuracy'),
         (series(test_metrics_history, 'accuracy'), 'Test Accuracy')],
        'Accuracy',
        'Accuracy',
    )
    panel(
        3,
        [(series(train_metrics_history, 'precision'), 'Training Precision'),
         (series(train_metrics_history, 'recall'), 'Training Recall')],
        'Training Set: Precision and Recall',
        'Score',
    )
    panel(
        4,
        [(series(test_metrics_history, 'precision'), 'Test Precision'),
         (series(test_metrics_history, 'recall'), 'Test Recall')],
        'Test Set: Precision and Recall',
        'Score',
    )

    plt.tight_layout()
    plt.savefig(f"{embedding_type}_{model_name}_metrics.png")
    plt.close()

    # Figure 2: F1 evolution on its own axes
    plt.figure(figsize=(10, 6))
    plt.plot(series(train_metrics_history, 'f1'), label='Training F1')
    plt.plot(series(test_metrics_history, 'f1'), label='Test F1')
    decorate(f'F1 Score Evolution for {model_name} ({embedding_type.upper()})', 'F1 Score')
    plt.savefig(f"{embedding_type}_{model_name}_f1_curve.png")
    plt.close()


def process_embedding_type(embedding_type):
    """Train and evaluate SimpleNN and DeepNN on one embedding type.

    For each model this trains it, logs the final metrics, saves the
    training-curve plots, the per-class reports (CSV) and a checkpoint named
    ``<embedding_type>_<model>_classifier.pt``.

    Args:
        embedding_type: 'cbow', 'skipgram', 'tfidf', or 'sbert'

    Returns:
        dict: ``{model_name: {'train': metrics, 'test': metrics}}``.
    """
    rule = '-' * 70
    logging.info(f"\n{rule}\nProcessing {embedding_type.upper()} embeddings\n{rule}")

    # Load and split
    df = load_embeddings(embedding_type)
    logging.info(f"Loaded {embedding_type} dataframe with shape: {df.shape}")
    X_train, X_test, y_train, y_test, classes = prepare_data(df)

    # Layer sizes follow from the data
    n_features = X_train.shape[1]
    n_labels = y_train.shape[1]

    # Both networks are built up front, in this order, before any training starts
    candidates = [
        ('SimpleNN', SimpleNN(n_features, hidden_dim=128, output_dim=n_labels)),
        ('DeepNN', DeepNN(n_features, hidden_dims=[256, 128, 64], output_dim=n_labels)),
    ]

    results = {}
    for name, model in candidates:
        frame = '=' * 50
        logging.info(f"\n{frame}\nTraining {name} with {embedding_type.upper()}\n{frame}")

        (trained_model, train_metrics, test_metrics,
         train_losses, val_losses,
         train_metrics_history, test_metrics_history,
         train_class_report, test_class_report) = train_model(
            model, X_train, y_train, X_test, y_test, name, embedding_type
        )

        # Log results
        logging.info(f"Results for {name} with {embedding_type.upper()}:")
        for heading, scores in (("TRAINING SET:", train_metrics), ("\nTEST SET:", test_metrics)):
            logging.info(heading)
            for metric, value in scores.items():
                logging.info(f"{metric.capitalize()}: {value:.4f}")

        # Learning curves
        plot_training_curves(train_losses, val_losses, train_metrics_history, test_metrics_history, name, embedding_type)

        # Per-class reports, one CSV per split
        for split_name, report in (('train', train_class_report), ('test', test_class_report)):
            report_path = f"{embedding_type}_{name}_{split_name}_class_report.csv"
            pd.DataFrame(report).transpose().to_csv(report_path)

        # Checkpoint with weights plus the metadata needed to interpret them
        checkpoint = {
            'model_state_dict': trained_model.state_dict(),
            'class_names': classes,
            'train_metrics': train_metrics,
            'test_metrics': test_metrics,
            'architecture': str(trained_model)
        }
        torch.save(checkpoint, f"{embedding_type}_{name}_classifier.pt")

        results[name] = {'train': train_metrics, 'test': test_metrics}

    return results


def plot_metrics_comparison(results_dict):
    """Save a grouped bar chart of all metrics per model/embedding pair.

    NOTE(review): a second ``plot_metrics_comparison`` defined further down in
    this file rebinds the name, so this version is not the one reached through
    the module-level name afterwards. It reads
    ``results_dict[embedding][model][metric]`` directly, i.e. it expects the
    metrics without 'train'/'test' sub-dictionaries.

    Args:
        results_dict: ``{embedding_type: {model_name: {metric: value}}}``.
    """
    metric_keys = ['accuracy', 'precision', 'recall', 'f1']

    # Long-format rows: one per (model/embedding, metric)
    rows = []
    for embedding_type, models_data in results_dict.items():
        for model_name, metrics_dict in models_data.items():
            label = f"{model_name} ({embedding_type.upper()})"
            rows.extend(
                {'Model': label, 'Metric': key.capitalize(), 'Value': metrics_dict[key]}
                for key in metric_keys
            )
    df_plot = pd.DataFrame(rows)

    # Grouped bars: x = model label, hue = metric
    plt.figure(figsize=(16, 10))
    sns.barplot(data=df_plot, x='Model', y='Value', hue='Metric')
    plt.title('Performance Metrics Comparison: CBOW vs Skip-gram vs TF-IDF vs SBERT')
    plt.ylim(0, 1)
    plt.xticks(rotation=45, ha='right')
    plt.grid(True, axis='y')
    plt.tight_layout()
    plt.savefig("embedding_comparison.png")
    plt.close()


def plot_embedding_comparison(results_dict):
    """Save one bar chart per metric comparing embedding types across models.

    NOTE(review): a second ``plot_embedding_comparison`` defined further down
    in this file rebinds the name, so this version is not the one reached
    through the module-level name afterwards. It reads
    ``results_dict[embedding][model][metric]`` directly, i.e. it expects the
    metrics without 'train'/'test' sub-dictionaries.

    Args:
        results_dict: ``{embedding_type: {model_name: {metric: value}}}``.
            The model names are taken from the first embedding type.
    """
    metric_keys = ['accuracy', 'precision', 'recall', 'f1']
    model_names = list(next(iter(results_dict.values())).keys())

    embedding_types = list(results_dict.keys())
    n_embeddings = len(embedding_types)
    bar_width = 0.2
    positions = np.arange(len(model_names))

    for metric in metric_keys:
        plt.figure(figsize=(12, 7))

        # One bar group per model; bars within a group are centred on the tick
        for rank, emb_type in enumerate(embedding_types):
            heights = [results_dict[emb_type][model][metric] for model in model_names]
            shift = bar_width * (rank - n_embeddings/2 + 0.5)
            plt.bar(positions + shift, heights, bar_width, label=emb_type.upper())

        pretty = metric.capitalize()
        plt.xlabel('Model')
        plt.ylabel(pretty)
        plt.title(f'{pretty} Comparison: CBOW vs Skip-gram vs TF-IDF vs SBERT')
        plt.xticks(positions, model_names)
        plt.ylim(0, 1)
        plt.legend()
        plt.grid(True, axis='y')
        plt.tight_layout()

        plt.savefig(f"{metric}_comparison.png")
        plt.close()

def plot_metrics_comparison(results_dict):
    """Save the all-metrics bar chart once for the training set and once for the test set.

    Args:
        results_dict: ``{embedding_type: {model_name: {'train': metrics, 'test': metrics}}}``.
    """
    metric_keys = ['accuracy', 'precision', 'recall', 'f1']

    # Training set first, then test set
    for split_key, heading in (('train', "Training Set"), ('test', "Test Set")):
        plot_set_comparison(results_dict, split_key, metric_keys, heading)


def plot_set_comparison(results_dict, set_type, metrics, title_prefix):
    """Save a grouped bar chart of `metrics` for one split of the results.

    The image is written to ``<set_type>_embedding_comparison.png``.

    Args:
        results_dict: ``{embedding_type: {model_name: {set_type: {metric: value}}}}``.
        set_type: Key of the split to plot ('train' or 'test').
        metrics: Metric keys to include.
        title_prefix: Text placed at the start of the chart title.
    """
    # Long-format rows: one per (model/embedding, metric)
    rows = []
    for embedding_type, models_data in results_dict.items():
        for model_name, metrics_dict in models_data.items():
            label = f"{model_name} ({embedding_type.upper()})"
            split_scores = metrics_dict[set_type]
            rows.extend(
                {'Model': label, 'Metric': metric.capitalize(), 'Value': split_scores[metric]}
                for metric in metrics
            )
    df_plot = pd.DataFrame(rows)

    # Grouped bars: x = model label, hue = metric
    plt.figure(figsize=(16, 10))
    sns.barplot(data=df_plot, x='Model', y='Value', hue='Metric')
    plt.title(f'{title_prefix} Performance Metrics: CBOW vs Skip-gram vs TF-IDF vs SBERT')
    plt.ylim(0, 1)
    plt.xticks(rotation=45, ha='right')
    plt.grid(True, axis='y')
    plt.tight_layout()
    plt.savefig(f"{set_type}_embedding_comparison.png")
    plt.close()


def plot_embedding_comparison(results_dict):
    """Save the per-metric embedding comparison charts for the training and test sets.

    Args:
        results_dict: ``{embedding_type: {model_name: {'train': metrics, 'test': metrics}}}``.
            The model names are taken from the first embedding type.
    """
    metric_keys = ['accuracy', 'precision', 'recall', 'f1']
    first_entry = next(iter(results_dict.values()))
    model_names = list(first_entry.keys())

    # Training set first, then test set
    for split_key, heading in (('train', "Training Set"), ('test', "Test Set")):
        plot_embedding_set_comparison(results_dict, model_names, metric_keys, split_key, heading)


def plot_embedding_set_comparison(results_dict, model_names, metrics, set_type, title_prefix):
    """Save one bar chart per metric comparing embedding types for one split.

    Each image is written to ``<set_type>_<metric>_comparison.png``.

    Args:
        results_dict: ``{embedding_type: {model_name: {set_type: {metric: value}}}}``.
        model_names: Models to show, one bar group each.
        metrics: Metric keys; one figure is produced per key.
        set_type: Key of the split to plot ('train' or 'test').
        title_prefix: Text placed at the start of each chart title.
    """
    embedding_types = list(results_dict.keys())
    n_embeddings = len(embedding_types)
    bar_width = 0.2
    positions = np.arange(len(model_names))

    for metric in metrics:
        plt.figure(figsize=(12, 7))

        # Bars of one group are spread symmetrically around the model's tick
        for rank, emb_type in enumerate(embedding_types):
            heights = [results_dict[emb_type][model][set_type][metric] for model in model_names]
            shift = bar_width * (rank - n_embeddings/2 + 0.5)
            plt.bar(positions + shift, heights, bar_width, label=emb_type.upper())

        pretty = metric.capitalize()
        plt.xlabel('Model')
        plt.ylabel(pretty)
        plt.title(f'{title_prefix} {pretty} Comparison: CBOW vs Skip-gram vs TF-IDF vs SBERT')
        plt.xticks(positions, model_names)
        plt.ylim(0, 1)
        plt.legend()
        plt.grid(True, axis='y')
        plt.tight_layout()

        plt.savefig(f"{set_type}_{metric}_comparison.png")
        plt.close()


def plot_train_test_comparison(results_dict):
    """Save one chart per metric with training and test bars side by side.

    Each image is written to ``train_test_<metric>_comparison.png`` and has
    one pair of bars per model/embedding combination.

    Args:
        results_dict: ``{embedding_type: {model_name: {'train': metrics, 'test': metrics}}}``.
    """
    # Flatten to (label, scores) pairs once; the order defines the x positions
    entries = [
        (f"{model_name} ({embedding_type.upper()})", scores)
        for embedding_type, models in results_dict.items()
        for model_name, scores in models.items()
    ]
    tick_labels = [label for label, _ in entries]
    positions = np.arange(len(tick_labels))
    bar_width = 0.35

    for metric in ['accuracy', 'precision', 'recall', 'f1']:
        plt.figure(figsize=(15, 8))

        train_heights = [scores['train'][metric] for _, scores in entries]
        test_heights = [scores['test'][metric] for _, scores in entries]

        # Training bar left of the tick, test bar right of it
        plt.bar(positions - bar_width/2, train_heights, bar_width, label='Training')
        plt.bar(positions + bar_width/2, test_heights, bar_width, label='Test')

        pretty = metric.capitalize()
        plt.xlabel('Model and Embedding')
        plt.ylabel(f'{pretty} Score')
        plt.title(f'Training vs Test {pretty} Comparison')
        plt.xticks(positions, tick_labels, rotation=45, ha='right')
        plt.ylim(0, 1)
        plt.legend()
        plt.grid(True, axis='y')
        plt.tight_layout()

        plt.savefig(f"train_test_{metric}_comparison.png")
        plt.close()


def calculate_overfitting_metrics(results_dict):
    """Compute train-minus-test gaps per metric and save them as a heatmap.

    The heatmap is written to ``overfitting_heatmap.png``.

    Args:
        results_dict: ``{embedding_type: {model_name: {'train': metrics, 'test': metrics}}}``.

    Returns:
        pandas.DataFrame: Long-format table with the columns 'Model',
        'Metric' and 'Difference' (training value minus test value).
    """
    metric_keys = ['accuracy', 'precision', 'recall', 'f1']

    # One row per (model/embedding, metric) holding the train - test gap
    gap_rows = []
    for embedding_type, models in results_dict.items():
        for model_name, scores in models.items():
            label = f"{model_name} ({embedding_type.upper()})"
            train_scores, test_scores = scores['train'], scores['test']
            for key in metric_keys:
                gap_rows.append({
                    'Model': label,
                    'Metric': key.capitalize(),
                    'Difference': train_scores[key] - test_scores[key],
                })
    df_diff = pd.DataFrame(gap_rows)

    # Models as rows, metrics as columns; colour scale centred on zero
    plt.figure(figsize=(12, 8))
    gap_table = df_diff.pivot(index='Model', columns='Metric', values='Difference')
    sns.heatmap(gap_table, annot=True, cmap='coolwarm', center=0, fmt='.3f')
    plt.title('Overfitting Analysis: Difference Between Training and Test Metrics')
    plt.tight_layout()
    plt.savefig("overfitting_heatmap.png")
    plt.close()

    return df_diff


def main():
    """Run the classification pipeline for all embedding types and save the comparison.

    Processes CBOW, Skip-gram, TF-IDF and SBERT embeddings in that order,
    draws the comparison plots, and writes these CSV files to the current
    working directory: ``train_embedding_comparison.csv``,
    ``test_embedding_comparison.csv``, ``overfitting_analysis.csv`` and
    ``embedding_classification_summary.csv``. The summary and the average
    performance per embedding type are also printed.
    """
    logging.info("Starting embedding classification comparing CBOW, Skip-gram, TF-IDF and SBERT")

    # Insertion order here fixes the order of rows and bars in every output
    all_results = {
        emb_type: process_embedding_type(emb_type)
        for emb_type in ('cbow', 'skipgram', 'tfidf', 'sbert')
    }

    # Compare models and embeddings
    plot_metrics_comparison(all_results)
    plot_embedding_comparison(all_results)
    plot_train_test_comparison(all_results)
    overfitting_df = calculate_overfitting_metrics(all_results)

    # Save overall results: one column per model/embedding, one file per split
    for set_type in ('train', 'test'):
        results_df = pd.DataFrame({
            f"{model}_{emb_type}_{set_type}": metrics[set_type]
            for emb_type, models in all_results.items()
            for model, metrics in models.items()
        })
        results_df.to_csv(f"{set_type}_embedding_comparison.csv")

    # Save overfitting analysis
    overfitting_df.to_csv("overfitting_analysis.csv", index=False)

    # Summary table: all training rows first, then all test rows
    summary_rows = []
    for set_type, set_name in (('train', 'Training'), ('test', 'Test')):
        for emb_type, models in all_results.items():
            for model_name, metrics in models.items():
                row = {
                    'Embedding': emb_type.upper(),
                    'Model': model_name,
                    'Set': set_name
                }
                row.update({k.capitalize(): f"{v:.4f}" for k, v in metrics[set_type].items()})
                summary_rows.append(row)

    summary_df = pd.DataFrame(summary_rows)
    summary_df.to_csv("embedding_classification_summary.csv", index=False)

    print("\nClassification results summary:")
    print(summary_df.to_string(index=False))

    # Calculate average performance per embedding type for both sets
    print("\nAverage performance by embedding type:")
    for set_type, set_name in [('train', 'Training'), ('test', 'Test')]:
        print(f"\n{set_name} Set:")
        for emb_type, models in all_results.items():
            avg_metrics = {
                metric: np.mean([models[model][set_type][metric] for model in models])
                for metric in ['accuracy', 'precision', 'recall', 'f1']
            }
            print(f"{emb_type.upper()}: " + ", ".join([f"{k.capitalize()}: {v:.4f}" for k, v in avg_metrics.items()]))

    # Every output file above is written with a bare file name, i.e. into the
    # current working directory, so the message must not point at 'results'.
    logging.info("Classification comparison completed! Results saved to the current working directory.")

# Run the pipeline only when this code is executed directly (``__name__`` is
# "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()

2025-05-08 00:35:00 - INFO - Starting embedding classification comparing CBOW, Skip-gram, TF-IDF and SBERT
2025-05-08 00:35:00 - INFO - 
----------------------------------------------------------------------
Processing CBOW embeddings
----------------------------------------------------------------------
2025-05-08 00:35:00 - INFO - Loading CBOW embeddings...
  data = torch.load(f"{file_name}")
2025-05-08 00:35:00 - INFO - Successfully loaded cbow embeddings from PT file: (13000, 100)
2025-05-08 00:35:00 - INFO - Loaded cbow dataframe with shape: (13000, 102)
2025-05-08 00:35:00 - INFO - Data shapes: X_train=(10400, 100), y_train=(10400, 12)
2025-05-08 00:35:00 - INFO - Classes: ['Priceperformance' 'Product' 'Speedeslimat' 'convenience'
 'fiyatperformans' 'good -up' 'hızlıteslimat' 'iyipaketleme'
 'kaliteliürün' 'uygunfiyat' 'your quality' 'ürüngüzel']
2025-05-08 00:35:00 - INFO - 
Training SimpleNN with CBOW
2025-05-08 00:35:01 - INFO - Epoch [1/20], Train Loss: 0.2287, Val Loss: 0.17


Classification results summary:
Embedding    Model      Set Accuracy Precision Recall     F1
     CBOW SimpleNN Training   0.5718    0.8225 0.6386 0.7109
     CBOW   DeepNN Training   0.6169    0.8248 0.6715 0.7384
 SKIPGRAM SimpleNN Training   0.6316    0.8287 0.7082 0.7628
 SKIPGRAM   DeepNN Training   0.6641    0.8443 0.7262 0.7773
    TFIDF SimpleNN Training   0.7670    0.9139 0.8327 0.8697
    TFIDF   DeepNN Training   0.7429    0.8664 0.7857 0.8232
    SBERT SimpleNN Training   0.8754    0.9382 0.9288 0.9326
    SBERT   DeepNN Training   0.8703    0.9358 0.9206 0.9278
     CBOW SimpleNN     Test   0.5562    0.8043 0.6295 0.6978
     CBOW   DeepNN     Test   0.5965    0.8053 0.6598 0.7219
 SKIPGRAM SimpleNN     Test   0.6123    0.8084 0.6998 0.7496
 SKIPGRAM   DeepNN     Test   0.6246    0.8204 0.7004 0.7523
    TFIDF SimpleNN     Test   0.6358    0.8471 0.7198 0.7757
    TFIDF   DeepNN     Test   0.6215    0.7825 0.6951 0.7349
    SBERT SimpleNN     Test   0.8662    0.9284 0.927