# Comparative Hyperparameter Optimization for Aspect-Based Sentiment Analysis

This notebook implements a comprehensive comparison of ABSA models:
- **Models:** Random Forest vs XGBoost
- **Feature Extraction:** c-TF-IDF vs TF-IDF with N-grams (bigrams, trigrams)
- **Sampling Methods:** SMOTE vs ADASYN
- **Hyperparameter Optimization:** RandomizedSearchCV (Random Forest) vs Optuna with the TPE sampler, a Bayesian optimization method (XGBoost)

In [None]:
!pip install pandas numpy scikit-learn optuna xgboost imblearn 

## 1. Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import (
    confusion_matrix, classification_report, roc_auc_score, 
    roc_curve, auc, accuracy_score, precision_recall_fscore_support
)
from imblearn.over_sampling import SMOTE, ADASYN
from scipy.stats import randint, uniform
import optuna
from optuna.samplers import TPESampler
import warnings
import os
from datetime import datetime
import json
import pickle

warnings.filterwarnings('ignore')

# Set random seed for reproducibility
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

## 2. Class-based TF-IDF (c-TF-IDF) Implementation

In [None]:
class CTFIDFVectorizer:
    """Class-based TF-IDF (c-TF-IDF) vectorizer.

    All training documents that share a label are concatenated into one
    "class document" and a ``TfidfVectorizer`` is fitted on those class
    documents, so vocabulary and IDF weights are learned per class rather
    than per document. Individual documents are then transformed with the
    fitted vectorizer.
    """

    def __init__(self, ngram_range=(1, 1), max_features=None):
        self.ngram_range = ngram_range
        self.max_features = max_features
        self.vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
        # label -> concatenated text of every training document with that label
        self.class_docs = {}

    def fit(self, X, y):
        """Build one document per class and fit the vectorizer on them.

        Args:
            X: Sequence of documents (list, NumPy array or pandas Series).
            y: Sequence of labels aligned with ``X``.

        Returns:
            ``self``.
        """
        # Coerce to arrays so boolean masking works for lists and Series alike.
        # dtype=object keeps None/NaN entries intact instead of stringifying them.
        texts = np.asarray(X, dtype=object)
        labels = np.asarray(y)

        # Build into a fresh dict so that refitting never keeps stale classes.
        class_docs = {}
        for label in np.unique(labels):
            # Skip missing and blank documents
            valid_texts = [
                str(text) for text in texts[labels == label]
                if pd.notna(text) and str(text).strip()
            ]
            class_docs[label] = ' '.join(valid_texts)
        self.class_docs = class_docs

        # Fit on the class documents in sorted label order
        self.vectorizer.fit([class_docs[label] for label in sorted(class_docs)])
        return self

    def transform(self, X):
        """Transform individual documents with the class-fitted vectorizer."""
        return self.vectorizer.transform(X)

    def fit_transform(self, X, y):
        """Fit on ``(X, y)`` and return the transformed ``X``."""
        self.fit(X, y)
        return self.transform(X)

## 3. Comparative ABSA Main Class

In [None]:
class ComparativeABSA:
    """Comparative Aspect-Based Sentiment Analysis with Hyperparameter Optimization"""
    
    def __init__(self, output_dir='results'):
        self.output_dir = output_dir
        self.timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        self.results = {}
        self.models = {}
        self.vectorizers = {}
        
        # Create output directories
        os.makedirs(output_dir, exist_ok=True)
        os.makedirs(f'{output_dir}/visualizations', exist_ok=True)
        os.makedirs(f'{output_dir}/models', exist_ok=True)
    
    @staticmethod
    def df_to_markdown(df, float_format='.4f'):
        """Convert DataFrame to markdown table format"""
        # Get column names
        cols = df.columns.tolist()
        
        # Create header
        header = '| ' + ' | '.join(str(col) for col in cols) + ' |'
        separator = '| ' + ' | '.join(['---' for _ in cols]) + ' |'
        
        # Create rows
        rows = []
        for _, row in df.iterrows():
            formatted_values = []
            for val in row:
                if isinstance(val, float):
                    formatted_values.append(f'{val:{float_format}}')
                else:
                    formatted_values.append(str(val))
            rows.append('| ' + ' | '.join(formatted_values) + ' |')
        
        return '\n'.join([header, separator] + rows)
        
    def load_and_prepare_data(self, filepath, sample_size_per_bank=1000):
        """Load the gzip-compressed CSV and draw a per-bank, per-sentiment sample.

        For every bank, ``sample_size_per_bank`` is divided evenly across the
        sentiments present for that bank. A group smaller than its quota is
        sampled with replacement. Duplicate ``content_stemmed`` values are
        dropped afterwards, so oversampled groups can still end up smaller
        than the quota.

        Args:
            filepath: Path or URL of the gzip-compressed CSV. It must contain
                the columns ``bank_name``, ``sentiment`` and ``content_stemmed``.
            sample_size_per_bank: Number of rows requested per bank.

        Returns:
            The sampled DataFrame with duplicate, missing and blank
            ``content_stemmed`` rows removed.

        Raises:
            ValueError: If no row has both a bank name and a sentiment.
        """
        print(f"Loading dataset from {filepath}...")
        df = pd.read_csv(filepath, compression='gzip', encoding='utf-8', on_bad_lines='skip')

        print(f"Original dataset shape: {df.shape}")
        print(f"\nBank distribution:\n{df['bank_name'].value_counts()}")
        print(f"\nSentiment distribution:\n{df['sentiment'].value_counts()}")

        # Rows without a bank or a sentiment cannot be assigned to a group.
        # Left in, NaN shows up in unique(), matches no row under ==, and the
        # resulting empty group breaks the quota division or the sampling.
        labelled = df.dropna(subset=['bank_name', 'sentiment'])

        balanced_samples = []

        for bank in labelled['bank_name'].unique():
            bank_data = labelled[labelled['bank_name'] == bank]

            # Split this bank's quota evenly across its sentiments
            sentiments = bank_data['sentiment'].unique()
            samples_per_sentiment = sample_size_per_bank // len(sentiments)

            for sentiment in sentiments:
                sentiment_data = bank_data[bank_data['sentiment'] == sentiment]

                # Sample with replacement only when the group is too small
                sampled = sentiment_data.sample(
                    n=samples_per_sentiment,
                    replace=len(sentiment_data) < samples_per_sentiment,
                    random_state=RANDOM_STATE,
                )
                balanced_samples.append(sampled)

        if not balanced_samples:
            raise ValueError("No rows with both 'bank_name' and 'sentiment' were found")

        # Combine all samples
        balanced_df = pd.concat(balanced_samples, ignore_index=True)

        # Remove duplicates (this also collapses rows repeated by oversampling)
        balanced_df = balanced_df.drop_duplicates(subset=['content_stemmed'])

        # Remove rows with NaN or empty content_stemmed
        balanced_df = balanced_df[balanced_df['content_stemmed'].notna()]
        balanced_df = balanced_df[balanced_df['content_stemmed'].str.strip() != '']

        print(f"\nBalanced dataset shape: {balanced_df.shape}")
        print(f"\nBalanced bank distribution:\n{balanced_df['bank_name'].value_counts()}")
        print(f"\nBalanced sentiment distribution:\n{balanced_df['sentiment'].value_counts()}")

        return balanced_df
    
    def create_features(self, X_train, X_test, y_train, feature_type='tfidf', ngram_range=(1, 2)):
        """Create features using TF-IDF or c-TF-IDF"""
        print(f"\nCreating features with {feature_type} and n-gram range {ngram_range}...")
        
        if feature_type == 'ctfidf':
            vectorizer = CTFIDFVectorizer(ngram_range=ngram_range, max_features=5000)
        else:  # tfidf
            vectorizer = TfidfVectorizer(ngram_range=ngram_range, max_features=5000)
        
        if feature_type == 'ctfidf':
            X_train_vec = vectorizer.fit_transform(X_train.values, y_train.values)
        else:
            X_train_vec = vectorizer.fit_transform(X_train.values)
        
        X_test_vec = vectorizer.transform(X_test.values)
        
        print(f"Feature matrix shape: {X_train_vec.shape}")
        
        return X_train_vec, X_test_vec, vectorizer
    
    def apply_sampling(self, X_train, y_train, sampling_method='smote'):
        """Apply SMOTE or ADASYN sampling"""
        print(f"\nApplying {sampling_method.upper()} sampling...")
        print(f"Before sampling: {y_train.value_counts().to_dict()}")
        
        if sampling_method == 'smote':
            sampler = SMOTE(random_state=RANDOM_STATE, k_neighbors=5)
        else:  # adasyn
            sampler = ADASYN(random_state=RANDOM_STATE, n_neighbors=5)
        
        X_resampled, y_resampled = sampler.fit_resample(X_train, y_train)
        
        print(f"After sampling: {pd.Series(y_resampled).value_counts().to_dict()}")
        
        return X_resampled, y_resampled
    
    def optimize_random_forest_random_search(self, X_train, y_train):
        """Tune a Random Forest with a 50-draw randomized search (3-fold CV).

        Candidates are scored by weighted F1.

        Returns:
            Tuple ``(best_estimator, best_params)`` from the finished search.
        """
        print("\nOptimizing Random Forest with RandomizedSearchCV...")

        # Distributions / candidate lists the search samples from
        search_space = dict(
            n_estimators=randint(100, 500),
            max_depth=[10, 20, 30, 40, 50, None],
            min_samples_split=randint(2, 20),
            min_samples_leaf=randint(1, 10),
            max_features=['sqrt', 'log2', None],
            bootstrap=[True, False],
            class_weight=['balanced', 'balanced_subsample', None],
        )

        base_forest = RandomForestClassifier(random_state=RANDOM_STATE, n_jobs=-1)

        search = RandomizedSearchCV(
            base_forest,
            search_space,
            n_iter=50,
            cv=3,
            scoring='f1_weighted',
            random_state=RANDOM_STATE,
            n_jobs=-1,
            verbose=1,
        )
        search.fit(X_train, y_train)

        tuned_params = search.best_params_
        print(f"Best parameters: {tuned_params}")
        print(f"Best CV score: {search.best_score_:.4f}")

        return search.best_estimator_, tuned_params
    
    def optimize_xgboost_optuna(self, X_train, y_train):
        """Tune an XGBoost classifier with Optuna (TPE sampler, 50 trials).

        Labels are integer-encoded with a ``LabelEncoder`` before training.
        Each trial is scored by the mean weighted F1 of a 3-fold
        cross-validation. The final model is refitted on all of ``X_train``.

        Returns:
            Tuple ``(best_model, best_params)``. ``best_model`` carries the
            fitted encoder as ``best_model.label_encoder`` so predictions can
            be mapped back to the original labels. ``best_params`` holds only
            the tuned hyperparameters.
        """
        print("\nOptimizing XGBoost with Optuna...")

        from sklearn.model_selection import cross_val_score
        from sklearn.preprocessing import LabelEncoder

        # Encode labels for XGBoost
        le = LabelEncoder()
        y_train_encoded = le.fit_transform(y_train)

        # Settings that are not tuned. They must reach BOTH the trial models
        # and the final model, otherwise the refitted model differs from the
        # configuration that was evaluated and loses its fixed seed.
        fixed_params = {
            'random_state': RANDOM_STATE,
            'n_jobs': -1,
            'tree_method': 'hist',
        }

        def objective(trial):
            params = {
                'n_estimators': trial.suggest_int('n_estimators', 100, 500),
                'max_depth': trial.suggest_int('max_depth', 3, 15),
                'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
                'subsample': trial.suggest_float('subsample', 0.6, 1.0),
                'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
                'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
                'gamma': trial.suggest_float('gamma', 0, 5),
                'reg_alpha': trial.suggest_float('reg_alpha', 0, 2),
                'reg_lambda': trial.suggest_float('reg_lambda', 0, 2),
                **fixed_params,
            }

            model = XGBClassifier(**params)

            # Cross-validation
            scores = cross_val_score(model, X_train, y_train_encoded, cv=3,
                                     scoring='f1_weighted', n_jobs=-1)

            return scores.mean()

        study = optuna.create_study(direction='maximize', sampler=TPESampler(seed=RANDOM_STATE))
        study.optimize(objective, n_trials=50, show_progress_bar=True)

        print(f"Best parameters: {study.best_params}")
        print(f"Best CV score: {study.best_value:.4f}")

        # Train final model with the best tuned values plus the fixed settings
        best_model = XGBClassifier(**study.best_params, **fixed_params)
        best_model.fit(X_train, y_train_encoded)

        # Store label encoder for later use
        best_model.label_encoder = le

        return best_model, study.best_params
    
    def evaluate_model(self, model, X_test, y_test, model_name):
        """Score a fitted classifier on the test set.

        Computes accuracy, weighted precision/recall/F1, the classification
        report, the confusion matrix, one-vs-rest ROC curves per class and
        the macro-averaged ROC AUC.

        Args:
            model: Fitted classifier with ``predict`` and ``predict_proba``.
                If it has a ``label_encoder`` attribute, predictions are
                mapped back to the original labels with it.
            X_test: Test feature matrix.
            y_test: True test labels (original, non-encoded values).
            model_name: Name stored in the result and used in log output.

        Returns:
            Dict with the keys ``model_name``, ``accuracy``, ``precision``,
            ``recall``, ``f1_score``, ``roc_auc_macro``, ``roc_auc_per_class``,
            ``confusion_matrix``, ``classification_report``, ``fpr``, ``tpr``
            and ``classes``.
        """
        print(f"\nEvaluating {model_name}...")

        from sklearn.preprocessing import label_binarize

        y_pred = model.predict(X_test)
        y_pred_proba = model.predict_proba(X_test)

        # Take the class order from the model: the columns of predict_proba
        # follow the classes the model was trained on, which is only
        # guaranteed to equal np.unique(y_test) when every class is in y_test.
        encoder = getattr(model, 'label_encoder', None)
        if encoder is not None:
            # The model predicts integer codes; map them back to labels
            y_pred = encoder.inverse_transform(y_pred)
            classes = np.asarray(encoder.classes_)
        elif hasattr(model, 'classes_'):
            classes = np.asarray(model.classes_)
        else:
            classes = np.unique(y_test)

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='weighted')

        # Classification report
        report = classification_report(y_test, y_pred, output_dict=True)

        # Confusion matrix, with rows and columns pinned to `classes` so it
        # always lines up with the tick labels used when plotting
        cm = confusion_matrix(y_test, y_pred, labels=classes)

        # ROC AUC (multiclass, one-vs-rest)
        y_test_bin = label_binarize(y_test, classes=classes)
        if y_test_bin.shape[1] == 1:
            # With two classes label_binarize returns a single column for the
            # second class; add the complementary column for the first one so
            # the indicator matrix has one column per predict_proba column.
            y_test_bin = np.hstack([1 - y_test_bin, y_test_bin])

        # Calculate ROC AUC for each class
        roc_auc_dict = {}
        fpr_dict = {}
        tpr_dict = {}

        for i, class_name in enumerate(classes):
            fpr_dict[class_name], tpr_dict[class_name], _ = roc_curve(y_test_bin[:, i], y_pred_proba[:, i])
            roc_auc_dict[class_name] = auc(fpr_dict[class_name], tpr_dict[class_name])

        # Macro average ROC AUC
        roc_auc_macro = roc_auc_score(y_test_bin, y_pred_proba, average='macro', multi_class='ovr')

        results = {
            'model_name': model_name,
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1_score': f1,
            'roc_auc_macro': roc_auc_macro,
            'roc_auc_per_class': roc_auc_dict,
            'confusion_matrix': cm,
            'classification_report': report,
            'fpr': fpr_dict,
            'tpr': tpr_dict,
            'classes': classes
        }

        print(f"Accuracy: {accuracy:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print(f"F1-Score: {f1:.4f}")
        print(f"ROC AUC (Macro): {roc_auc_macro:.4f}")

        return results
    
    def plot_confusion_matrix(self, cm, classes, model_name, filename):
        """Draw an annotated confusion-matrix heatmap and save it to ``filename``."""
        fig, ax = plt.subplots(figsize=(10, 8))

        sns.heatmap(
            cm,
            annot=True,
            fmt='d',
            cmap='Blues',
            xticklabels=classes,
            yticklabels=classes,
            cbar_kws={'label': 'Count'},
            ax=ax,
        )

        ax.set_title(f'Confusion Matrix - {model_name}', fontsize=14, fontweight='bold')
        ax.set_ylabel('True Label', fontsize=12)
        ax.set_xlabel('Predicted Label', fontsize=12)

        fig.tight_layout()
        fig.savefig(filename, dpi=300, bbox_inches='tight')
        # Close the figure so repeated calls do not accumulate open figures
        plt.close(fig)
        print(f"Saved confusion matrix: {filename}")
    
    def plot_roc_curve(self, results_list, filename):
        """Plot the per-class ROC curves of several models in one figure.

        Each model gets its own colour and each class its own line style.

        Args:
            results_list: Result dicts providing ``model_name``, ``classes``,
                ``fpr``, ``tpr`` and ``roc_auc_per_class``.
            filename: Path of the image file to write.
        """
        plt.figure(figsize=(12, 8))

        colors = plt.cm.Set3(np.linspace(0, 1, len(results_list)))
        # Cycled by class position, so more classes than styles cannot raise
        # an IndexError (styles repeat instead)
        line_styles = ['-', '--', '-.', ':']

        for idx, result in enumerate(results_list):
            model_name = result['model_name']
            classes = result['classes']

            # Plot ROC curve for each class
            for i, class_name in enumerate(classes):
                fpr = result['fpr'][class_name]
                tpr = result['tpr'][class_name]
                roc_auc = result['roc_auc_per_class'][class_name]

                label = f"{model_name} - {class_name} (AUC = {roc_auc:.3f})"
                plt.plot(fpr, tpr, color=colors[idx], alpha=0.7,
                         linestyle=line_styles[i % len(line_styles)], linewidth=2, label=label)

        plt.plot([0, 1], [0, 1], 'k--', linewidth=2, label='Random Classifier')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate', fontsize=12)
        plt.ylabel('True Positive Rate', fontsize=12)
        plt.title('ROC Curves - Model Comparison', fontsize=14, fontweight='bold')
        plt.legend(loc='lower right', fontsize=9)
        plt.grid(alpha=0.3)
        plt.tight_layout()
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        plt.close()
        print(f"Saved ROC curve: {filename}")
    
    def plot_metrics_comparison(self, results_df, filename):
        """Plot accuracy, precision, recall and F1 as grouped bar charts.

        Bars are grouped by model (when a model column is present), feature
        type and n-gram; one bar per sampling method.

        Args:
            results_df: Results table. Columns may use either the snake_case
                names (``feature_type``, ``ngram``, ``sampling_method``,
                ``accuracy``, ...) or the report names (``Feature Type``,
                ``N-gram``, ``Sampling``, ``Accuracy``, ...).
            filename: Path of the image file to write.

        Raises:
            KeyError: If a required column is missing under both names.
        """
        # snake_case name -> column name used by the markdown/CSV report
        report_names = {
            'model_type': 'Model',
            'feature_type': 'Feature Type',
            'ngram': 'N-gram',
            'sampling_method': 'Sampling',
            'accuracy': 'Accuracy',
            'precision': 'Precision',
            'recall': 'Recall',
            'f1_score': 'F1-Score',
        }

        def column(name, required=True):
            """Return whichever spelling of ``name`` exists in ``results_df``."""
            for candidate in (name, report_names[name]):
                if candidate in results_df.columns:
                    return candidate
            if required:
                raise KeyError(
                    f"results_df has neither a '{name}' nor a '{report_names[name]}' column"
                )
            return None

        # Resolve every column before creating the figure so that a missing
        # column does not leave an open figure behind.
        model_col = column('model_type', required=False)
        group_cols = [column('feature_type'), column('ngram')]
        if model_col is not None:
            # Keep the models apart; otherwise pivot_table would average
            # their scores into a single bar.
            group_cols.insert(0, model_col)
        sampling_col = column('sampling_method')

        metrics = ['accuracy', 'precision', 'recall', 'f1_score']
        titles = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
        metric_cols = [column(metric) for metric in metrics]

        x_label = 'Feature Type & N-gram'
        if model_col is not None:
            x_label = 'Model, Feature Type & N-gram'

        fig, axes = plt.subplots(2, 2, figsize=(16, 12))

        for idx, (metric_col, title) in enumerate(zip(metric_cols, titles)):
            ax = axes[idx // 2, idx % 2]

            data = results_df.pivot_table(
                index=group_cols,
                columns=sampling_col,
                values=metric_col
            )

            data.plot(kind='bar', ax=ax, width=0.8, colormap='Set2')
            ax.set_title(f'{title} Comparison', fontsize=12, fontweight='bold')
            ax.set_ylabel(title, fontsize=11)
            ax.set_xlabel(x_label, fontsize=11)
            ax.legend(title='Sampling Method', fontsize=9)
            ax.grid(axis='y', alpha=0.3)
            ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

        plt.tight_layout()
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        plt.close()
        print(f"Saved metrics comparison: {filename}")
    
    def run_experiment(self, df, test_size=0.3, feature_configs=None, sampling_methods=None):
        """Train and evaluate every feature / sampling / model combination.

        For each feature configuration and sampling method, a Random Forest
        (RandomizedSearchCV) and an XGBoost model (Optuna) are tuned on the
        resampled training data and evaluated on the untouched test split.
        Confusion matrices, fitted models and fitted vectorizers are written
        below ``self.output_dir``. Every artifact name ends with the split
        (e.g. ``_70_30``), so runs with different ``test_size`` values do not
        overwrite each other.

        Args:
            df: DataFrame with ``content_stemmed`` (text) and ``sentiment``
                (label) columns.
            test_size: Fraction of the data held out for testing.
            feature_configs: Iterable of ``(feature_type, ngram_range,
                ngram_name)`` tuples; defaults to bigram/trigram TF-IDF and
                c-TF-IDF.
            sampling_methods: Iterable of sampling method names; defaults to
                ``['smote', 'adasyn']``.

        Returns:
            List of result dicts, one per trained model.
        """
        if feature_configs is None:
            feature_configs = [
                ('tfidf', (2, 2), 'bigram'),
                ('tfidf', (3, 3), 'trigram'),
                ('ctfidf', (2, 2), 'bigram'),
                ('ctfidf', (3, 3), 'trigram')
            ]

        if sampling_methods is None:
            sampling_methods = ['smote', 'adasyn']

        # Prepare data
        X = df['content_stemmed']
        y = df['sentiment']

        # Split data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=RANDOM_STATE, stratify=y
        )

        # round() rather than int(): e.g. 0.29 * 100 is 28.999999999999996,
        # which int() would truncate to 28.
        test_pct = round(test_size * 100)
        train_pct = 100 - test_pct
        split_tag = f"{train_pct}_{test_pct}"

        print(f"\n{'='*80}")
        print(f"Train/Test Split: {train_pct}/{test_pct}")
        print(f"Training set size: {len(X_train)}")
        print(f"Test set size: {len(X_test)}")
        print(f"{'='*80}")

        # (name prefix, model type, optimization method, tuning function)
        model_specs = (
            ('RF', 'RandomForest', 'RandomSearchCV', self.optimize_random_forest_random_search),
            ('XGB', 'XGBoost', 'Optuna', self.optimize_xgboost_optuna),
        )

        all_results = []

        # Iterate through all configurations
        for feature_type, ngram_range, ngram_name in feature_configs:
            # Create features
            X_train_vec, X_test_vec, vectorizer = self.create_features(
                X_train, X_test, y_train, feature_type, ngram_range
            )

            # Keep and persist the fitted vectorizer: a saved model cannot be
            # applied to new text without it.
            vectorizer_name = f"{feature_type}_{ngram_name}_{split_tag}"
            self.vectorizers[vectorizer_name] = vectorizer
            with open(f"{self.output_dir}/models/vectorizer_{vectorizer_name}.pkl", 'wb') as f:
                pickle.dump(vectorizer, f)

            for sampling_method in sampling_methods:
                # Apply sampling
                # NOTE(review): oversampling happens before the tuners'
                # internal cross-validation, so synthetic points derived from
                # a validation fold can sit in the training folds and the CV
                # scores may be optimistic. Test-set metrics are unaffected.
                X_train_resampled, y_train_resampled = self.apply_sampling(
                    X_train_vec, y_train, sampling_method
                )

                print(f"\n{'='*80}")
                print(f"Configuration: {feature_type.upper()} + {ngram_name.upper()} + {sampling_method.upper()}")
                print(f"{'='*80}")

                for prefix, model_type, optimization_method, optimize in model_specs:
                    # Train and evaluate
                    model, params = optimize(X_train_resampled, y_train_resampled)

                    results = self.evaluate_model(
                        model, X_test_vec, y_test,
                        f"{prefix}_{feature_type}_{ngram_name}_{sampling_method}_{split_tag}"
                    )

                    results.update({
                        'model_type': model_type,
                        'feature_type': feature_type,
                        'ngram': ngram_name,
                        'sampling_method': sampling_method,
                        'test_size': test_size,
                        'hyperparameters': params,
                        'optimization_method': optimization_method
                    })

                    all_results.append(results)

                    # Save confusion matrix
                    self.plot_confusion_matrix(
                        results['confusion_matrix'],
                        results['classes'],
                        results['model_name'],
                        f"{self.output_dir}/visualizations/cm_{results['model_name']}.png"
                    )

                    # Save the model straight away, so a failure in a later
                    # configuration does not lose it
                    with open(f"{self.output_dir}/models/{results['model_name']}.pkl", 'wb') as f:
                        pickle.dump(model, f)

        return all_results
    
    def generate_report(self, all_results, split_ratio):
        """Write the CSV, the figures and the markdown report for one split.

        Args:
            all_results: List of result dicts as returned by
                ``run_experiment``.
            split_ratio: Label of the train/test split (e.g. ``"70_30"``),
                used in output file names and in the report text.

        Returns:
            Tuple ``(results_df, md_filename)``: the summary table and the
            path of the written markdown report.

        Raises:
            ValueError: If ``all_results`` is empty.
        """
        if not all_results:
            raise ValueError("all_results is empty; nothing to report")

        # Convert results to DataFrame
        results_df = pd.DataFrame([
            {
                'Model': result['model_type'],
                'Feature Type': result['feature_type'],
                'N-gram': result['ngram'],
                'Sampling': result['sampling_method'],
                'Optimization': result['optimization_method'],
                'Accuracy': result['accuracy'],
                'Precision': result['precision'],
                'Recall': result['recall'],
                'F1-Score': result['f1_score'],
                'ROC AUC': result['roc_auc_macro'],
                'Test Size': result['test_size']
            }
            for result in all_results
        ])

        # Save to CSV
        csv_filename = f"{self.output_dir}/results_comparison_{split_ratio}.csv"
        results_df.to_csv(csv_filename, index=False)
        print(f"\nSaved results to: {csv_filename}")

        # Generate visualizations. The metrics plot looks columns up by their
        # snake_case names, so hand it a renamed copy of the table.
        plot_df = results_df.rename(columns={
            'Model': 'model_type',
            'Feature Type': 'feature_type',
            'N-gram': 'ngram',
            'Sampling': 'sampling_method',
            'Accuracy': 'accuracy',
            'Precision': 'precision',
            'Recall': 'recall',
            'F1-Score': 'f1_score',
        })
        self.plot_metrics_comparison(
            plot_df,
            f"{self.output_dir}/visualizations/metrics_comparison_{split_ratio}.png"
        )

        self.plot_roc_curve(
            all_results,
            f"{self.output_dir}/visualizations/roc_curves_{split_ratio}.png"
        )

        def write_comparison(handle, number, title, column, contenders):
            """Write one 'Key Findings' item comparing two values of ``column``.

            ``contenders`` is a pair of ``(column value, display name)``.
            """
            averages = [
                (label, results_df.loc[results_df[column] == value, 'F1-Score'].mean())
                for value, label in contenders
            ]
            handle.write(f"{number}. **{title}:**\n")
            for label, average in averages:
                handle.write(f"   - {label} Average F1-Score: {average:.4f}\n")

            (first_label, first_avg), (second_label, second_avg) = averages
            if pd.isna(first_avg) or pd.isna(second_avg):
                # One side was never run; a NaN comparison must not pick a winner
                winner = 'N/A (not every option was evaluated)'
            elif first_avg > second_avg:
                winner = first_label
            elif second_avg > first_avg:
                winner = second_label
            else:
                winner = 'Tie'
            handle.write(f"   - Winner: {winner}\n\n")

        # Best model overall. idxmax returns an index label, hence .loc
        best_model = results_df.loc[results_df['F1-Score'].idxmax()]

        # Every entry of the confusion matrix counts test samples, so its sum
        # is the size of the evaluated test set.
        test_samples = int(np.sum(all_results[0]['confusion_matrix']))

        # Generate markdown report
        md_filename = f"{self.output_dir}/report_{split_ratio}.md"

        with open(md_filename, 'w', encoding='utf-8') as f:
            f.write(f"# Comparative Hyperparameter Optimization Report\n\n")
            f.write(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
            f.write(f"**Train/Test Split:** {split_ratio}\n\n")

            f.write("## Executive Summary\n\n")
            f.write("This report presents a comprehensive comparison of Aspect-Based Sentiment Analysis (ABSA) models using:\n\n")
            f.write("- **Models:** Random Forest vs XGBoost\n")
            f.write("- **Feature Extraction:** TF-IDF vs c-TF-IDF with Bigrams and Trigrams\n")
            f.write("- **Sampling Methods:** SMOTE vs ADASYN\n")
            f.write("- **Hyperparameter Optimization:** RandomSearchCV (RF) and Optuna (XGBoost)\n\n")

            f.write("## Dataset Information\n\n")
            f.write(f"- **Configurations Tested:** {len(results_df)}\n")
            f.write(f"- **Test Samples:** {test_samples}\n")
            f.write(f"- **Test Size:** {results_df['Test Size'].iloc[0] * 100:.0f}%\n")
            f.write(f"- **Random State:** {RANDOM_STATE}\n\n")

            f.write("## Overall Results\n\n")

            f.write(f"### Best Performing Model\n\n")
            f.write(f"- **Model:** {best_model['Model']}\n")
            f.write(f"- **Feature Type:** {best_model['Feature Type']}\n")
            f.write(f"- **N-gram:** {best_model['N-gram']}\n")
            f.write(f"- **Sampling:** {best_model['Sampling']}\n")
            f.write(f"- **F1-Score:** {best_model['F1-Score']:.4f}\n")
            f.write(f"- **Accuracy:** {best_model['Accuracy']:.4f}\n")
            f.write(f"- **ROC AUC:** {best_model['ROC AUC']:.4f}\n\n")

            f.write("## Detailed Results Table\n\n")
            f.write(self.df_to_markdown(results_df, float_format='.4f'))
            f.write("\n\n")

            f.write("## Model Comparison by Configuration\n\n")

            # Group by feature type and sampling
            for feature_type in results_df['Feature Type'].unique():
                f.write(f"### {feature_type.upper()} Features\n\n")

                for sampling in results_df['Sampling'].unique():
                    subset = results_df[
                        (results_df['Feature Type'] == feature_type) &
                        (results_df['Sampling'] == sampling)
                    ]

                    f.write(f"#### {sampling.upper()} Sampling\n\n")
                    f.write(self.df_to_markdown(subset[['Model', 'N-gram', 'Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC AUC']], float_format='.4f'))
                    f.write("\n\n")

            f.write("## Visualizations\n\n")
            f.write(f"![Metrics Comparison](visualizations/metrics_comparison_{split_ratio}.png)\n\n")
            f.write(f"![ROC Curves](visualizations/roc_curves_{split_ratio}.png)\n\n")

            f.write("## Confusion Matrices\n\n")
            for result in all_results:
                model_name = result['model_name']
                f.write(f"### {model_name}\n\n")
                f.write(f"![Confusion Matrix](visualizations/cm_{model_name}.png)\n\n")

            f.write("## Detailed Classification Reports\n\n")
            for result in all_results:
                f.write(f"### {result['model_name']}\n\n")
                report_df = pd.DataFrame(result['classification_report']).transpose()
                # The row labels (class names, averages) live in the index,
                # which the markdown helper does not print; move them into a
                # regular column so the rows stay identifiable.
                report_df = report_df.rename_axis('Class').reset_index()
                f.write(self.df_to_markdown(report_df, float_format='.4f'))
                f.write("\n\n")

            f.write("## Hyperparameter Configurations\n\n")
            for result in all_results:
                f.write(f"### {result['model_name']}\n\n")
                f.write(f"**Optimization Method:** {result['optimization_method']}\n\n")
                f.write("**Best Hyperparameters:**\n\n")
                for param, value in result['hyperparameters'].items():
                    f.write(f"- `{param}`: {value}\n")
                f.write("\n")

            f.write("## Conclusions\n\n")
            f.write("### Key Findings\n\n")

            write_comparison(f, 1, 'Model Performance', 'Model',
                             [('RandomForest', 'Random Forest'), ('XGBoost', 'XGBoost')])
            write_comparison(f, 2, 'Feature Extraction', 'Feature Type',
                             [('tfidf', 'TF-IDF'), ('ctfidf', 'c-TF-IDF')])
            write_comparison(f, 3, 'Sampling Methods', 'Sampling',
                             [('smote', 'SMOTE'), ('adasyn', 'ADASYN')])
            write_comparison(f, 4, 'N-gram Analysis', 'N-gram',
                             [('bigram', 'Bigram'), ('trigram', 'Trigram')])

            f.write("### Recommendations\n\n")
            f.write(f"Based on the experimental results, the recommended configuration is:\n\n")
            f.write(f"- **Model:** {best_model['Model']}\n")
            f.write(f"- **Feature Extraction:** {best_model['Feature Type'].upper()} with {best_model['N-gram']}\n")
            f.write(f"- **Sampling Method:** {best_model['Sampling'].upper()}\n")
            f.write(f"- **Expected F1-Score:** {best_model['F1-Score']:.4f}\n\n")

            f.write("---\n\n")
            f.write(f"*Report generated by Comparative ABSA System on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n")

        print(f"\nSaved report to: {md_filename}")

        return results_df, md_filename

## 4. Configuration and Setup

In [None]:
# Experiment settings.
# To read a local copy of the dataset instead of the remote file, use:
# DATASET_PATH = 'dataset/07_indobert_filtered_90K_Stemmed.csv.gz'
DATASET_PATH = (
    'https://github.com/roniwahyu/Indonesian-Digital-Banks-Dataset2025'
    '/raw/refs/heads/main/07_indobert_filtered_90K_Stemmed.csv.gz'
)
SAMPLE_SIZE_PER_BANK = 1000  # Rows requested per bank when sampling
TEST_SIZES = [0.30, 0.25, 0.35]  # Test fractions; one full experiment per entry

# Banner: rule, title, rule
print(
    "=" * 80,
    "COMPARATIVE HYPERPARAMETER OPTIMIZATION FOR ABSA",
    "=" * 80,
    sep="\n",
)

## 5. Initialize System and Load Data

In [None]:
# Initialize system: creates results_absa_comparison/ together with its
# visualizations/ and models/ sub-folders
absa = ComparativeABSA(output_dir='results_absa_comparison')

# Load the dataset and draw the per-bank / per-sentiment sample used by all
# experiments below
df = absa.load_and_prepare_data(DATASET_PATH, sample_size_per_bank=SAMPLE_SIZE_PER_BANK)

In [None]:
df.head()

In [None]:
#df statistics
df.info()

In [None]:
# Row counts per sentiment (rows) and bank (columns); combinations that do
# not occur are shown as 0. Only the last expression of a cell is displayed,
# so the stray `df.head()` that used to precede this line had no effect.
df.groupby(['sentiment', 'bank_name']).size().unstack().fillna(0).astype(int)

In [None]:
df.sentiment.value_counts()

## 6. Run Experiments for Each Train/Test Split

In [None]:
# Run the full experiment grid once per train/test split
all_split_results = {}

for test_size in TEST_SIZES:
    # round() rather than int(): e.g. 0.29 * 100 is 28.999999999999996, which
    # int() would truncate to 28 and mislabel the split.
    test_pct = round(test_size * 100)
    split_ratio = f"{100 - test_pct}_{test_pct}"
    print(f"\n{'='*80}")
    print(f"RUNNING EXPERIMENTS WITH {split_ratio} SPLIT")
    print(f"{'='*80}")

    # Run experiment
    results = absa.run_experiment(df, test_size=test_size)

    # Generate report
    results_df, report_file = absa.generate_report(results, split_ratio)

    # Keep everything so the summary cell can compare the splits
    all_split_results[split_ratio] = {
        'results': results,
        'dataframe': results_df,
        'report_file': report_file
    }

## 7. Generate Summary Comparison Across All Splits

In [None]:
print(f"\n{'='*80}")
print("GENERATING SUMMARY COMPARISON")
print(f"{'='*80}")

# One row per split: the configuration with the highest F1-score
summary_data = []
for split_ratio, data in all_split_results.items():
    df_results = data['dataframe']
    # idxmax returns an index label, so look the row up with .loc
    best = df_results.loc[df_results['F1-Score'].idxmax()]

    summary_data.append({
        'Split Ratio': split_ratio,
        'Best Model': best['Model'],
        'Best Feature': best['Feature Type'],
        'Best N-gram': best['N-gram'],
        'Best Sampling': best['Sampling'],
        'Best F1-Score': best['F1-Score'],
        'Best Accuracy': best['Accuracy'],
        'Best ROC AUC': best['ROC AUC']
    })

summary_df = pd.DataFrame(summary_data)
# Write next to the other outputs instead of repeating the folder name, so
# changing output_dir in one place keeps everything together
summary_df.to_csv(f"{absa.output_dir}/summary_all_splits.csv", index=False)

print("\n" + "="*80)
print("EXPERIMENT COMPLETED SUCCESSFULLY!")
print("="*80)
print(f"\nResults saved in: {absa.output_dir}/")
print("- Individual reports for each split ratio")
print("- CSV files with detailed metrics")
print("- Visualizations (confusion matrices, ROC curves, metrics comparison)")
print(f"- Trained models saved in {absa.output_dir}/models/")
print("\nSummary of Best Models Across Splits:")
print(summary_df.to_string(index=False))

## 8. Display Summary Results

In [None]:
# Display summary DataFrame
summary_df