In [1]:
import pandas as pd
# First look at the raw ratings table (the modelling cell below reloads the same file).
df = pd.read_csv('credit_ratings_multimodal.csv')

In [2]:
# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2029 entries, 0 to 2028
Data columns (total 46 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   Name                                2029 non-null   object 
 1   Ticker                              2029 non-null   object 
 2   Rating Agency Name                  2029 non-null   object 
 3   Sector                              2029 non-null   object 
 4   currentRatio                        2029 non-null   float64
 5   quickRatio                          2029 non-null   float64
 6   cashRatio                           2029 non-null   float64
 7   daysOfSalesOutstanding              2029 non-null   float64
 8   netProfitMargin                     2029 non-null   float64
 9   pretaxProfitMargin                  2029 non-null   float64
 10  grossProfitMargin                   2029 non-null   float64
 11  operatingProfitMargin               2029 no

In [5]:
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.metrics import (accuracy_score, balanced_accuracy_score, precision_score,
                           recall_score, f1_score, roc_auc_score, log_loss,
                           confusion_matrix, classification_report, top_k_accuracy_score)
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, StackingClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
print("Loading data...")
df = pd.read_csv('credit_ratings_multimodal.csv')

# Step 1: Data Preprocessing
print("\n1. Data Preprocessing...")

# Drop rating agency name as requested
df = df.drop(columns=['Rating Agency Name'], errors='ignore')

# Convert date to datetime (values that cannot be parsed become NaT)
df['rating_date'] = pd.to_datetime(df['rating_date'], errors='coerce')
print(f"Date conversion completed. Earliest date: {df['rating_date'].min()}, Latest date: {df['rating_date'].max()}")

# Create label encoders for Sector and Ticker.
# NOTE(review): LabelEncoder assigns arbitrary integer codes, so distance-based and
# linear models will read an ordering into Sector/Ticker that does not exist.
sector_encoder = LabelEncoder()
ticker_encoder = LabelEncoder()

df['Sector_Encoded'] = sector_encoder.fit_transform(df['Sector'])
df['Ticker_Encoded'] = ticker_encoder.fit_transform(df['Ticker'])

print(f"Sectors encoded: {len(sector_encoder.classes_)} unique sectors")
print(f"Tickers encoded: {len(ticker_encoder.classes_)} unique tickers")

# Handle missing values in NLP features (since MD&A section might have nulls)
# We have two options: drop rows with missing NLP or impute
print(f"\nMissing values in NLP features: {df['nlp_sentiment'].isnull().sum()} rows")

# For this analysis, we'll drop rows with missing NLP features
df_clean = df.dropna(subset=['nlp_sentiment'])
print(f"Data shape after dropping missing NLP: {df_clean.shape}")

# Define features for two scenarios
# Scenario 1: Without Ticker
features_without_ticker = [
    # Financial ratios
    'currentRatio', 'quickRatio', 'cashRatio', 'daysOfSalesOutstanding',
    'netProfitMargin', 'pretaxProfitMargin', 'grossProfitMargin',
    'operatingProfitMargin', 'returnOnAssets', 'returnOnCapitalEmployed',
    'returnOnEquity', 'assetTurnover', 'fixedAssetTurnover', 'debtEquityRatio',
    'debtRatio', 'effectiveTaxRate', 'freeCashFlowOperatingCashFlowRatio',
    'freeCashFlowPerShare', 'cashPerShare', 'companyEquityMultiplier',
    'ebitPerRevenue', 'enterpriseValueMultiple', 'operatingCashFlowPerShare',
    'operatingCashFlowSalesRatio', 'payablesTurnover',

    # Sector (encoded)
    'Sector_Encoded',

    # NLP features
    'nlp_positivity', 'nlp_negativity', 'nlp_litigiousness', 'nlp_polarity',
    'nlp_risk', 'nlp_readability', 'nlp_fraud', 'nlp_safety',
    'nlp_certainty', 'nlp_uncertainty', 'nlp_sentiment'
]

# Scenario 2: With Ticker
features_with_ticker = features_without_ticker + ['Ticker_Encoded']

# Target variables
binary_target = 'Rating_Encoded_Binary'
multiclass_target = 'Rating_Encoded_Multiclass'

print(f"\nFeatures without ticker: {len(features_without_ticker)}")
print(f"Features with ticker: {len(features_with_ticker)}")

# Feature matrices and targets for both scenarios
X_without = df_clean[features_without_ticker]
X_with = df_clean[features_with_ticker]
y_binary = df_clean[binary_target]
y_multi = df_clean[multiclass_target]


def _split_then_scale(X, y, test_size=0.2, random_state=42):
    """Stratified train/test split followed by standardization.

    The scaler is fitted on the training rows only and then applied to the test
    rows, so no test-set statistics leak into the training features.

    Returns:
        (X_train_scaled, X_test_scaled, y_train, y_test, fitted_scaler)
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_test_scaled, y_train, y_test, scaler


# Binary target. Both scenarios use the same target, row count and seed, so they
# receive the same row partition and can share y_binary_train / y_binary_test.
X_without_train, X_without_test, y_binary_train, y_binary_test, scaler_without = _split_then_scale(
    X_without, y_binary
)
X_with_train, X_with_test, _, _, scaler_with = _split_then_scale(X_with, y_binary)

# Multiclass target (stratified on y_multi, hence a different partition and its own scalers)
X_without_train_multi, X_without_test_multi, y_multi_train, y_multi_test, scaler_without_multi = _split_then_scale(
    X_without, y_multi
)
X_with_train_multi, X_with_test_multi, _, _, scaler_with_multi = _split_then_scale(X_with, y_multi)

print(f"\nTrain/Test splits created:")
print(f"Binary classification - Train: {len(y_binary_train)}, Test: {len(y_binary_test)}")
print(f"Multiclass classification - Train: {len(y_multi_train)}, Test: {len(y_multi_test)}")
# Define evaluation metrics function
def evaluate_model(model, X_train, X_test, y_train, y_test, is_binary=True, model_name="", top_k=3):
    """Fit ``model`` on the training data and score it on the test data.

    Args:
        model: Estimator with ``fit`` and ``predict``; ``predict_proba`` is used
            when present. The estimator is fitted in place.
        X_train, X_test: Feature matrices for training and evaluation.
        y_train, y_test: Matching target vectors.
        is_binary: True for a two-class target (``average='binary'`` metrics and
            FP/FN error rates); False for multiclass (``average='weighted'``).
        model_name: Label stored under the ``'Model'`` key.
        top_k: ``k`` used for the multiclass top-k accuracy.

    Returns:
        ``(results, cm)`` where ``results`` is a dict of metric name -> value and
        ``cm`` is the confusion matrix. Probability-based metrics are ``np.nan``
        when the model has no ``predict_proba``; the multiclass top-k accuracy
        and ROC-AUC are also ``np.nan`` when scikit-learn rejects the inputs.
    """

    # Train the model
    model.fit(X_train, y_train)

    # Predictions
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test) if hasattr(model, 'predict_proba') else None

    # Class labels in predict_proba column order. Passing them to the metrics keeps
    # columns aligned even when a class is absent from the test fold.
    class_labels = getattr(model, 'classes_', None)

    averaging = 'binary' if is_binary else 'weighted'

    # Calculate metrics
    results = {
        'Model': model_name,
        'Accuracy': accuracy_score(y_test, y_pred),
        'Balanced Accuracy': balanced_accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred, average=averaging, zero_division=0),
        'Recall': recall_score(y_test, y_pred, average=averaging, zero_division=0),
        'F1-Score': f1_score(y_test, y_pred, average=averaging, zero_division=0),
    }

    # Top-K Accuracy (for multiclass)
    if not is_binary and y_pred_proba is not None:
        try:
            results['Top-K Accuracy'] = top_k_accuracy_score(
                y_test, y_pred_proba, k=top_k, labels=class_labels
            )
        except ValueError:
            results['Top-K Accuracy'] = np.nan

    # ROC-AUC
    if y_pred_proba is None:
        results['ROC-AUC'] = np.nan
    elif is_binary:
        # Column 1 holds the probability of the positive class
        results['ROC-AUC'] = roc_auc_score(y_test, y_pred_proba[:, 1])
    else:
        try:
            results['ROC-AUC'] = roc_auc_score(
                y_test, y_pred_proba, multi_class='ovr', labels=class_labels
            )
        except ValueError:
            # e.g. a class has no positive samples in the test fold
            results['ROC-AUC'] = np.nan

    # Log Loss
    if y_pred_proba is not None:
        results['Log Loss'] = log_loss(y_test, y_pred_proba, labels=class_labels)
    else:
        results['Log Loss'] = np.nan

    # Confusion Matrix (fixed label set => stable shape, 2x2 for binary models)
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)

    # Error metrics
    if is_binary:
        tn, fp, fn, tp = cm.ravel()
        results['False Positive'] = fp
        results['False Negative'] = fn
        results['Type I Error'] = fp / (fp + tn) if (fp + tn) > 0 else 0
        results['Type II Error'] = fn / (fn + tp) if (fn + tp) > 0 else 0
        results['Misclassification Error'] = (fp + fn) / len(y_test)
    else:
        results['Misclassification Error'] = 1 - results['Accuracy']

    return results, cm

# Define models for binary classification
print("\n" + "="*80)
print("BINARY CLASSIFICATION MODELS (Investment Grade vs Below Investment Grade)")
print("="*80)

# Initialize models for binary classification
binary_models = {
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
    'K-Nearest Neighbors': KNeighborsClassifier(n_neighbors=5),
    'Naive Bayes': GaussianNB(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42),
    'SVM': SVC(probability=True, random_state=42)
}

# Results storage: one metrics dict per model, plus confusion matrices keyed by model name
binary_results_without = []
binary_results_with = []
binary_cm_without = {}
binary_cm_with = {}

# Train and evaluate without ticker
print("\nTraining models WITHOUT Ticker feature...")
for name, model in binary_models.items():
    print(f"  Training {name}...")
    results, cm = evaluate_model(
        model, X_without_train, X_without_test,
        y_binary_train, y_binary_test,
        is_binary=True, model_name=name
    )
    binary_results_without.append(results)
    binary_cm_without[name] = cm

# Train and evaluate with ticker.
# The same estimator objects are refitted here; the metrics from the first pass
# were already captured above, so reusing the instances is safe.
print("\nTraining models WITH Ticker feature...")
for name, model in binary_models.items():
    print(f"  Training {name}...")
    results, cm = evaluate_model(
        model, X_with_train, X_with_test,
        y_binary_train, y_binary_test,
        is_binary=True, model_name=name
    )
    binary_results_with.append(results)
    # Must stay inside the loop so every model's matrix is stored, not just the last one
    binary_cm_with[name] = cm

# Convert to DataFrames
df_binary_without = pd.DataFrame(binary_results_without)
df_binary_with = pd.DataFrame(binary_results_with)

print("\n" + "="*80)
print("MULTICLASS CLASSIFICATION MODELS (6 Rating Categories)")
print("="*80)

# Initialize models for multiclass.
# LogisticRegression: the deprecated `multi_class` argument is omitted; with the
# default lbfgs solver a multinomial model is already fitted for multiclass targets.
multiclass_models = {
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
    'K-Nearest Neighbors': KNeighborsClassifier(n_neighbors=5),
    'Naive Bayes': GaussianNB(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42),
    'SVM': SVC(probability=True, random_state=42)
}

# Results storage: one metrics dict per model, plus confusion matrices keyed by model name
multi_results_without = []
multi_results_with = []
multi_cm_without = {}
multi_cm_with = {}

# Train and evaluate without ticker
print("\nTraining multiclass models WITHOUT Ticker feature...")
for name, model in multiclass_models.items():
    print(f"  Training {name}...")
    results, cm = evaluate_model(
        model, X_without_train_multi, X_without_test_multi,
        y_multi_train, y_multi_test,
        is_binary=False, model_name=name
    )
    multi_results_without.append(results)
    multi_cm_without[name] = cm

# Train and evaluate with ticker (same estimator objects, refitted on the wider feature set)
print("\nTraining multiclass models WITH Ticker feature...")
for name, model in multiclass_models.items():
    print(f"  Training {name}...")
    results, cm = evaluate_model(
        model, X_with_train_multi, X_with_test_multi,
        y_multi_train, y_multi_test,
        is_binary=False, model_name=name
    )
    multi_results_with.append(results)
    multi_cm_with[name] = cm

# Convert to DataFrames
df_multi_without = pd.DataFrame(multi_results_without)
df_multi_with = pd.DataFrame(multi_results_with)

# Deep Neural Network implementation
print("\n" + "="*80)
print("DEEP NEURAL NETWORK MODELS")
print("="*80)

def create_dnn_model(input_dim, output_dim, is_binary=True):
    """Build and compile a small fully connected classifier.

    Architecture: Dense(128) -> Dropout(0.3) -> Dense(64) -> Dropout(0.3) ->
    Dense(32) -> Dense(output_dim), with ReLU on the hidden layers.

    Args:
        input_dim: Number of input features.
        output_dim: Number of output units (1 for the binary sigmoid head,
            number of classes for the softmax head).
        is_binary: True -> sigmoid output with binary cross-entropy;
            False -> softmax output with sparse categorical cross-entropy
            (integer class labels).

    Returns:
        The compiled ``keras.Sequential`` model (Adam optimizer, accuracy metric).
    """
    model = keras.Sequential([
        # Explicit Input object instead of the legacy `input_dim=` layer argument
        keras.Input(shape=(input_dim,)),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(32, activation='relu'),
        layers.Dense(output_dim, activation='sigmoid' if is_binary else 'softmax')
    ])

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy' if is_binary else 'sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# DNN for binary classification
print("\nTraining DNN for binary classification...")

# Seed Python, NumPy and the Keras backend so weight initialisation, dropout and
# batch shuffling repeat across runs (GPU kernels may still add small differences).
keras.utils.set_random_seed(42)

# Training settings shared by all four networks
dnn_fit_kwargs = dict(epochs=50, batch_size=32, validation_split=0.2, verbose=0)


def _dnn_metrics(y_true, y_pred, y_proba, is_binary):
    """Build the DNN metrics row in the same layout as the other model rows.

    Args:
        y_true: True labels.
        y_pred: Hard class predictions (any shape that flattens to one label per row).
        y_proba: Network output: one sigmoid column for binary, one softmax
            column per class for multiclass.
        is_binary: Selects 'binary' vs 'weighted' averaging and the ROC-AUC form.
    """
    y_pred = np.ravel(y_pred)
    # A single sigmoid column is flattened to a 1-D positive-class score
    scores = np.ravel(y_proba) if is_binary else y_proba
    averaging = 'binary' if is_binary else 'weighted'
    accuracy = accuracy_score(y_true, y_pred)
    if is_binary:
        roc_auc = roc_auc_score(y_true, scores)
    else:
        roc_auc = roc_auc_score(y_true, scores, multi_class='ovr')
    return {
        'Model': 'DNN',
        'Accuracy': accuracy,
        'Balanced Accuracy': balanced_accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred, average=averaging, zero_division=0),
        'Recall': recall_score(y_true, y_pred, average=averaging, zero_division=0),
        'F1-Score': f1_score(y_true, y_pred, average=averaging, zero_division=0),
        'ROC-AUC': roc_auc,
        'Log Loss': log_loss(y_true, scores),
        'Misclassification Error': 1 - accuracy
    }


dnn_binary_without = create_dnn_model(X_without_train.shape[1], 1, is_binary=True)
dnn_binary_with = create_dnn_model(X_with_train.shape[1], 1, is_binary=True)

# Train without ticker
dnn_binary_without.fit(X_without_train, y_binary_train, **dnn_fit_kwargs)

# Train with ticker
dnn_binary_with.fit(X_with_train, y_binary_train, **dnn_fit_kwargs)

# Get probabilities for DNN (one forward pass per model)
y_pred_proba_without = dnn_binary_without.predict(X_without_test)
y_pred_proba_with = dnn_binary_with.predict(X_with_test)

# Evaluate DNN binary: threshold the sigmoid output at 0.5
y_pred_dnn_without = (y_pred_proba_without > 0.5).astype("int32")
y_pred_dnn_with = (y_pred_proba_with > 0.5).astype("int32")

# Calculate DNN metrics for binary
dnn_binary_results_without = _dnn_metrics(
    y_binary_test, y_pred_dnn_without, y_pred_proba_without, is_binary=True
)
dnn_binary_results_with = _dnn_metrics(
    y_binary_test, y_pred_dnn_with, y_pred_proba_with, is_binary=True
)

# Add DNN results to DataFrames
df_binary_without = pd.concat([df_binary_without, pd.DataFrame([dnn_binary_results_without])], ignore_index=True)
df_binary_with = pd.concat([df_binary_with, pd.DataFrame([dnn_binary_results_with])], ignore_index=True)

# DNN for multiclass
print("\nTraining DNN for multiclass classification...")
# The softmax width is taken from the data rather than hard-coded.
# NOTE(review): this assumes the labels are the contiguous integers 0..K-1;
# sparse_categorical_crossentropy needs every label to be < output_dim.
n_multi_classes = y_multi_train.nunique()
dnn_multi_without = create_dnn_model(X_without_train_multi.shape[1], n_multi_classes, is_binary=False)
dnn_multi_with = create_dnn_model(X_with_train_multi.shape[1], n_multi_classes, is_binary=False)

# Train without ticker
dnn_multi_without.fit(X_without_train_multi, y_multi_train, **dnn_fit_kwargs)

# Train with ticker
dnn_multi_with.fit(X_with_train_multi, y_multi_train, **dnn_fit_kwargs)

# Evaluate DNN multiclass: predict once, then take the most probable class
y_pred_proba_multi_without = dnn_multi_without.predict(X_without_test_multi)
y_pred_proba_multi_with = dnn_multi_with.predict(X_with_test_multi)
y_pred_dnn_multi_without = np.argmax(y_pred_proba_multi_without, axis=1)
y_pred_dnn_multi_with = np.argmax(y_pred_proba_multi_with, axis=1)

# Calculate DNN metrics for multiclass
dnn_multi_results_without = _dnn_metrics(
    y_multi_test, y_pred_dnn_multi_without, y_pred_proba_multi_without, is_binary=False
)
dnn_multi_results_with = _dnn_metrics(
    y_multi_test, y_pred_dnn_multi_with, y_pred_proba_multi_with, is_binary=False
)

# Add DNN results to DataFrames
df_multi_without = pd.concat([df_multi_without, pd.DataFrame([dnn_multi_results_without])], ignore_index=True)
df_multi_with = pd.concat([df_multi_with, pd.DataFrame([dnn_multi_results_with])], ignore_index=True)

# Ensemble Models (Stacking)
print("\n" + "="*80)
print("ENSEMBLE MODELS (Stacking Classifier)")
print("="*80)

# Create ensemble for binary classification
print("\nCreating ensemble model for binary classification...")

# Level-0 learners whose cross-validated predictions feed the meta-learner
base_models = [
    ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('xgb', XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)),
    ('lr', LogisticRegression(max_iter=1000, random_state=42))
]

# Stacking classifier: logistic-regression meta-learner over 5-fold predictions
stacking_clf = StackingClassifier(estimators=base_models, final_estimator=LogisticRegression(), cv=5)


def _stacking_row(truth, hard_preds, class_probs):
    """Assemble the metrics row for one fitted stacking run."""
    hit_rate = accuracy_score(truth, hard_preds)
    return {
        'Model': 'Ensemble (Stacking)',
        'Accuracy': hit_rate,
        'Balanced Accuracy': balanced_accuracy_score(truth, hard_preds),
        'Precision': precision_score(truth, hard_preds),
        'Recall': recall_score(truth, hard_preds),
        'F1-Score': f1_score(truth, hard_preds),
        # column 1 = probability of the positive class
        'ROC-AUC': roc_auc_score(truth, class_probs[:, 1]),
        'Log Loss': log_loss(truth, class_probs),
        'Misclassification Error': 1 - hit_rate
    }


# Scenario 1: features without ticker
stacking_clf.fit(X_without_train, y_binary_train)
y_pred_stack_without = stacking_clf.predict(X_without_test)
y_pred_proba_stack_without = stacking_clf.predict_proba(X_without_test)
stacking_results_without = _stacking_row(
    y_binary_test, y_pred_stack_without, y_pred_proba_stack_without
)

# Scenario 2: the same classifier object is refitted on the features with ticker
stacking_clf.fit(X_with_train, y_binary_train)
y_pred_stack_with = stacking_clf.predict(X_with_test)
y_pred_proba_stack_with = stacking_clf.predict_proba(X_with_test)
stacking_results_with = _stacking_row(
    y_binary_test, y_pred_stack_with, y_pred_proba_stack_with
)

# Append the ensemble rows to the binary result tables
df_binary_without = pd.concat(
    [df_binary_without, pd.DataFrame([stacking_results_without])], ignore_index=True
)
df_binary_with = pd.concat(
    [df_binary_with, pd.DataFrame([stacking_results_with])], ignore_index=True
)

# Display results
def _print_banner(title):
    """Print a section title framed by two 80-character rules."""
    rule = "=" * 80
    print("\n" + rule)
    print(title)
    print(rule)


def _by_accuracy(frame):
    """Return the rows of `frame` ordered from most to least accurate."""
    return frame.sort_values('Accuracy', ascending=False)


# Full metric tables, one section per task
for section_title, table_without, table_with in (
    ("BINARY CLASSIFICATION RESULTS SUMMARY", df_binary_without, df_binary_with),
    ("MULTICLASS CLASSIFICATION RESULTS SUMMARY", df_multi_without, df_multi_with),
):
    _print_banner(section_title)

    print("\nModels WITHOUT Ticker (Sorted by Accuracy):")
    print(_by_accuracy(table_without).to_string())

    print("\n\nModels WITH Ticker (Sorted by Accuracy):")
    print(_by_accuracy(table_with).to_string())

# Model Ranking based on Accuracy
_print_banner("FINAL MODEL RANKING BASED ON ACCURACY")

ranking_columns = ['Model', 'Accuracy', 'F1-Score', 'ROC-AUC']
binary_ranking_without = _by_accuracy(df_binary_without)[ranking_columns]
binary_ranking_with = _by_accuracy(df_binary_with)[ranking_columns]
multi_ranking_without = _by_accuracy(df_multi_without)[ranking_columns]
multi_ranking_with = _by_accuracy(df_multi_with)[ranking_columns]

# Only the first heading is preceded by a single blank line; the rest get two
for heading, ranking in (
    ("\nBINARY CLASSIFICATION - WITHOUT TICKER:", binary_ranking_without),
    ("\n\nBINARY CLASSIFICATION - WITH TICKER:", binary_ranking_with),
    ("\n\nMULTICLASS CLASSIFICATION - WITHOUT TICKER:", multi_ranking_without),
    ("\n\nMULTICLASS CLASSIFICATION - WITH TICKER:", multi_ranking_with),
):
    print(heading)
    print(ranking.to_string())

# Function to plot confusion matrices
def plot_confusion_matrices(cm_dict, title_prefix, n_cols=4):
    """Draw one annotated heatmap per model on a shared figure.

    Args:
        cm_dict: Mapping of model name -> confusion matrix (integer counts).
        title_prefix: Text placed before " - Confusion Matrices" in the figure title.
        n_cols: Number of heatmaps per row. The row count grows with the number
            of models, so no model is dropped.

    Nothing is drawn when ``cm_dict`` is empty.
    """
    n_models = len(cm_dict)
    if n_models == 0:
        return

    # Ceiling division without importing math
    n_rows = -(-n_models // n_cols)
    # squeeze=False keeps `axes` two-dimensional even for a single row or column
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows), squeeze=False)
    axes = axes.flatten()

    for ax, (model_name, cm) in zip(axes, cm_dict.items()):
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
        ax.set_title(f'{model_name}')
        ax.set_xlabel('Predicted')
        ax.set_ylabel('Actual')

    # Hide the grid cells that have no model
    for ax in axes[n_models:]:
        ax.set_visible(False)

    plt.suptitle(f'{title_prefix} - Confusion Matrices', fontsize=16)
    plt.tight_layout()
    plt.show()

# Plot confusion matrices for binary classification
section_rule = "=" * 80
print("\n" + section_rule)
print("CONFUSION MATRICES")
print(section_rule)

# Note: Uncomment to plot confusion matrices
# print("\nBinary Classification - WITHOUT Ticker:")
# plot_confusion_matrices(binary_cm_without, "Binary Classification Without Ticker")

# print("\nBinary Classification - WITH Ticker:")
# plot_confusion_matrices(binary_cm_with, "Binary Classification With Ticker")

# print("\nMulticlass Classification - WITHOUT Ticker:")
# plot_confusion_matrices(multi_cm_without, "Multiclass Classification Without Ticker")

# print("\nMulticlass Classification - WITH Ticker:")
# plot_confusion_matrices(multi_cm_with, "Multiclass Classification With Ticker")

# Comparison: With vs Without Ticker
print("\n" + section_rule)
print("COMPARISON: WITH vs WITHOUT TICKER FEATURE")
print(section_rule)

# Metrics carried into the side-by-side table
compared_metrics = ['Model', 'Accuracy', 'F1-Score', 'ROC-AUC', 'Misclassification Error']

# Columns shown in the printed report, grouped per metric
report_columns = [
    'Model',
    'Accuracy_Without', 'Accuracy_With', 'Accuracy_Diff',
    'F1-Score_Without', 'F1-Score_With', 'F1_Diff',
    'ROC-AUC_Without', 'ROC-AUC_With', 'AUC_Diff',
]


def _with_vs_without(table_without, table_with):
    """Join the two result tables on model name and add (with - without) deltas."""
    joined = pd.merge(
        table_without[compared_metrics],
        table_with[compared_metrics],
        on='Model',
        suffixes=('_Without', '_With')
    )
    for delta_column, metric in (
        ('Accuracy_Diff', 'Accuracy'),
        ('F1_Diff', 'F1-Score'),
        ('AUC_Diff', 'ROC-AUC'),
    ):
        joined[delta_column] = joined[f'{metric}_With'] - joined[f'{metric}_Without']
    return joined


# Binary task
comparison_binary = _with_vs_without(df_binary_without, df_binary_with)
print("\nBinary Classification Comparison (With - Without Ticker):")
print(comparison_binary[report_columns].to_string())

# Multiclass task
comparison_multi = _with_vs_without(df_multi_without, df_multi_with)
print("\n\nMulticlass Classification Comparison (With - Without Ticker):")
print(comparison_multi[report_columns].to_string())

# Summary Statistics
for banner_line in ("\n" + "="*80, "SUMMARY STATISTICS", "="*80):
    print(banner_line)

n_rating_categories = y_multi_train.nunique()
highest_rating_code = y_multi_train.max()

print("\nDataset Information:")
print(f"- Total samples: {len(df)}")
print(f"- Samples after cleaning: {len(df_clean)}")
print("- Binary classes: Investment Grade (1) vs Below Investment Grade (0)")
print(f"- Multiclass categories: {n_rating_categories} rating categories (0-{highest_rating_code})")
print(f"- Features without ticker: {len(features_without_ticker)}")
print(f"- Features with ticker: {len(features_with_ticker)}")

# Row counts per binary label
investment_grade_rows = (y_binary == 1).sum()
below_grade_rows = (y_binary == 0).sum()
print("\nClass Distribution (Binary):")
print(f"- Investment Grade (1): {investment_grade_rows} samples")
print(f"- Below Investment Grade (0): {below_grade_rows} samples")

# Row counts per value of the Rating_Merged column, most frequent first
print("\nClass Distribution (Multiclass):")
rating_counts = df_clean['Rating_Merged'].value_counts()
for rating, count in rating_counts.items():
    print(f"- {rating}: {count} samples")

for banner_line in ("\n" + "="*80, "BEST MODELS SUMMARY", "="*80):
    print(banner_line)

# The ranking tables are sorted by accuracy, so row 0 is the top model
top_binary_without = binary_ranking_without.iloc[0]
top_binary_with = binary_ranking_with.iloc[0]
top_multi_without = multi_ranking_without.iloc[0]
top_multi_with = multi_ranking_with.iloc[0]

print("\n1. BINARY CLASSIFICATION:")
print(f"   Best WITHOUT ticker: {top_binary_without['Model']} (Accuracy: {top_binary_without['Accuracy']:.4f})")
print(f"   Best WITH ticker: {top_binary_with['Model']} (Accuracy: {top_binary_with['Accuracy']:.4f})")

print("\n2. MULTICLASS CLASSIFICATION:")
print(f"   Best WITHOUT ticker: {top_multi_without['Model']} (Accuracy: {top_multi_without['Accuracy']:.4f})")
print(f"   Best WITH ticker: {top_multi_with['Model']} (Accuracy: {top_multi_with['Accuracy']:.4f})")

# Number of models whose accuracy rose once the ticker feature was added
binary_improved = (comparison_binary['Accuracy_Diff'] > 0).sum()
multi_improved = (comparison_multi['Accuracy_Diff'] > 0).sum()

print("\n3. TICKER FEATURE IMPACT:")
print(f"   Binary classification: Adding ticker improved accuracy for {binary_improved} out of {len(comparison_binary)} models")
print(f"   Multiclass classification: Adding ticker improved accuracy for {multi_improved} out of {len(comparison_multi)} models")

# Save results to CSV
csv_exports = {
    'binary_results_without_ticker.csv': df_binary_without,
    'binary_results_with_ticker.csv': df_binary_with,
    'multiclass_results_without_ticker.csv': df_multi_without,
    'multiclass_results_with_ticker.csv': df_multi_with,
    'binary_comparison.csv': comparison_binary,
    'multiclass_comparison.csv': comparison_multi,
}
for csv_name, table in csv_exports.items():
    table.to_csv(csv_name, index=False)

print("\nResults saved to CSV files.")

Loading data...

1. Data Preprocessing...
Date conversion completed. Earliest date: 2005-08-16 00:00:00, Latest date: 2016-12-23 00:00:00
Sectors encoded: 12 unique sectors
Tickers encoded: 593 unique tickers

Missing values in NLP features: 390 rows
Data shape after dropping missing NLP: (1639, 47)

Features without ticker: 37
Features with ticker: 38

Train/Test splits created:
Binary classification - Train: 1311, Test: 328
Multiclass classification - Train: 1311, Test: 328

BINARY CLASSIFICATION MODELS (Investment Grade vs Below Investment Grade)

Training models WITHOUT Ticker feature...
  Training Logistic Regression...
  Training K-Nearest Neighbors...
  Training Naive Bayes...
  Training Decision Tree...
  Training Random Forest...
  Training XGBoost...
  Training SVM...

Training models WITH Ticker feature...
  Training Logistic Regression...
  Training K-Nearest Neighbors...
  Training Naive Bayes...
  Training Decision Tree...
  Training Random Forest...
  Training XGBoost...