# Stage 2

In [None]:
# Google Drive share link for the Stage 2 dataset
file_url_2 = "https://drive.google.com/uc?id=1vy1JFQZva3lhMJQV69C43AB1NTM4W-DZ"

# The file id is the text after the last '=' of the share link; it is plugged
# into the direct-download endpoint so pandas can read the CSV straight from Drive.
drive_file_id = file_url_2.rsplit('=', 1)[-1]
download_url = f"https://drive.google.com/uc?export=download&id={drive_file_id}"
stage2_data = pd.read_csv(download_url)

# Preview the first few rows
stage2_data.head()

In [None]:
# Column dtypes and non-null counts (shows where values are missing)
stage2_data.info()

# Summary statistics for every column; include='all' also covers non-numeric columns
stage2_data.describe(include='all')

In [None]:
# 'LearnerCode' is only an identifier and carries no predictive signal,
# so remove it from the frame in place.
del stage2_data['LearnerCode']

In [None]:
# Parse the birth dates (day comes first); unparseable entries become NaT
birth_dates = pd.to_datetime(stage2_data['DateofBirth'], errors='coerce', dayfirst=True)

# Age = reference year minus birth year, with 2016 taken as the collection year.
# Why 2016?
# A student born in 1998 is listed under Foundation, which is typically for students around 18 years old.
# That suggests this record was collected around 2016 (1998 + 18).
stage2_data['Age'] = 2016 - birth_dates.dt.year

# The raw date column is no longer needed once Age exists
del stage2_data['DateofBirth']

In [None]:
# Print every remaining column name on its own line
for column_name in list(stage2_data):
    print(column_name)

In [None]:
# Number of distinct (non-null) values per column
unique_counts = stage2_data.nunique()

# Columns with more than 200 distinct values are treated as high-cardinality
high_cardinality_cols = unique_counts[unique_counts > 200].index.tolist()

# Report what is about to be removed
print("Dropped columns due to high cardinality (>200 unique values):")
print(high_cardinality_cols)

# Remove them from the frame
stage2_data.drop(columns=high_cardinality_cols, inplace=True)

In [None]:
# Column names before filtering, kept for the report below
original_columns = set(stage2_data.columns)

# A column survives only if at least half of its rows hold a value
threshold = len(stage2_data) * 0.5
stage2_data.dropna(axis=1, thresh=threshold, inplace=True)

# Column names after filtering
remaining_columns = set(stage2_data.columns)

# Whatever disappeared was dropped for being mostly empty
dropped_columns = original_columns.difference(remaining_columns)
print("Dropped columns due to >50% missing values:", dropped_columns)

In [None]:
#from sklearn.impute import SimpleImputer

# Share of missing values per column (in %), measured before any rows are removed
missing_percent = stage2_data.isnull().mean() * 100
initial_row_count = len(stage2_data)

# Columns that are only sparsely incomplete: some values missing, but fewer than 2%
sparse_missing_cols = [
    col for col in stage2_data.columns
    if 0 < missing_percent[col] < 2
]

# Dictionary to store dropped info: rows with a missing value, per column
dropped_info = {}
for col in sparse_missing_cols:
    missing_rows = stage2_data[col].isnull().sum()
    dropped_info[col] = {
        'rows_dropped': missing_rows,
        'percent_of_total_rows': (missing_rows / initial_row_count) * 100
    }

# Reassign stage2_data itself so the incomplete rows are really removed
# from the frame that every later step works on.
if sparse_missing_cols:
    stage2_data = stage2_data.dropna(subset=sparse_missing_cols)

# Print the info
if dropped_info:
    print("Columns with dropped rows (missing < 2%):")
    for col, info in dropped_info.items():
        print(f"- {col}: {info['rows_dropped']} rows dropped "
              f"({info['percent_of_total_rows']:.2f}% of total)")
    # A row missing values in several columns is counted once here
    print(f"Total rows removed: {initial_row_count - len(stage2_data)}")
else:
    print("No columns had missing values <2%, so no rows were dropped.")

In [None]:
# Mean-impute whatever is still missing in the numeric columns
numeric_cols = stage2_data.select_dtypes(include=np.number).columns
if numeric_cols.empty:
    print("No numeric columns found for imputation, skipping this step.")
else:
    imputer = SimpleImputer(strategy='mean')
    stage2_data[numeric_cols] = imputer.fit_transform(stage2_data[numeric_cols])
    print(f"Imputed missing values in numeric columns: {list(numeric_cols)}")

In [None]:
# Turn the target into 1/0 now, so the one-hot encoding step leaves it alone
target_encoding = {'Yes': 1, 'No': 0}
stage2_data['CompletedCourse'] = stage2_data['CompletedCourse'].map(target_encoding)

In [None]:
# Categorical feature columns (object / category dtype), never including the target
categorical_cols = (
    stage2_data
    .select_dtypes(include=['object', 'category'])
    .columns
    .drop('CompletedCourse', errors='ignore')
)

# One-hot encode them; drop_first removes one dummy per feature
stage2_data = pd.get_dummies(stage2_data, columns=categorical_cols, drop_first=True)

In [None]:
# Class balance of the target: absolute counts first ...
target_column = stage2_data['CompletedCourse']
print(target_column.value_counts())

# ... then the same breakdown as percentages
print(target_column.value_counts(normalize=True) * 100)


In [None]:
# Bar chart of the class counts to visualise the imbalance
class_counts = stage2_data['CompletedCourse'].value_counts()
balance_ax = class_counts.plot(kind='bar')
balance_ax.set_title("Distribution of CompletedCourse")
balance_ax.set_xlabel("Completed (1 = Yes, 0 = No)")
balance_ax.set_ylabel("Count")
plt.show()

In [None]:
# Target vector y is the encoded CompletedCourse column;
# feature matrix X is everything else.
y = stage2_data['CompletedCourse']
X = stage2_data.drop(columns=['CompletedCourse'])

In [None]:
# 80/20 train/test split, stratified on y so both parts keep the class ratio
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# With binary 0/1 labels, XGBoost's scale_pos_weight is the ratio of
# negative to positive training examples.
num_neg = int((y_train == 0).sum())
num_pos = int((y_train == 1).sum())
scale_pos_weight = num_neg / num_pos

In [None]:
# Baseline XGBoost classifier: library defaults plus the imbalance weight
model_xgb = xgb.XGBClassifier(
    eval_metric='logloss',
    random_state=42,
    scale_pos_weight=scale_pos_weight,
)

model_xgb.fit(X_train, y_train)

In [None]:
fig, axes = plt.subplots(3, 1, figsize=(15, 18))

# One panel per importance type, top to bottom:
#   gain   - average gain of the splits that use the feature
#   weight - how many times the feature is used to split nodes across all trees
#   cover  - average coverage (data points affected) of the splits on the feature
importance_panels = [
    ('gain', 'Feature Importance (Gain)'),
    ('weight', 'Feature Importance (Weight)'),
    ('cover', 'Feature Importance (Cover)'),
]

for panel_ax, (importance_kind, panel_title) in zip(axes, importance_panels):
    xgb.plot_importance(
        model_xgb,
        importance_type=importance_kind,
        ax=panel_ax,
        max_num_features=10,
        title=panel_title,
    )

plt.tight_layout()
plt.show()

In [None]:
# Raw importance scores of the baseline model, one dict per importance type
baseline_booster = model_xgb.get_booster()
gain_importance = baseline_booster.get_score(importance_type='gain')
weight_importance = baseline_booster.get_score(importance_type='weight')
cover_importance = baseline_booster.get_score(importance_type='cover')

# Helper: turn an importance dict into a ranked table
def create_importance_df(importance_dict, top_n=10):
    """Return the ``top_n`` highest-scoring features as a DataFrame.

    Args:
        importance_dict: mapping of feature name -> importance score.
        top_n: number of rows to keep (default 10).

    Returns:
        DataFrame with columns 'Feature' and 'Value', sorted by 'Value'
        in descending order and re-indexed from 0.
    """
    table = pd.DataFrame(
        {
            'Feature': list(importance_dict.keys()),
            'Value': list(importance_dict.values()),
        }
    )
    ranked = table.sort_values(by='Value', ascending=False)
    return ranked.head(top_n).reset_index(drop=True)

# Ranked top-10 table for each importance type
gain_df = create_importance_df(gain_importance)
weight_df = create_importance_df(weight_importance)
cover_df = create_importance_df(cover_importance)

# Show the tables; the first two are followed by a blank separator line
for heading, table in (
    ("Top 10 Features by Gain:", gain_df),
    ("Top 10 Features by Weight:", weight_df),
):
    print(heading)
    print(table, "\n")

print("Top 10 Features by Cover:")
print(cover_df)

In [None]:
# Hard class predictions plus the predicted probability of class 1
y_pred_xgb = model_xgb.predict(X_test)
y_pred_proba_xgb = model_xgb.predict_proba(X_test)[:, 1]

# Text metrics on the test split
cm = confusion_matrix(y_test, y_pred_xgb)
print("Confusion Matrix:\n", cm)
print("\nClassification Report:\n", classification_report(y_test, y_pred_xgb))
print(f"Accuracy: {accuracy_score(y_test, y_pred_xgb):.4f}")
print(f"AUC Score: {roc_auc_score(y_test, y_pred_proba_xgb):.4f}")

# Confusion matrix as a heat-map
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.show()

# ROC curve, with the chance diagonal drawn for reference
auc = roc_auc_score(y_test, y_pred_proba_xgb)
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba_xgb)

roc_fig, roc_ax = plt.subplots(figsize=(6, 4))
roc_ax.plot(fpr, tpr, label=f'AUC = {auc:.4f}')
roc_ax.plot([0, 1], [0, 1], linestyle='--', color='gray')
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('ROC Curve')
roc_ax.legend()
roc_ax.grid(True)
plt.show()

In [None]:
# Re-create the 80/20 stratified split of X and y.
# NOTE(review): the arguments (X, y, test_size=0.2, random_state=42, stratify=y)
# are identical to the earlier train/test split, so X_valid/y_valid should hold
# the same rows as X_test/y_test rather than an independent validation set.
# Within this file X_valid/y_valid are not referenced again; the Optuna
# objective cross-validates on X_train instead.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Define Optuna objective function
def objective(trial):
    """Score one Optuna trial by mean class-0 recall over 3 stratified folds.

    Args:
        trial: Optuna trial used to sample learning_rate, max_depth and
            n_estimators.

    Returns:
        Mean recall of class 0 across the folds of X_train / y_train.
    """
    # Hyperparameters sampled by the trial
    sampled = {
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
    }
    # Settings held constant for every trial
    fixed = {
        'scale_pos_weight': scale_pos_weight,
        'eval_metric': 'logloss',
        'random_state': 42,
        'use_label_encoder': False,
        'verbosity': 0,
    }
    model = XGBClassifier(**sampled, **fixed)

    folds = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    recall_scores = []

    for fit_rows, holdout_rows in folds.split(X_train, y_train):
        X_val = X_train.iloc[holdout_rows]
        y_val = y_train.iloc[holdout_rows]

        # Fit on the remaining folds, then score on the held-out fold
        model.fit(
            X_train.iloc[fit_rows],
            y_train.iloc[fit_rows],
            eval_set=[(X_val, y_val)],
            verbose=False,
        )
        fold_recall = recall_score(y_val, model.predict(X_val), pos_label=0)
        recall_scores.append(fold_recall)

    return np.mean(recall_scores)

# Metrics and plotting helpers used for the evaluation at the end of this cell
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, roc_auc_score, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Run the study.
# The sampler is seeded so the sequence of suggested hyperparameters is
# repeatable, in line with random_state=42 used for the splits and models.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42)
)
study.optimize(objective, n_trials=10)

print("Best Hyperparameters from Optuna tuning:")
print(study.best_trial.params)
print(f"Best Mean Recall for Class 0 (Dropouts): {study.best_value:.4f}")

# Use best params from study, plus the fixed settings every trial was run with
best_params = study.best_trial.params
best_params.update({
    'scale_pos_weight': scale_pos_weight,
    'eval_metric': 'logloss',
    'random_state': 42,
    'use_label_encoder': False,
    'verbosity': 0
})

model_xgb_optimised = XGBClassifier(**best_params)

# Train on full training data
model_xgb_optimised.fit(X_train, y_train)

# Predict and evaluate on test data.
# This overwrites y_pred_xgb / y_pred_proba_xgb from the baseline evaluation.
y_pred_xgb = model_xgb_optimised.predict(X_test)
y_pred_proba_xgb = model_xgb_optimised.predict_proba(X_test)[:, 1]

# Metrics and confusion matrix
cm = confusion_matrix(y_test, y_pred_xgb)
print("Confusion Matrix:\n", cm)
print("\nClassification Report:\n", classification_report(y_test, y_pred_xgb))
print(f"Accuracy: {accuracy_score(y_test, y_pred_xgb):.4f}")
print(f"AUC Score: {roc_auc_score(y_test, y_pred_proba_xgb):.4f}")

disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.show()

In [None]:
# ROC curve for the current y_pred_proba_xgb, with the chance diagonal for reference
auc = roc_auc_score(y_test, y_pred_proba_xgb)
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba_xgb)

roc_fig, roc_ax = plt.subplots(figsize=(6, 4))
roc_ax.plot(fpr, tpr, label=f'AUC = {auc:.4f}')
roc_ax.plot([0, 1], [0, 1], linestyle='--', color='gray')
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('ROC Curve')
roc_ax.legend()
roc_ax.grid(True)
plt.show()

In [None]:
fig, axes = plt.subplots(3, 1, figsize=(15, 18))

# Feature importance of the Optuna-tuned model (model_xgb_optimised), so these
# charts can be compared against the baseline charts shown earlier.
# One panel per importance type, top to bottom:
#   gain   - average gain of the splits that use the feature
#   weight - how many times the feature is used to split nodes across all trees
#   cover  - average coverage (data points affected) of the splits on the feature
importance_panels = [
    ('gain', 'Feature Importance (Gain)'),
    ('weight', 'Feature Importance (Weight)'),
    ('cover', 'Feature Importance (Cover)'),
]

for panel_ax, (importance_kind, panel_title) in zip(axes, importance_panels):
    xgb.plot_importance(
        model_xgb_optimised,
        importance_type=importance_kind,
        ax=panel_ax,
        max_num_features=10,
        title=panel_title,
    )

plt.tight_layout()
plt.show()

In [None]:
# Get importance dictionaries from the tuned model (model_xgb_optimised),
# not the baseline, since this section reports on the tuned model
tuned_booster = model_xgb_optimised.get_booster()
gain_importance = tuned_booster.get_score(importance_type='gain')
weight_importance = tuned_booster.get_score(importance_type='weight')
cover_importance = tuned_booster.get_score(importance_type='cover')

# Helper: rank the features of one importance dict
def create_importance_df(importance_dict, top_n=10):
    """Build a table of the ``top_n`` most important features.

    Args:
        importance_dict: mapping of feature name -> importance score.
        top_n: how many of the highest-scoring rows to return (default 10).

    Returns:
        DataFrame with columns 'Feature' and 'Value', ordered from the
        highest to the lowest 'Value', with a fresh 0-based index.
    """
    rows = list(importance_dict.items())
    scores = pd.DataFrame(rows, columns=['Feature', 'Value'])
    scores = scores.sort_values(by='Value', ascending=False)
    top_rows = scores.head(top_n)
    return top_rows.reset_index(drop=True)

# Ranked top-10 table for each importance type
gain_df = create_importance_df(gain_importance)
weight_df = create_importance_df(weight_importance)
cover_df = create_importance_df(cover_importance)

# Show the tables; the first two are followed by a blank separator line
for heading, table in (
    ("Top 10 Features by Gain:", gain_df),
    ("Top 10 Features by Weight:", weight_df),
):
    print(heading)
    print(table, "\n")

print("Top 10 Features by Cover:")
print(cover_df)

In [None]:
# Metrics from both models (default and tuned)
# NOTE(review): every number below is hard-coded - presumably transcribed from the
# printed output of an earlier run - so the table will not follow the models if
# they are retrained. The three lists are aligned by position: the i-th value of
# each model column belongs to the i-th entry of 'Metric'.
metrics_data = {
    'Metric': [
        'Accuracy',
        'AUC Score',
        'Precision (Class 0)',
        'Precision (Class 1)',
        'Recall (Class 0)',
        'Recall (Class 1)',
        'F1-Score (Class 0)',
        'F1-Score (Class 1)',
        'True Negatives (TN)',
        'False Positives (FP)',
        'False Negatives (FN)',
        'True Positives (TP)'
    ],
    'Default Model': [
        0.8659,
        0.9012,
        0.54,
        0.96,
        0.78,
        0.88,
        0.64,
        0.92,
        587,
        164,
        508,
        3753
    ],
    'Tuned Model': [
        0.8651,
        0.8987,
        0.53,
        0.96,
        0.78,
        0.88,
        0.63,
        0.92,
        585,
        166,
        510,
        3751
    ]
}

# Create DataFrame (one row per metric)
metrics_df = pd.DataFrame(metrics_data)

# Percentage change from the default model to the tuned model
def pct_change(default, tuned):
    """Return the element-wise relative change from ``default`` to ``tuned`` in percent.

    Args:
        default: array-like baseline values.
        tuned: array-like values compared against the baseline.

    Returns:
        NumPy array of (tuned - default) / default * 100, with NaN wherever
        the baseline value is 0.
    """
    relative_delta = (tuned - default) / default * 100
    has_baseline = default != 0
    return np.where(has_baseline, relative_delta, np.nan)

# Relative change of each metric, tuned vs default
metrics_df['Percentage Change (%)'] = pct_change(
    metrics_df['Default Model'],
    metrics_df['Tuned Model'],
)

# Display formats: 4 decimals for the model columns, signed 2-decimal percentages
column_formats = {
    'Default Model': '{:.4f}',
    'Tuned Model': '{:.4f}',
    'Percentage Change (%)': '{:+.2f}%'
}
styled_df = metrics_df.style.format(column_formats)
styled_df = styled_df.set_caption("Model Performance Comparison with Percentage Change")
styled_df = styled_df.set_properties(**{'text-align': 'center'})

styled_df

In [None]:
change_fig, change_ax = plt.subplots(figsize=(10, 6))

# One bar per metric showing its percentage change (from metrics_df)
bars = change_ax.bar(metrics_df["Metric"], metrics_df["Percentage Change (%)"], color='skyblue')

# Zero line separates improvements from regressions
change_ax.axhline(0, color='gray', linewidth=0.8)

plt.xticks(rotation=45, ha='right')

change_ax.set_ylabel("Percentage Change (%)")
change_ax.set_title("Percentage Change of Metrics: Tuned vs Default XGBoost")

# Write each bar's value just above its top edge
for bar in bars:
    height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2
    change_ax.annotate(f'{height:+.2f}%',
                       xy=(bar_centre, height),
                       xytext=(0, 3),
                       textcoords="offset points",
                       ha='center', va='bottom', fontsize=8)

plt.tight_layout()
plt.show()

In [None]:
# Standardise the features to help the network converge.
# The scaler learns its statistics from the training split only and then
# applies the same transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:

# Class imbalance: 'balanced' weights penalise mistakes on the
# under-represented class more heavily.
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train,
)
# Keyed by position, i.e. 0, 1, ... in the sorted order returned by np.unique
class_weight_dict = {
    position: weight for position, weight in enumerate(class_weights)
}

In [None]:
# Baseline feed-forward network, built layer by layer
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))
# 64 neurons give moderate capacity; ReLU keeps training fast
model.add(Dense(64, activation='relu'))
# Randomly drop 30% of the activations to limit overfitting
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
# A further 20% dropout after the second hidden layer
model.add(Dropout(0.2))
# Single sigmoid unit for the binary target
model.add(Dense(1, activation='sigmoid'))

In [None]:
from tensorflow.keras.optimizers import Adam
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked per epoch.
model.compile(
    optimizer=Adam(), # Adam with its default settings; class imbalance is handled separately via class_weight when fitting
    loss='binary_crossentropy',
    metrics=['accuracy']
)

In [None]:
# Train the baseline network; the held-out test split doubles as the
# validation data that Keras reports after every epoch.
fit_options = {
    'epochs': 50,
    'batch_size': 32,
    'validation_data': (X_test_scaled, y_test),
    'class_weight': class_weight_dict,
    'verbose': 1,
}
history = model.fit(X_train_scaled, y_train, **fit_options)

In [None]:
# Predicted probabilities, then hard 0/1 labels using a 0.5 cut-off
y_pred_probs = model.predict(X_test_scaled)
y_pred = np.where(y_pred_probs > 0.5, 1, 0)

In [None]:
# Accuracy as reported by Keras on the scaled test split
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f"\nTest Accuracy: {test_accuracy:.4f}")

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(conf_matrix)

# Per-class precision, recall and F1
print("\nClassification Report:")
print(classification_report(y_test, y_pred, digits=4))

# Area under the ROC curve, from the predicted probabilities
auc = roc_auc_score(y_test, y_pred_probs)
print(f"AUC: {auc:.4f}")

# Confusion matrix as a heat-map
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.show()

# ROC curve, with the chance diagonal for reference
fpr, tpr, thresholds = roc_curve(y_test, y_pred_probs)
roc_fig, roc_ax = plt.subplots(figsize=(6, 4))
roc_ax.plot(fpr, tpr, label=f"AUC = {auc:.4f}")
roc_ax.plot([0, 1], [0, 1], linestyle='--', color='gray')
roc_ax.set_xlabel("False Positive Rate")
roc_ax.set_ylabel("True Positive Rate")
roc_ax.set_title("ROC Curve")
roc_ax.legend()
roc_ax.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Plot Loss and Accuracy Curves

# Per-epoch series recorded while fitting
history_dict = history.history

train_loss = history_dict['loss']
val_loss = history_dict['val_loss']
train_acc = history_dict.get('accuracy')  # Might be 'acc' in older Keras
val_acc = history_dict.get('val_accuracy')

epochs = range(1, len(train_loss) + 1)

# Two panels side by side: loss on the left, accuracy on the right
curves_fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10, 4))

loss_ax.plot(epochs, train_loss, label='Training Loss')
loss_ax.plot(epochs, val_loss, label='Validation Loss')
loss_ax.set_title('Loss Over Epochs')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
loss_ax.grid(True)

acc_ax.plot(epochs, train_acc, label='Training Accuracy')
acc_ax.plot(epochs, val_acc, label='Validation Accuracy')
acc_ax.set_title('Accuracy Over Epochs')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()
acc_ax.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Define model-building function for Keras Tuner
def build_model(hp):
    """Build and compile a binary classifier from a Keras Tuner hyperparameter set.

    Tunable choices: width and activation of the first Dense layer, its
    dropout rate (0.1-0.5), whether a second Dense layer is added (with its
    own width, activation and 0.1-0.3 dropout), and the Adam learning rate
    (1e-4 to 1e-2, sampled on a log scale).

    Args:
        hp: hyperparameter container supplied by the tuner.

    Returns:
        A compiled Sequential model with one sigmoid output unit, binary
        cross-entropy loss and accuracy as the tracked metric.
    """
    model = Sequential()
    model.add(Input(shape=(X_train_scaled.shape[1],)))

    # First hidden layer: tune its width and its nonlinearity
    units = hp.Choice('units', values=[32, 64, 128], default=64)
    activation = hp.Choice('activation', ['relu', 'tanh'], default='tanh')
    model.add(Dense(units, activation=activation))
    model.add(Dropout(hp.Float('dropout_1', 0.1, 0.5, step=0.1, default=0.3)))

    # Optional second hidden layer adds capacity if the search finds it useful
    if hp.Boolean('second_layer', default=True):
        units2 = hp.Int('units_2', min_value=16, max_value=64, step=16, default=32)
        activation_2 = hp.Choice('activation_2', ['relu', 'tanh'], default='tanh')
        model.add(Dense(units2, activation=activation_2))
        # Exactly one Dropout layer follows the second Dense layer, so the
        # effective rate equals the sampled 'dropout_2' value
        dropout_2 = hp.Float('dropout_2', 0.1, 0.3, step=0.1, default=0.2)
        model.add(Dropout(dropout_2))

    # Output layer with sigmoid activation for binary classification
    model.add(Dense(1, activation='sigmoid'))

    # Tune learning rate (1e-4 to 1e-2) to optimize training convergence and stability
    lr = hp.Float('learning_rate', 1e-4, 1e-2, sampling='log', default=1e-3)
    model.compile(
        optimizer=Adam(learning_rate=lr),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

In [None]:
# Random search over build_model's hyperparameters, keeping the configuration
# with the highest validation accuracy.
# max_trials: 15 was the starting point; reduced to 10 to save time, at the
# cost of a less thorough search.
# NOTE(review): Keras Tuner reuses results already stored under
# directory/project_name unless overwrite=True is passed - confirm that stale
# trials are not being picked up when build_model changes.
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=1,
    directory='hyperparam_tuning',
    project_name='nn_imbalanced_data',
)

# Each trial trains for 30 epochs with 32 samples per gradient update.
# The test split serves as validation data, and the class weights offset
# the class imbalance during training.
search_options = {
    'epochs': 30,
    'batch_size': 32,
    'validation_data': (X_test_scaled, y_test),
    'class_weight': class_weight_dict,
    'verbose': 1,
}
tuner.search(X_train_scaled, y_train, **search_options)

In [None]:
# Best model found by the tuner
best_model = tuner.get_best_models(num_models=1)[0]

# Flattened test-set probabilities, then 0/1 labels using a 0.5 threshold
y_pred_probs = best_model.predict(X_test_scaled).ravel()
y_pred = (y_pred_probs > 0.5).astype(int)

# Metrics on the test split
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
auc_score = roc_auc_score(y_test, y_pred_probs)
report = classification_report(y_test, y_pred, digits=4)

# Print results
print(f"Best tuned model accuracy: {accuracy:.4f}")
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(report)
print(f"AUC Score: {auc_score:.4f}")

# Confusion matrix as a heat-map
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.show()

# ROC curve, with the chance diagonal for reference
fpr, tpr, thresholds = roc_curve(y_test, y_pred_probs)
roc_fig, roc_ax = plt.subplots(figsize=(6, 4))
roc_ax.plot(fpr, tpr, label=f"AUC = {auc_score:.4f}")
roc_ax.plot([0, 1], [0, 1], linestyle='--', color='gray')
roc_ax.set_xlabel("False Positive Rate")
roc_ax.set_ylabel("True Positive Rate")
roc_ax.set_title("ROC Curve")
roc_ax.legend()
roc_ax.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Plot Loss and Accuracy Curves for the tuned network
#
# `history` holds the training log of the baseline network, so plotting it here
# would only repeat the baseline curves. The tuner search above does not leave a
# per-epoch history for its best model in any variable, so a fresh model is
# built from the best hyperparameters and fitted with the same settings as a
# tuner trial; its history is what gets plotted.
# NOTE(review): this refit is a new training run, so its curves approximate -
# but are not identical to - the run that produced the tuner's best model.
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
tuned_model = build_model(best_hp)
tuned_history = tuned_model.fit(
    X_train_scaled, y_train,
    epochs=30,  # same budget as each tuner trial
    batch_size=32,
    validation_data=(X_test_scaled, y_test),
    class_weight=class_weight_dict,
    verbose=0
)

# Extract values from the training history of the tuned configuration
history_dict = tuned_history.history

train_loss = history_dict['loss']
val_loss = history_dict['val_loss']
train_acc = history_dict.get('accuracy')  # Might be 'acc' in older Keras
val_acc = history_dict.get('val_accuracy')

epochs = range(1, len(train_loss) + 1)

# Plot loss
plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
plt.plot(epochs, train_loss, label='Training Loss')
plt.plot(epochs, val_loss, label='Validation Loss')
plt.title('Loss Over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)

# Plot accuracy
plt.subplot(1, 2, 2)
plt.plot(epochs, train_acc, label='Training Accuracy')
plt.plot(epochs, val_acc, label='Validation Accuracy')
plt.title('Accuracy Over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Metrics from Neural Network default and tuned models (excluding macro/weighted F1-scores)
# NOTE(review): every number below is hard-coded - presumably transcribed from the
# printed output of an earlier run - so the table will not follow the models if
# they are retrained. The three lists are aligned by position: the i-th value of
# each model list belongs to the i-th entry of "Metric".
metrics = {
    "Metric": [
        "Accuracy",
        "AUC Score",
        "Precision (Class 0)",
        "Precision (Class 1)",
        "Recall (Class 0)",
        "Recall (Class 1)",
        "F1-Score (Class 0)",
        "F1-Score (Class 1)",
        "True Negatives (TN)",
        "False Positives (FP)",
        "False Negatives (FN)",
        "True Positives (TP)"
    ],
    "Default Model": [
        0.8559,
        0.8696,
        0.5127,
        0.9571,
        0.7790,
        0.8695,
        0.6184,
        0.9112,
        585,
        166,
        556,
        3705
    ],
    "Tuned Model": [
        0.8869,
        0.8787,
        0.6036,
        0.9479,
        0.7137,
        0.9174,
        0.6541,
        0.9324,
        536,
        215,
        352,
        3909
    ]
}

# Relative change (in %) from the default to the tuned value of each metric;
# None where the default value is 0 or missing
percent_change = [
    None if (orig == 0 or orig is None) else ((tuned - orig) / orig) * 100
    for orig, tuned in zip(metrics["Default Model"], metrics["Tuned Model"])
]

# Assemble the comparison table
df_nn = pd.DataFrame({
    "Metric": metrics["Metric"],
    "Default Model": metrics["Default Model"],
    "Tuned Model": metrics["Tuned Model"],
    "% Change": percent_change
})

# Display formats: 4 decimals for the model columns, signed percentages
# (blank when no change could be computed)
nn_column_formats = {
    "Default Model": "{:.4f}",
    "Tuned Model": "{:.4f}",
    "% Change": lambda x: "" if pd.isnull(x) else f"{x:+.2f}%"
}
styled_df_nn = df_nn.style.format(nn_column_formats)
styled_df_nn = styled_df_nn.set_caption("Neural Network Performance Comparison")
styled_df_nn = styled_df_nn.set_properties(**{'text-align': 'center'})

styled_df_nn

In [None]:
# Bar chart of the percentage change per metric (from df_nn)
nn_change_fig, nn_change_ax = plt.subplots(figsize=(10, 6))

bars = nn_change_ax.bar(df_nn["Metric"], df_nn["% Change"], color='skyblue')

# Zero line separates improvements from regressions
nn_change_ax.axhline(0, color='gray', linewidth=0.8)
plt.xticks(rotation=45, ha='right')
nn_change_ax.set_ylabel("Percentage Change (%)")
nn_change_ax.set_title("Percentage Change of Metrics: Tuned vs Original Neural Network Model")

# Write each bar's value just above its top edge
for bar in bars:
    height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2
    nn_change_ax.annotate(f'{height:+.2f}%',
                          xy=(bar_centre, height),
                          xytext=(0, 3),
                          textcoords="offset points",
                          ha='center', va='bottom', fontsize=8)

plt.tight_layout()
plt.show()

In [None]:
# Data for Neural Network including confusion matrix metrics
# NOTE(review): the numbers in both dictionaries below are hard-coded - presumably
# transcribed from the printed output of earlier runs - and will not follow the
# models if they are retrained. Within each dictionary the lists are aligned by
# position with the "Metric" list.
nn_metrics = {
    "Metric": [
        "Accuracy",
        "AUC Score",
        "Precision (Class 0)",
        "Recall (Class 0)",
        "F1-Score (Class 0)",
        "Precision (Class 1)",
        "Recall (Class 1)",
        "F1-Score (Class 1)",
        "True Negatives (TN)",
        "False Positives (FP)",
        "False Negatives (FN)",
        "True Positives (TP)"
    ],
    "Default Model": [
        0.8559,
        0.8696,
        0.5127,
        0.7790,
        0.6184,
        0.9571,
        0.8695,
        0.9112,
        585,
        166,
        556,
        3705
    ],
    "Tuned Model": [
        0.8869,
        0.8787,
        0.6036,
        0.7137,
        0.6541,
        0.9479,
        0.9174,
        0.9324,
        536,
        215,
        352,
        3909
    ]
}


# Data for XGBoost including confusion matrix metrics
xgb_metrics = {
    "Metric": [
        "Accuracy",
        "AUC Score",
        "Precision (Class 0)",
        "Recall (Class 0)",
        "F1-Score (Class 0)",
        "Precision (Class 1)",
        "Recall (Class 1)",
        "F1-Score (Class 1)",
        "True Negatives (TN)",
        "False Positives (FP)",
        "False Negatives (FN)",
        "True Positives (TP)"
    ],
    "Default Model": [
        0.8659,
        0.9012,
        0.5400,
        0.7800,
        0.6400,
        0.9600,
        0.8800,
        0.9200,
        587,
        164,
        508,
        3753
    ],
    "Tuned Model": [
        0.8651,
        0.8987,
        0.5300,
        0.7800,
        0.6300,
        0.9600,
        0.8800,
        0.9200,
        585,
        166,
        510,
        3751
    ]
}

# One DataFrame per model family
nn_df = pd.DataFrame(nn_metrics)
xgb_df = pd.DataFrame(xgb_metrics)

# Metrics reported for both families, in alphabetical order
common_metrics = sorted(set(nn_df["Metric"]) & set(xgb_df["Metric"]))

# Keep only those metrics and put both frames into that same row order
nn_df = nn_df.set_index("Metric").loc[common_metrics].reset_index()
xgb_df = xgb_df.set_index("Metric").loc[common_metrics].reset_index()

# Percentage change of the neural network relative to XGBoost: (NN - XGB) / XGB * 100
def percentage_change(nn_values, xgb_values):
    """Return the pairwise relative difference of ``nn_values`` vs ``xgb_values`` in percent.

    Args:
        nn_values: iterable of neural-network metric values.
        xgb_values: iterable of the matching XGBoost metric values.

    Returns:
        List with one entry per pair; NaN where the XGBoost value is 0,
        which avoids a division by zero.
    """
    return [
        np.nan if xgb_val == 0 else ((nn_val - xgb_val) / xgb_val) * 100
        for nn_val, xgb_val in zip(nn_values, xgb_values)
    ]

# Table 1: default XGBoost vs default neural network
xgb_default_values = xgb_df["Default Model"].values
nn_default_values = nn_df["Default Model"].values
default_compare = pd.DataFrame({
    "Metric": common_metrics,
    "XGBoost Default": xgb_default_values,
    "Neural Net Default": nn_default_values
})
default_compare["% Change (NN vs XGB)"] = percentage_change(
    default_compare["Neural Net Default"],
    default_compare["XGBoost Default"],
)

# Table 2: tuned XGBoost vs tuned neural network
xgb_tuned_values = xgb_df["Tuned Model"].values
nn_tuned_values = nn_df["Tuned Model"].values
tuned_compare = pd.DataFrame({
    "Metric": common_metrics,
    "XGBoost Tuned": xgb_tuned_values,
    "Neural Net Tuned": nn_tuned_values
})
tuned_compare["% Change (NN vs XGB)"] = percentage_change(
    tuned_compare["Neural Net Tuned"],
    tuned_compare["XGBoost Tuned"],
)

# Styling helper for the comparison tables
def format_percentage_change(df, col_name):
    """Return a Styler for ``df`` with a signed-percentage column.

    Args:
        df: comparison table whose first column is the label and whose last
            column is the percentage change.
        col_name: name of the column rendered as a signed 2-decimal percentage.

    Returns:
        Styler with ``col_name`` formatted as e.g. '+1.23%', the columns in
        between shown with 4 decimals, and centred text.
    """
    value_columns = df.columns[1:-1]
    styler = df.style.format({col_name: "{:+.2f}%"})
    styler = styler.format(precision=4, subset=value_columns)
    return styler.set_properties(**{'text-align': 'center'})

# Render both tables
print("Table 1: Default Models Comparison")
display(format_percentage_change(default_compare, "% Change (NN vs XGB)"))

print("\nTable 2: Tuned Models Comparison")
display(format_percentage_change(tuned_compare, "% Change (NN vs XGB)"))

# Bar chart of the NN-vs-XGB percentage changes
def plot_bar(df, title):
    """Draw one bar per metric for the '% Change (NN vs XGB)' column of ``df``.

    Args:
        df: comparison table with 'Metric' and '% Change (NN vs XGB)' columns.
        title: chart title.

    Bars whose height is NaN are left without a value label.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    bars = ax.bar(df["Metric"], df["% Change (NN vs XGB)"], color='skyblue')
    # Zero line separates metrics where the network is ahead from those where it is behind
    ax.axhline(0, color='gray', linewidth=0.8)
    plt.xticks(rotation=45, ha='right')
    ax.set_ylabel("Percentage Change (%)")
    ax.set_title(title)
    for bar in bars:
        height = bar.get_height()
        if np.isnan(height):
            continue
        bar_centre = bar.get_x() + bar.get_width() / 2
        ax.annotate(f'{height:+.2f}%',
                    xy=(bar_centre, height),
                    xytext=(0, 3),
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=8)
    plt.tight_layout()
    plt.show()

# One chart per comparison table: default models first, tuned models second
for comparison_table, chart_title in (
    (default_compare, "Percentage Change: Neural Network Default vs XGBoost Default"),
    (tuned_compare, "Percentage Change: Neural Network Tuned vs XGBoost Tuned"),
):
    plot_bar(comparison_table, chart_title)
