In [1]:
import pandas as pd
import numpy as np
import joblib
from lightgbm import Dataset
import lightgbm
import utils
from utils import *
import os, csv, ast
import shutil
from scipy.special import softmax
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

### The models that this one uses were trained using the original LoGoFunc `train.py` script.

### Testing

In [17]:
# Load both test and reload training data.
X_train = pd.read_csv('../data/X_train_500.csv', low_memory=False)
X_test = pd.read_csv('../data/X_test_500.csv', low_memory=False)
print(X_train.shape, X_test.shape)

(25546, 500) (2831, 500)


In [18]:
# Define the output path for the CSV file.
output_path = '../results/LGBM-validation.csv'

import numpy as np
import pandas as pd
from scipy.special import softmax
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
import joblib
import utils

# This is the soft-voting function used to aggregate the predictions.
def soft_vote(preds):
    summed_preds = [[np.sum(preds[:, j][:, i]) for i in range(3)] for j in range(len(preds[0]))]
    return [softmax(np.log(sp)) for sp in summed_preds]

# Again, this checks for columns that only contain NaN values.
def drop_allnan(data):
    for col in data.columns:
        if data[col].isna().sum() == len(data):
            data = data.drop(columns=col)
    return data

# Input the training data and do initial pre-processing.
X_train = drop_allnan(X_train)
columns = X_train.columns.tolist()

# Invoke the pre-processing function.
preprocessor = joblib.load('../models/lightgbm/preprocessor.joblib')

# Iterate over the models in the ensemble.
models = []
num_models = 27
for i in range(num_models):
    models.append(joblib.load(f'../models/lightgbm/model_{i}.joblib'))

# Feed the data into the pre-processor functions.
y_test = pd.read_csv('../data/y_test_id.csv', low_memory=False)
impact_vals = {'LOW': 0, 'MODIFIER': 1, 'MODERATE': 1.5, 'HIGH': 2}
encoded_impacts = [impact_vals[imp] for imp in X_test['IMPACT']]
X_test = X_test.drop(columns=['IMPACT'])
X_test['IMPACT'] = encoded_impacts
X_test = X_test[columns]
ids = X_test['ID'].tolist()
X_test = X_test.drop(columns='ID')

# Make sure the data types are the same. because NumPy arrays are not tolerated in places where DataFrames are expected.
for col in X_test.columns:
    X_test[col] = X_test[col].astype(X_train[col].dtype)
X_test = utils.transform(X_test, preprocessor)

# Pool the predictions into a list.
all_preds = []
for i in range(num_models):
    preds = models[i].predict(X_test, num_iteration=-1, pred_leaf=False)
    all_preds.append(preds)

# Apply the soft-voting function.
y_pred_proba = soft_vote(np.array(all_preds))
y_pred = [np.argmax(p) for p in y_pred_proba]

# Map the labels to numbers.
label_mapping = {'Neutral': 0, 'GOF': 1, 'LOF': 2}
y_test_numeric = [label_mapping[label] for label in y_test['label']]

# Perform the evaluation using Scikit-learn's metrics.
accuracy = accuracy_score(y_test_numeric, y_pred)
precision = precision_score(y_test_numeric, y_pred, average='weighted')
recall = recall_score(y_test_numeric, y_pred, average='weighted')
f1 = f1_score(y_test_numeric, y_pred, average='weighted')
roc_auc = roc_auc_score(y_test_numeric, y_pred_proba, multi_class='ovo')
conf_matrix = confusion_matrix(y_test_numeric, y_pred)

# Print the recorded metrics.
print(f'Test Accuracy: {accuracy}')
print(f'Test Precision: {precision}')
print(f'Test Recall: {recall}')
print(f'Test F1 Score: {f1}')
print(f'Test ROC AUC Score: {roc_auc}')
print(f'Confusion Matrix:\n{conf_matrix}')

out = []
for i in range(len(y_pred)):
    out.append([ids[i], ['Neutral', 'GOF', 'LOF'][y_pred[i]], *y_pred_proba[i]])
out = pd.DataFrame(out, columns=['ID', 'prediction', 'LoGoFunc_Neutral', 'LoGoFunc_GOF', 'LoGoFunc_LOF'])
out.to_csv(output_path, index=None)

(2831, 472)
Test Accuracy: 0.8689509007417874
Test Precision: 0.8772429518278922
Test Recall: 0.8689509007417874
Test F1 Score: 0.8721241510398384
Test ROC AUC Score: 0.9334290595243194
Confusion Matrix:
[[1179   37  123]
 [  10  105   37]
 [  97   67 1176]]


### Evaluation against y_test

In [19]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
metrics_file = '../metrics/LGBM-metrics.csv'

In [20]:
# This is just to manually verify the test output.
predictions_file = output_path

# Load y_test.
y_test_path = '../data/y_test_id.csv'
y_test = pd.read_csv(y_test_path)
y_true = y_test['label']

# Encode labels to numbers.
label_mapping = {'Neutral': 0, 'GOF': 1, 'LOF': 2}
y_true_numeric = [label_mapping[label] for label in y_true]

# Load predictions.
predictions = pd.read_csv(predictions_file)

# Ensure DataFrames have same number of rows.
assert len(predictions) == len(y_test)

# Align predictions and true labels by index
y_pred = predictions['prediction']
y_pred_numeric = [label_mapping[label] for label in y_pred]

# Perform the evaluation using Scikit-learn's metrics.
accuracy = accuracy_score(y_true_numeric, y_pred_numeric)
precision = precision_score(y_true_numeric, y_pred_numeric, average='weighted')
recall = recall_score(y_true_numeric, y_pred_numeric, average='macro')
f1 = f1_score(y_true_numeric, y_pred_numeric, average='weighted')
conf_matrix = confusion_matrix(y_true_numeric, y_pred_numeric)

results = [{
    'accuracy': accuracy,
    'precision': precision,
    'recall': recall,
    'f1_score': f1,
    'confusion_matrix': conf_matrix.tolist()
}]

results_df = pd.DataFrame(results)
results_df.to_csv(metrics_file, index=False)
print(results_df)

   accuracy  precision    recall  f1_score  \
0  0.868951   0.877243  0.816303  0.872124   

                                   confusion_matrix  
0  [[1179, 37, 123], [10, 105, 37], [97, 67, 1176]]  


### Compute Macro-REC

In [2]:
input_file = '../metrics/LGBM-metrics.csv'
output_file = '../metrics/LGBM-evaluation.csv'

def macro(confusion_matrix):
    cm = np.array(confusion_matrix)
    recalls = np.diag(cm) / np.sum(cm, axis=1)
    return np.mean(recalls)

def micro(confusion_matrix):
    cm = np.array(confusion_matrix)
    true_positives = np.diag(cm)
    total_true_positives = np.sum(true_positives)
    total_actual_positives = np.sum(cm)
    return total_true_positives / total_actual_positives

with open(input_file, 'r') as csvfile:
    reader = csv.DictReader(csvfile)
    rows = list(reader)

for row in rows:
    confusion_matrix = ast.literal_eval(row['confusion_matrix'])
    macro = macro(confusion_matrix)
    micro_recall = micro(confusion_matrix)
    row['micro_recall'] = f'{micro_recall:.4f}'
    row['macro'] = f'{macro:.4f}'

    # Remove the original 'weighted' REC column.
    if 'recall' in row:
        del row['recall']

with open(output_file, 'w', newline='') as csvfile:
    fieldnames = ['accuracy', 'precision', 'f1_score', 'micro_recall', 'macro', 'confusion_matrix']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for row in rows:
        writer.writerow(row)

print(f"Saved to {output_file}")

Saved to ../metrics/LGBM-evaluation.csv
