<a href="https://colab.research.google.com/github/noahfavreau/nasa-space-apps-2025/blob/main/model_architecture.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import matplotlib.pyplot as plt

from catboost import CatBoostClassifier, cv, Pool

from pytorch_tabnet.tab_model import TabNetClassifier

import lightgbm as lgb

from sklearn.linear_model import LogisticRegressionCV

import xgboost as xgb

from xgboost import plot_importance, XGBClassifier

from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split, KFold

import random

import numpy as np

import pickle

# Base seed for reproducibility: passed to KFold as-is and offset by the fold
# index (RANDOM_SEED + Fold) for the CatBoost model's random_state.
RANDOM_SEED = 67

In [None]:
def train_catboost(X_train, X_val, y_train, y_val, Fold):
  """Train a CatBoost multiclass classifier on one cross-validation fold.

  The function is self-contained: it does not read or write any notebook-level
  state (out-of-fold arrays, accuracy lists). The caller is responsible for
  storing the returned predictions and accuracy.

  Args:
    X_train: Training features for this fold.
    X_val: Validation features for this fold.
    y_train: Training labels for this fold.
    y_val: Validation labels for this fold.
    Fold: Zero-based fold index; added to RANDOM_SEED so every fold gets a
      distinct but reproducible seed.

  Returns:
    A tuple ``(model, val_preds, accuracy)`` where ``val_preds`` is a 1-D
    array of predicted class labels for ``X_val`` and ``accuracy`` is the
    accuracy of those predictions against ``y_val``.

  Note:
    The model is configured with ``task_type="GPU"`` on device ``'0'``, so a
    GPU runtime is required.
  """
  catboost_model = CatBoostClassifier(
      iterations=200,
      task_type="GPU",
      devices='0',
      depth=6,
      loss_function='MultiClass',
      verbose=0,
      eval_metric='MultiClass',
      random_state=RANDOM_SEED + Fold,
  )

  catboost_train_pool = Pool(X_train, y_train)
  catboost_val_pool = Pool(X_val, y_val)

  # NOTE(review): the validation fold drives early stopping and is also the
  # fold that gets scored, so the reported accuracy may be slightly optimistic.
  catboost_model.fit(catboost_train_pool,
                     eval_set=catboost_val_pool,
                     early_stopping_rounds=20,
                     use_best_model=True)

  # For MultiClass models predict() returns shape (n_samples, 1); flatten it so
  # the result can be assigned straight into a 1-D out-of-fold array.
  catboost_val_preds = np.ravel(catboost_model.predict(X_val))

  catboost_accuracy = accuracy_score(y_val, catboost_val_preds)

  return catboost_model, catboost_val_preds, catboost_accuracy

In [None]:
def train_lgb(X_train, X_val, y_train, y_val, Fold):
  """Train a LightGBM multiclass booster on one cross-validation fold.

  The function is self-contained: it does not read or write any notebook-level
  state (out-of-fold arrays, accuracy lists). The caller is responsible for
  storing the returned predictions and accuracy.

  Args:
    X_train: Training features for this fold.
    X_val: Validation features for this fold.
    y_train: Training labels for this fold.
    y_val: Validation labels for this fold.
    Fold: Zero-based fold index; added to RANDOM_SEED so every fold gets a
      distinct but reproducible seed.

  Returns:
    A tuple ``(model, val_preds, accuracy)`` where ``val_preds`` is a 1-D
    array of predicted class indices (argmax over the class probabilities)
    for ``X_val`` and ``accuracy`` is their accuracy against ``y_val``.
  """
  params = {
    'objective' : 'multiclass',
    'num_class' : 3,  # labels are expected to be encoded as 0, 1, 2
    'metric' : "multi_logloss",
    'verbose' : 0,
    'seed' : RANDOM_SEED + Fold,
  }

  lgb_train_data = lgb.Dataset(X_train, label=y_train)
  lgb_val_data = lgb.Dataset(X_val, label=y_val, reference=lgb_train_data)

  # The `early_stopping_rounds` keyword of lgb.train() was removed in
  # LightGBM 4.0; the callback form works on both old and new releases.
  lgb_model = lgb.train(params,
                        lgb_train_data,
                        num_boost_round=200,
                        valid_sets=[lgb_val_data],
                        callbacks=[lgb.early_stopping(stopping_rounds=20,
                                                      verbose=False)])

  # Booster.predict() yields one probability column per class.
  lgb_val_preds_proba = lgb_model.predict(X_val,
                                          num_iteration=lgb_model.best_iteration)

  lgb_val_preds = np.argmax(lgb_val_preds_proba, axis=1)

  lgb_accuracy = accuracy_score(y_val, lgb_val_preds)

  return lgb_model, lgb_val_preds, lgb_accuracy

In [None]:
def train_xgb(X_train, X_val, y_train, y_val, Fold):
  """Train an XGBoost multiclass classifier on one cross-validation fold.

  The function is self-contained: it does not read or write any notebook-level
  state (out-of-fold arrays, accuracy lists). The caller is responsible for
  storing the returned predictions and accuracy.

  Args:
    X_train: Training features for this fold.
    X_val: Validation features for this fold.
    y_train: Training labels for this fold.
    y_val: Validation labels for this fold.
    Fold: Zero-based fold index; added to RANDOM_SEED so every fold gets a
      distinct but reproducible seed.

  Returns:
    A tuple ``(model, val_preds, accuracy)`` where ``val_preds`` holds the
    predicted class labels for ``X_val`` and ``accuracy`` is their accuracy
    against ``y_val``.
  """
  # `num_classes` is not an XGBoost parameter (the sklearn wrapper infers the
  # class count from y) and `use_label_encoder` is deprecated/ignored, so both
  # are omitted. The seed is derived from RANDOM_SEED instead of an unseeded
  # random.randint() call so that runs are reproducible.
  xgb_model = XGBClassifier(objective='multi:softmax',
                            eval_metric='mlogloss',
                            max_depth=4,
                            n_estimators=100,
                            random_state=RANDOM_SEED + Fold)

  xgb_model.fit(X_train, y_train)

  xgb_val_preds = xgb_model.predict(X_val)

  xgb_accuracy = accuracy_score(y_val, xgb_val_preds)

  return xgb_model, xgb_val_preds, xgb_accuracy

In [None]:
# TODO: replace these placeholders with the real feature matrix and labels.
# The loop below indexes with .iloc, so pandas objects are expected
# (presumably a DataFrame for X and a Series for y — confirm when wiring data).
X = None
y = None

if X is None or y is None:
  raise ValueError("X and y must be loaded before running cross-validation")

# Per-fold validation accuracy for each model family.
catboost_fold_accuracies = []
xgb_fold_accuracies = []
lgb_fold_accuracies = []

# Out-of-fold predictions: each row is filled exactly once, by the fold in
# which that row belongs to the validation split.
catboost_oof_preds = np.zeros(len(X))
xgb_oof_preds = np.zeros(len(X))
lgb_oof_preds = np.zeros(len(X))

# One fitted model per fold.
catboost_models = []
lgb_models = []
xgb_models = []

n_splits = 10
kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)

for Fold, (train_index, test_index) in enumerate(kf.split(X, y)):
  X_train, X_val = X.iloc[train_index], X.iloc[test_index]
  y_train, y_val = y.iloc[train_index], y.iloc[test_index]

  # Each train_* helper returns (model, val_preds, accuracy); unpack all three.
  cb_model, cb_preds, cb_acc = train_catboost(X_train, X_val, y_train, y_val, Fold)
  # CatBoost may return predictions shaped (n, 1); ravel is a no-op for 1-D.
  catboost_oof_preds[test_index] = np.ravel(cb_preds)
  catboost_fold_accuracies.append(cb_acc)
  catboost_models.append(cb_model)
  print(f"CatBoost Accuracy: {cb_acc:.4f}")

  xgb_model, xgb_preds, xgb_acc = train_xgb(X_train, X_val, y_train, y_val, Fold)
  xgb_oof_preds[test_index] = xgb_preds
  xgb_fold_accuracies.append(xgb_acc)
  xgb_models.append(xgb_model)
  print(f"XGBoost Accuracy: {xgb_acc:.4f}")

  lgb_model, lgb_preds, lgb_acc = train_lgb(X_train, X_val, y_train, y_val, Fold)
  lgb_oof_preds[test_index] = lgb_preds
  lgb_fold_accuracies.append(lgb_acc)
  lgb_models.append(lgb_model)
  print(f"LGBoost Accuracy: {lgb_acc:.4f}")

# Summarise each model as mean accuracy +/- standard deviation across folds.
catboost_mean_accuracy = np.mean(catboost_fold_accuracies)
catboost_std_accuracy = np.std(catboost_fold_accuracies)
print(f"CatBoost : {catboost_mean_accuracy:.4f} (+/- {catboost_std_accuracy:.4f})")

lgb_mean_accuracy = np.mean(lgb_fold_accuracies)
lgb_std_accuracy = np.std(lgb_fold_accuracies)
print(f"LGBoost : {lgb_mean_accuracy:.4f} (+/- {lgb_std_accuracy:.4f})")

xgb_mean_accuracy = np.mean(xgb_fold_accuracies)
xgb_std_accuracy = np.std(xgb_fold_accuracies)
print(f"XGBoost : {xgb_mean_accuracy:.4f} (+/- {xgb_std_accuracy:.4f})")