# Evalutaion

### Model and data loading

In [1]:
import pandas as pd
import joblib
import sys
import os
from sklearn.metrics import classification_report


project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

from src.data.preprocessor import TabnetPreprocessor
from pytorch_tabnet.tab_model import TabNetClassifier


In [2]:
MODEL_SIZE = 'small' 

OUTPUT_DIR = '../src/model' 

MODEL_PATH = os.path.join(OUTPUT_DIR, f'tabnet_model_{MODEL_SIZE}.zip')
PREPROCESSOR_PATH = os.path.join(OUTPUT_DIR, f'preprocessor_{MODEL_SIZE}.pkl')
TEST_SET_PATH = '../src/data/test_set.csv' 
TARGET_COLUMN = 'Label' 

print(f"Model path: {MODEL_PATH}")
print(f"Preprocessor path: {PREPROCESSOR_PATH}")
print(f"Test set path: {TEST_SET_PATH}")

Model path: ../src/model/tabnet_model_small.zip
Preprocessor path: ../src/model/preprocessor_small.pkl
Test set path: ../src/data/test_set.csv


In [3]:
print("Model and preprocessor loading...")

try:
    loaded_preprocessor = joblib.load(PREPROCESSOR_PATH)

    loaded_model = TabNetClassifier()
    loaded_model.load_model(MODEL_PATH)

    print("Loaded")

except FileNotFoundError as e:
    print(f"ERROR: File not found. {e}")


Model and preprocessor loading...




Loaded


In [4]:
print("Test set loading")
test_df = pd.read_csv(TEST_SET_PATH)

X_test = test_df.drop(columns=[TARGET_COLUMN])
y_test = test_df[TARGET_COLUMN]

print(f": {len(test_df)} rows.")

X_test_processed = loaded_preprocessor.transform(X_test)


Test set loading
: 448488 rows.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  final_df[col_name].fillna(unknown_code, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  final_df[col_name].mask(final_df[col_name] == -1, unknown_code, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate

In [5]:
print("Prediciting...")
y_pred = loaded_model.predict(X_test_processed.values)

report = classification_report(y_test, y_pred)

print("\n==============================================")
print(f"  Report (Model: {MODEL_SIZE})")
print("==============================================")
print(report)

Prediciting...

  Report (Model: small)
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    430149
           1       0.98      1.00      0.99     18339

    accuracy                           1.00    448488
   macro avg       0.99      1.00      0.99    448488
weighted avg       1.00      1.00      1.00    448488

