In [1]:
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score
from xgboost import XGBClassifier
from joblib import load

In [2]:
# Load the testing dataset; the CSV's first column becomes the DataFrame index.
df_test = pd.read_csv("../data/processed/df_test_final.csv", index_col=0)

# Outcome: the first remaining column (position 0).
y_test = df_test.iloc[:, 0]
# Features: every column from position 2 onwards.
# NOTE(review): the column at position 1 is used as neither outcome nor
# feature -- confirm that dropping it is intentional.
X_test = df_test.iloc[:, 2:]

In [3]:
# Restore the tuned models saved under ../models.

# Random forest: deserialised with joblib. joblib files are pickle-based,
# so only load artifacts that come from a trusted source.
rf_clf = load("../models/rf.joblib")

# Gradient boosting: create an empty XGBClassifier, then read the saved
# JSON model into it.
xgb_clf = XGBClassifier()
xgb_clf.load_model("../models/xgb.json")

In [4]:
# function for evaluating the models with precision, recall and f1 score
def evaluate(model, X=None, y=None, average='micro'):
    """Print precision, recall and F1 for ``model``'s predictions.

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict(X)``.
    X
        Features to predict on. Defaults to the notebook-level ``X_test``.
    y
        True labels matching ``X``. Defaults to the notebook-level ``y_test``.
    average
        Averaging strategy forwarded to ``precision_score``, ``recall_score``
        and ``f1_score``. The default ``'micro'`` keeps the original
        behaviour. Per the scikit-learn documentation, micro-averaging over
        all labels in a single-label problem makes precision, recall and F1
        all equal to accuracy, so the three printed numbers coincide; pass
        ``'macro'`` or ``'weighted'`` to get scores that differ per metric.

    Returns
    -------
    None
        The scores are printed, not returned.
    """
    # Fall back to the notebook globals so existing ``evaluate(model)`` calls
    # behave exactly as before.
    if X is None:
        X = X_test
    if y is None:
        y = y_test

    y_pred = model.predict(X)
    precision = precision_score(y, y_pred, average=average)
    recall = recall_score(y, y_pred, average=average)
    f1 = f1_score(y, y_pred, average=average)
    print(f"precision score: {precision:.4f}, recall score: {recall:.4f}, f1 score: {f1:.4f}")

In [5]:
# random forest: print precision, recall and f1 on the test set for the
# model loaded from ../models/rf.joblib
evaluate(rf_clf)

precision score: 0.2271, recall score: 0.2271, f1 score: 0.2271


In [6]:
# gradient boosting: print precision, recall and f1 on the test set for the
# XGBoost model loaded from ../models/xgb.json
evaluate(xgb_clf)

precision score: 0.2278, recall score: 0.2278, f1 score: 0.2278
