In [1]:
import joblib

In [6]:
import numpy as np

In [3]:
# Reload the previously saved logistic-regression model from disk.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts from a trusted source.
saved_model_path = "Models/log_reg_no_under_sample_balanced_class_weight.pkl"
clf = joblib.load(saved_model_path)

In [4]:
# Echo the loaded estimator so its hyperparameters appear in the cell output.
clf

LogisticRegression(class_weight='balanced', max_iter=1000, random_state=42)

In [7]:
# Training split: read the feature matrix first, then the labels, from
# comma-separated files.
X_train, y_train = (
    np.genfromtxt(csv_path, delimiter=',')
    for csv_path in ("Data/X_train.csv", "Data/y_train.csv")
)

In [8]:
# Cross-validation split: read the feature matrix first, then the labels,
# from comma-separated files.
X_cv, y_cv = (
    np.genfromtxt(csv_path, delimiter=',')
    for csv_path in ("Data/X_cv.csv", "Data/y_cv.csv")
)

In [9]:
# Test split: read the feature matrix first, then the labels, from
# comma-separated files.
X_test, y_test = (
    np.genfromtxt(csv_path, delimiter=',')
    for csv_path in ("Data/X_test.csv", "Data/y_test.csv")
)

In [10]:
from sklearn.linear_model import LogisticRegression

In [12]:
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, precision_score, recall_score

In [21]:
# Candidate values for LogisticRegression's C (inverse regularisation
# strength), roughly log-spaced from strong to weak regularisation.
C_list = [
    1e-4, 1e-3, 1e-2, 1e-1,
    1.0, 3.0, 5.0, 10.0,
    100,
]

In [29]:
# Sweep over C_list: fit one class-weight-balanced logistic regression per
# value of C, keep every fitted model, and record its F1 on the training and
# cross-validation splits. All three lists end up in C_list order.
models, train_f1_score, cv_f1_score = [], [], []

for C in C_list:
    # fit() returns the estimator itself, so construction and fitting chain.
    log_reg = LogisticRegression(
        C=C,
        class_weight="balanced",
        max_iter=1000,
        random_state=42,
    ).fit(X_train, y_train)
    models.append(log_reg)

    train_f1_score.append(f1_score(y_train, log_reg.predict(X_train)))
    cv_f1_score.append(f1_score(y_cv, log_reg.predict(X_cv)))

In [30]:
# Training-split F1 for each candidate, in the same order as C_list.
print(train_f1_score)

[0.5891907855877141, 0.6225926990514515, 0.6262510723477267, 0.6257510729613733, 0.6257510729613733, 0.6257510729613733, 0.6257510729613733, 0.6257510729613733, 0.6257510729613733]


In [31]:
# Cross-validation F1 for each candidate, in the same order as C_list.
print(cv_f1_score)

[0.5459636024265049, 0.5763324299909666, 0.5824719101123595, 0.5813117699910153, 0.5815730337078652, 0.5815730337078652, 0.5815730337078652, 0.5815730337078652, 0.5815730337078652]


In [32]:
# Print the test-split F1 of every fitted candidate, one per line, in the
# order the models were trained. Informational only: the model is picked
# from cv_f1_score, so these numbers must not drive the choice.
for candidate in models:
    print(f1_score(y_test, candidate.predict(X_test)))

0.5707964601769911
0.5971978984238179
0.6020008699434536
0.6013925152306352
0.6020008699434536
0.6020008699434536
0.6020008699434536
0.6020008699434536
0.6020008699434536


In [35]:
# Select the candidate with the highest cross-validation F1. np.argmax
# returns the first index on ties.
best_idx = int(np.argmax(cv_f1_score))
best_model = models[best_idx]

In [37]:
# Score the CV-selected model on the test split and print its F1.
y_test_pred = best_model.predict(X_test)
best_test_f1 = f1_score(y_test, y_test_pred)
print(best_test_f1)

0.6020008699434536


In [39]:
# Persist the selected model. Left as the cell's last expression so the list
# of written file names returned by joblib.dump is shown as the output.
joblib.dump(value=best_model, filename='Models/best_model.pkl')

['Models/best_model.pkl']