# Importing Libraries and Dataset

In [1]:
#Import Libraries
import pandas as pd
import numpy as np
from sklearn import metrics as skmetrics
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

In [2]:
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100

In [3]:
X_smote = pd.read_csv('data/X_smote.csv')
y_smote = pd.read_csv('data/y_smote.csv')
X_val = pd.read_csv('data/X_val.csv')
y_val = pd.read_csv('data/y_val.csv')
X_test = pd.read_csv('data/X_test.csv')
y_test = pd.read_csv('data/y_test.csv')

# Standard Scaling

In [4]:
stdsc = StandardScaler()

X_smote = stdsc.fit_transform(X_smote)
X_val = stdsc.transform(X_val)
X_test = stdsc.transform(X_test)

# Evaluation Metrics Function

In [5]:
def print_statistics(y_actual, y_pred, y_prob, dataset_type):
  print(f"====================={dataset_type}====================")
  print(f"accuracy: {round(skmetrics.accuracy_score(y_actual, y_pred),5)}")
  print(f"precision (macro): {round(skmetrics.precision_score(y_actual, y_pred, average='macro'),5)}")
  print(f"recall (macro): {round(skmetrics.recall_score(y_actual, y_pred, average='macro'),5)}")
  print(f"f1 score (macro): {round(skmetrics.f1_score(y_actual, y_pred, average='macro'),5)}")
  print(f"f1 score of class 1: {round(skmetrics.f1_score(y_actual, y_pred, pos_label=1),5)}")
  print(f"f1 score of class 0: {round(skmetrics.f1_score(y_actual, y_pred, pos_label=0),5)}")
  prec, recall, thresholds = skmetrics.precision_recall_curve(y_actual, y_prob)
  print(f"pr auc score of class 1: {round(skmetrics.auc(recall, prec),5)}")
  prec_0, recall_0, thresholds = skmetrics.precision_recall_curve(y_actual, y_prob, pos_label=0)
  print(f"pr auc score of class 0: {round(skmetrics.auc(recall_0, prec_0),5)}")

# Logistic Regression Model

In [None]:
lr = LogisticRegression()
model_train = lr.fit(X_smote,y_smote.values.ravel())

In [8]:
# Evaluating Train and Test
train_pred_proba = model_train.predict(X_smote)
train_pred = np.round(train_pred_proba)
val_pred_proba = model_train.predict(X_val)
val_pred = np.round(val_pred_proba)
test_pred_proba = model_train.predict(X_test)
test_pred = np.round(test_pred_proba)

In [9]:
print_statistics(y_smote, train_pred, train_pred_proba, 'train')
print_statistics(y_val, val_pred, val_pred_proba, 'val')
print_statistics(y_test, test_pred, test_pred_proba, 'test')

accuracy: 0.96078
precision (macro): 0.96148
recall (macro): 0.96078
f1 score (macro): 0.96077
f1 score of class 1: 0.96
f1 score of class 0: 0.96153
pr auc score of class 1: 0.97506
pr auc score of class 0: 0.26519
accuracy: 0.94868
precision (macro): 0.94829
recall (macro): 0.91997
f1 score (macro): 0.93286
f1 score of class 1: 0.90027
f1 score of class 0: 0.96545
pr auc score of class 1: 0.92176
pr auc score of class 0: 0.39356
accuracy: 0.94556
precision (macro): 0.94733
recall (macro): 0.91298
f1 score (macro): 0.9283
f1 score of class 1: 0.89312
f1 score of class 0: 0.96348
pr auc score of class 1: 0.91774
pr auc score of class 0: 0.39176
