In [None]:
import argparse
import json
from time import time
from pathlib import Path

import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Lift the column cap so wide DataFrames are displayed without truncation.
pd.options.display.max_columns = None

# Modeling

In [None]:
# Locations of the pre-split train/test CSV files (relative paths).
DATA_DIR = Path("../data")
SPLIT_DATA_DIR = DATA_DIR.joinpath("split")
TRAIN_DATA_FILENAME = SPLIT_DATA_DIR.joinpath("lending_club_train.csv")
TEST_DATA_FILENAME = SPLIT_DATA_DIR.joinpath("lending_club_test.csv")

In [None]:
# Read the train and test splits from disk (train first, then test).
df_train, df_test = (
    pd.read_csv(csv_path) for csv_path in (TRAIN_DATA_FILENAME, TEST_DATA_FILENAME)
)

# Columns removed from the feature matrix. "loan_status" is the target and is
# kept separately as y / y_test below.
col_to_drop = ["issue_d", "loan_status", "revol_util", "pub_rec_bankruptcies"]

# Training features and target.
X = df_train.drop(col_to_drop, axis="columns")
y = df_train["loan_status"]

# Test features and target, built exactly the same way.
X_test = df_test.drop(col_to_drop, axis="columns")
y_test = df_test["loan_status"]

# Row counts of each split.
print(f"Training size: {len(X)}")
print(f"Test size: {len(X_test)}")

In [None]:
# Preview the first five rows of the training feature frame.
X.head(n=5)

In [None]:
# Print a summary of the training feature frame (columns, non-null counts, dtypes).
X.info()

In [None]:
# Only numeric columns are used as model inputs; remember their names so the
# same selection can be applied to other frames.
list_features = list(X.select_dtypes(include="number").columns)

# Independent copies, so later changes to the training data cannot touch X / y.
X_train = X.loc[:, list_features].copy()
y_train = y.copy(deep=True)

In [None]:
# Baseline: logistic regression fitted directly on the selected features,
# with no preprocessing step in the pipeline.
base_model = Pipeline(
    steps=[("estimator", LogisticRegression(random_state=11))]
).fit(X_train, y_train)

# Predict on both splits; the test frame is restricted to the training columns.
train_preds = base_model.predict(X_train)
test_preds = base_model.predict(X_test[list_features])

# Heading and report are printed on consecutive lines, with a blank line
# between the two sections.
print("Training performance..", classification_report(y_train, train_preds), sep="\n")
print()
print("Test performance..", classification_report(y_test, test_preds), sep="\n")

In [None]:
# Same estimator as before, but the features are standardized first.
pipeline_steps = [
    ("scaler", StandardScaler()),
    ("estimator", LogisticRegression(random_state=11)),
]
model = Pipeline(steps=pipeline_steps).fit(X_train, y_train)

# Predict on both splits; the test frame is restricted to the training columns.
train_preds = model.predict(X_train)
test_preds = model.predict(X_test[list_features])

# Heading and report are printed on consecutive lines, with a blank line
# between the two sections.
print("Training performance..", classification_report(y_train, train_preds), sep="\n")
print()
print("Test performance..", classification_report(y_test, test_preds), sep="\n")