In [1]:
import gc
import json
import re
from functools import partial

import joblib
import lightgbm as lgb
import numpy as np
import pandas as pd
import sklearn
from lightgbm import LGBMClassifier
from sklearn.preprocessing import StandardScaler

## Own specific functions 
from functions import *

In [None]:
# Load the resampled dataset. Forward slashes keep the relative path valid on
# Windows, Linux and macOS (a backslash is only a separator on Windows).
df = pd.read_csv("datasets/df_resampled.csv")

In [None]:
# Quick overview of the loaded frame via the df_analysis helper
# (presumably provided by the star-import from `functions` — confirm).
df_analysis(df, "df", analysis_type="header")

In [None]:
# Rows with a known TARGET form the training set; rows without one are set aside.
has_target = df["TARGET"].notna()
train_df = df[has_target]
test_df = df[~has_target]

In [None]:
# Peek at the first labelled rows
train_df.head(n=5)

In [None]:
# Tuned hyper-parameters (presumably the output of an earlier search — confirm).
# n_estimators, num_leaves and max_depth hold an index into a candidate list,
# not the value itself; the other entries are used as-is.
best = {
    'learning_rate': 0.002996768103678267,
    'max_depth': 1,
    'min_child_weight': 43.56681976377634,
    'min_split_gain': 0.025360811467421862,
    'n_estimators': 0,
    'num_leaves': 1,
    'reg_alpha': 0.047205508056568875,
    'reg_lambda': 0.07740351793970067,
}

In [None]:
# Echo the selected hyper-parameters for the record
best_summary = "Best: {}".format(best)
print(best_summary)

In [None]:
# Candidate values for the hyper-parameters that `best` stores as an index
N_ESTIMATORS = [8_000, 10_000, 12_000]  # number of boosting rounds
NUM_LEAVES = [32, 34, 36]               # leaves per tree
MAX_DEPTH = [7, 8, 9]                   # maximum tree depth

In [None]:
# Assemble the estimator configuration from the tuned values.
# For n_estimators / num_leaves / max_depth, `best` holds a position in the
# corresponding candidate list, so the actual value is looked up there.
lgbm_params = {
    "n_estimators": N_ESTIMATORS[best.get("n_estimators")],
    "learning_rate": best.get("learning_rate"),
    "num_leaves": NUM_LEAVES[best.get("num_leaves")],
    "max_depth": MAX_DEPTH[best.get("max_depth")],
    "reg_alpha": best.get("reg_alpha"),
    "reg_lambda": best.get("reg_lambda"),
    "min_split_gain": best.get("min_split_gain"),
    "min_child_weight": best.get("min_child_weight"),
    # colsample_bytree / subsample are not set here (the 0.8 settings stay disabled)
    "is_unbalance": False,
    "n_jobs": -1,  # use every available core
}
clf = LGBMClassifier(**lgbm_params)

In [None]:
# Sanitise column names: drop every character outside [A-Za-z0-9_].
# NOTE(review): presumably required because LightGBM rejects special
# characters in feature names — confirm.
train_df = train_df.rename(columns=lambda x: re.sub("[^A-Za-z0-9_]+", "", x))

#scaler = StandardScaler()

# train_df already contains only rows with a TARGET (filtered when it was
# created) and rename() returned a new frame, so the former extra .copy()
# plus second notnull() filter only duplicated the data in memory.
# Release the frames that are no longer needed before fitting.
del df, test_df
gc.collect()

In [None]:
# Features are every column except the label and the SK_ID_CURR identifier
non_feature_cols = ["TARGET", "SK_ID_CURR"]
X = train_df.drop(labels=non_feature_cols, axis=1)
y_target = train_df["TARGET"]

In [None]:
#X_scaled = scaler.fit_transform(X)

In [None]:
# Train on the full labelled set (no hold-out split is made in this notebook)
clf.fit(X=X, y=y_target)

In [None]:
# Persist the model trained with the best parameters.
# The previous `.format(version=sklearn.__version__)` call was a no-op: the
# path contains no `{version}` placeholder, so the file name never changed.
joblib.dump(clf, "models/model_resampled.pkl")

In [None]:
# --- End of training. The cells below are manual checks that load previously
# saved models from disk and score a few rows. ---
# (A bare `xx` used to sit here; it raised NameError and broke "Run All".)

In [None]:
# Inspect the first 15 training rows
train_df.head(n=15)

In [None]:
# Use the last 15 feature rows as a small batch for trying out saved models.
# (The mistyped `Xnew_X = train_df.drop(...)` assignment was removed: it was
# never read and duplicated the whole feature matrix already held in X.)
new_X = X.tail(15)
#new_X_scaled = scaler.fit_transform(new_X)

In [None]:
# Inspect the last 15 training rows (the ones new_X was taken from)
train_df.tail(n=15)

In [None]:
# Load a previously saved model from disk.
# Forward slashes avoid the invalid "\m" escape sequence of the old literal
# and keep the path portable across operating systems.
# joblib.load unpickles the file: only load model files you trust.
model = joblib.load("models/model_1.0.2_1.pkl")

In [None]:
# Score the 15-row sample with the model loaded in the previous cell
result = model.predict(new_X)
print(result)

In [None]:
# Load the initial model from disk for comparison.
# Forward slashes avoid the invalid "\m" escape sequence of the old literal
# and keep the path portable across operating systems.
initial_model = joblib.load("models/model_1.0.2.pkl")

In [None]:
# Score the same 15-row sample with initial_model (overwrites `result`)
result = initial_model.predict(new_X)
print(result)

In [None]:
# Load the third saved model. Forward slashes avoid the invalid "\m" escape
# sequence of the old literal and keep the path portable across systems.
model_final = joblib.load("models/model_1.0.2_2.pkl")

In [None]:
# Load the customers to score. Forward slashes keep the relative path valid
# on Windows, Linux and macOS.
df_customer = pd.read_csv("datasets/df_customers_to_predict.csv")

In [None]:
# First 10 customers to score
df_customer.head(n=10)

In [None]:
# Last 10 customers to score
df_customer.tail(n=10)

In [None]:
# Select rows where FLAG_OWN_REALTY and FLAG_OWN_CAR are both 0,
# CNT_CHILDREN is above 1, AMT_INCOME_TOTAL is below 85000 and
# AMT_CREDIT exceeds three times AMT_INCOME_TOTAL.
no_realty = df_customer["FLAG_OWN_REALTY"] == 0
no_car = df_customer["FLAG_OWN_CAR"] == 0
several_children = df_customer["CNT_CHILDREN"] > 1
low_income = df_customer["AMT_INCOME_TOTAL"] < 85000
high_credit = df_customer["AMT_CREDIT"] > (df_customer["AMT_INCOME_TOTAL"] * 3)

df_customer[no_realty & no_car & several_children & low_income & high_credit]

In [None]:
# Hand-picked identifiers used for spot checks.
# NOTE(review): presumably SK_ID_CURR values expected to repay (`pay`) or
# not (`no_pay`) — confirm with the author.
pay = [
    100001,
    456168,
    456223,
    100066,
    100028,
    456008,
]
no_pay = [
    100172,
    105749,
    452356,
    452768,
    117362,
]

In [None]:
# Isolate the row(s) of a single customer so they can be scored on their own
customer_id = 456223
df_temp = df_customer.loc[df_customer["SK_ID_CURR"] == customer_id]

In [None]:
# SK_ID_CURR was excluded from the training features, so remove it before predicting
new_X = df_temp.drop("SK_ID_CURR", axis=1)
new_X.shape

In [None]:
#new_X_scaled = scaler.fit_transform(new_X)

In [None]:
# Predict for the selected customer with initial_model and show the first prediction
result = initial_model.predict(new_X)
print(result[0])

In [None]:
# predict_proba output of initial_model for the selected customer
result_proba = initial_model.predict_proba(new_X)

In [None]:
# Show the predict_proba output computed in the previous cell (initial_model)
print(result_proba)

In [None]:
# Predict for the selected customer with `model`; the first prediction is cast to int for display
result = model.predict(new_X)
print(int(result[0]))

In [None]:
# predict_proba output of `model` for the selected customer (overwrites result_proba)
result_proba = model.predict_proba(new_X)

In [None]:
# Show the predict_proba output computed in the previous cell (`model`)
print(result_proba)

In [None]:
# Predict for the selected customer with model_final.
# Print this model's own prediction: the cell previously printed `result`,
# i.e. the stale output of the earlier `model.predict` call.
result_model_final = model_final.predict(new_X)
print(result_model_final[0])

In [None]:
# predict_proba output of model_final for the selected customer (overwrites result_proba)
result_proba = model_final.predict_proba(new_X)

In [None]:
# Show the predict_proba output computed in the previous cell (model_final)
print(result_proba)