In [1]:
import pandas as pd
import numpy as np
from sklearn.externals import joblib 
import pickle

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

import lightgbm as lgb
from sklearn import metrics

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [2]:
# Load the raw churn dataset from the working directory.
df = pd.read_csv("Churn_Modelling.csv")

# Features are the columns at positions 3..12 and the target is the column at
# position 13. Explicit copies are taken so that later column assignments on
# the feature frame never write into (or warn about) a slice of `df`.
df_x = df.iloc[:, 3:13].copy()
df_y = df.iloc[:, 13].copy()

In [3]:
def clean_data(df):
    """Encode the categorical feature columns as numbers.

    ``Gender`` is replaced by the integer labels produced by a freshly
    fitted ``LabelEncoder``. ``Geography`` is expanded into one indicator
    column per distinct value (no level is dropped). The columns of the
    result are sorted by name.

    The frame passed in is left untouched; a new DataFrame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature frame containing ``Gender`` and ``Geography`` columns.

    Returns
    -------
    pandas.DataFrame
        Encoded copy of ``df`` with columns in sorted order.
    """
    # Work on a copy: assigning into the caller's frame would silently change
    # it (and raises SettingWithCopyWarning when the caller passed a slice).
    encoded = df.copy()

    # NOTE(review): the encoder is re-fitted on every call, so the integer
    # given to a label depends on which labels occur in this particular frame.
    encoded["Gender"] = LabelEncoder().fit_transform(encoded["Gender"])

    encoded = pd.get_dummies(data=encoded, columns=["Geography"], drop_first=False)

    # A name-sorted column order keeps the layout independent of input order.
    return encoded.sort_index(axis=1)

In [4]:
# Rebind df_x to its encoded form (Gender as integers, Geography as indicator columns).
df_x = df_x.pipe(clean_data)

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size = 0.2, random_state = 0)

# Persist the feature column order of the encoded frame. The context manager
# guarantees the file is flushed and closed even if pickling raises.
with open("columns.pkl", 'wb') as columns_file:
    pickle.dump(df_x.columns, columns_file)

In [6]:
# StandardScaler is already imported in the notebook's first cell.
scaler = StandardScaler()

# Fit the scaler on the training split only, then apply the same
# transformation to the test split.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Persist the fitted scaler. The context manager guarantees the file is
# flushed and closed even if pickling raises.
with open("std_scaler.pkl", 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

print(X_test)
print(X_train.shape[1])

[[-0.36890377  0.8793029  -0.55204276 ...  0.9687384  -0.92159124
   1.04473698]
 [ 0.10961719  0.42972196 -1.31490297 ... -1.03227043 -0.92159124
  -1.031415  ]
 [ 0.30102557  0.30858264  0.57162971 ...  0.9687384  -0.92159124
   1.04473698]
 ...
 [-0.27319958  1.29745526 -0.74791227 ... -1.03227043  0.8095029
  -1.37744033]
 [-0.46460796  1.05975239 -0.00566991 ...  0.9687384  -0.92159124
  -0.33936434]
 [-0.84742473  0.82026342 -0.79945688 ... -1.03227043 -0.92159124
   1.04473698]]
12


In [7]:
# LightGBM classifier; hyperparameters are listed one per line for easy tuning.
model = lgb.LGBMClassifier(
    max_depth=50,
    learning_rate=0.05,
    num_leaves=1200,
    n_estimators=200,
)
# fit() is the cell's last expression, so the fitted estimator is displayed.
model.fit(X_train, y_train)

LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
               importance_type='split', learning_rate=0.05, max_depth=50,
               min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
               n_estimators=200, n_jobs=-1, num_leaves=1200, objective=None,
               random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
               subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [9]:
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    accuracy_score,
    roc_curve,
    auc,
)

# Class-membership probabilities for the held-out rows (one column per class).
predictions = model.predict_proba(X_test)
# accuracy_score needs hard class labels rather than probabilities, so the check below stays disabled.
#print ("\naccuracy_score :",accuracy_score(y_test,predictions))

In [10]:
# Display the class probabilities returned by model.predict_proba for the test split.
predictions

array([[0.90105893, 0.09894107],
       [0.8460997 , 0.1539003 ],
       [0.97418745, 0.02581255],
       ...,
       [0.98781448, 0.01218552],
       [0.87421136, 0.12578864],
       [0.91738556, 0.08261444]])

In [11]:
# Save the fitted model to a pickle file. The context manager guarantees the
# file is flushed and closed even if pickling raises.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)