# LightGBM: Permutation Importance Study

### Libraries

In [1]:
import warnings
warnings.filterwarnings('ignore')

import datetime
import gc
import logging
import os
from collections import Counter, defaultdict

import eli5
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from eli5.sklearn import PermutationImportance
from sklearn.ensemble import RandomForestClassifier

from utils.schemas import *
from utils.functions import *

### Logging

In [2]:
# Log file that collects the settings used by each run of this notebook.
LOG_NAME = 'logs/Permutation_importances.log'

# basicConfig opens the log file right away, so the parent directory must
# exist beforehand; create it to keep "Restart & Run All" working on a
# fresh checkout where `logs/` has not been created yet.
os.makedirs(os.path.dirname(LOG_NAME), exist_ok=True)

logging.basicConfig(filename=LOG_NAME, level=logging.WARNING, format='%(asctime)s %(message)s')

# Blank line + banner mark the start of a new run inside the shared log file.
logging.warning("")
logging.warning("##### New Permutation Importance Study #####")

### Data

In [3]:
# Feature-importance table; its `feature` column is read top-down in a later
# cell, so the file is presumably already sorted by importance — TODO confirm.
df_imp = pd.read_csv(filepath_or_buffer='docs/ft_importances_20190831.csv')

In [4]:
# How many features (taken from the top of the importance table) to study.
n = 250
logging.log(logging.WARNING, "Studying first {0} most important features".format(n))

In [5]:
# Names of the first `n` rows of the importance table, as a plain list.
X_cols = df_imp['feature'].iloc[:n].tolist()

In [6]:
# Sanity check: number of selected feature names (at most `n`, since the
# list was built from the first `n` rows of the importance table).
len(X_cols)

250

In [7]:
# Directory (relative to the notebook's working directory) that the
# train/test CSV paths below are built from.
data_folder = 'input'

In [8]:
# Load only the columns that are needed: the selected features, plus the
# target (`isFraud`) and `TransactionDT` for the training file.
train = pd.read_csv(
    data_folder + '/train_ft_eng_1.csv',
    usecols=X_cols + ['isFraud', 'TransactionDT'],
    dtype=schema_ft_eng_1,
)

# NOTE(review): `test` is not referenced by any later cell visible in this
# notebook — confirm it is still needed before paying for the load.
test = pd.read_csv(
    data_folder + '/test_ft_eng_1.csv',
    usecols=X_cols,
    dtype=schema_ft_eng_1,
)

In [9]:
# Row position that separates the first 80% of `train` (used for fitting)
# from the remaining 20% (used as hold-out).
train_split = int(0.8 * len(train))
train_split

472432

In [9]:
# Positional (row-order) split of `train` at `train_split`.
# `.iloc` is required here: `df[:k, :]` hands a tuple of slices to
# DataFrame.__getitem__, which interprets it as a column key and raises
# instead of slicing rows.
# NOTE(review): a row-order split is only a time-based hold-out if the file
# is already ordered by TransactionDT — confirm.
X_train = train[X_cols].iloc[:train_split]
y_train = train['isFraud'].iloc[:train_split]

X_test = train[X_cols].iloc[train_split:]
y_test = train['isFraud'].iloc[train_split:]

# The two parts must cover every row exactly once.
assert len(X_train) + len(X_test) == len(train)

### Model

#### RandomForest

In [10]:
# Record the exact feature list of this run in the study log.
logging.log(logging.WARNING, "Used columns: {}".format(X_cols))

In [None]:
# Record which estimator the permutation importances are computed for.
# (Message fixed: the class name was misspelled as "PermitationImportance",
# which made the entry impossible to find when searching the log.)
logging.warning("Model used for PermutationImportance: {}".format('Random Forest'))

In [12]:
# Random forest whose fitted predictions are later permuted feature by
# feature; the seed is fixed so repeated runs build the same forest.
model_rf = RandomForestClassifier(
    n_estimators=250,
    oob_score=True,
    n_jobs=-1,
    random_state=42,
)

In [13]:
# Fit on the first part of the split only; the remaining rows are kept for
# the permutation-importance evaluation.
model_rf.fit(X=X_train, y=y_train)

In [14]:
# Persist the full hyper-parameter dict of the fitted model in the study log.
logging.warning("Params: {}".format(model_rf.get_params()))

### Permutation Importance

In [None]:
perm_rf = PermutationImportance(model_rf).fit(X_test, y_test)

In [None]:
eli5.show_weights(perm_rf, scoring='roc_auc', feature_names=X_train.columns.tolist(),
                  top=n, random_state=42)