## 5- Baseline modeling experiment 3 - Autogluon TabularPredictor (AutoML)
* Using the open-source AutoGluon AutoML library by AWS.
* Using baseline dataset.
* AutoGluon will train several models and an ensemble of them, then pick the best-performing one.
    * Models will be __fit__ on the __train__ set.
    * The __validation__ set will be used to pick the best-performing model.
    * Final evaluation will be done on the __test__ set.

#### Stages
1. __Reading data__.
2. __Autogluon model training__.
3. __Model evaluation__.
4. __Saving predictor__.

In [1]:
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import StratifiedGroupKFold  # for splitting into train/val/test based on customer id while keeping fraud ratio the same for the three sets
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import (
    accuracy_score,
    precision_score, 
    recall_score, 
    f1_score,
    roc_auc_score, 
    average_precision_score, 
    confusion_matrix, 
    classification_report
)
import autogluon
from autogluon.tabular import TabularPredictor
import warnings
warnings.filterwarnings("ignore")
import os

# Display settings for rendered DataFrames: no column-width or column-count
# truncation, and a cap of 100 rows.
pd.options.display.max_colwidth = None
pd.options.display.max_columns = None
pd.options.display.max_rows = 100

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Seed constant, presumably intended for reproducible runs.
# NOTE(review): no cell visible in this notebook references RANDOM_SEED —
# confirm whether it should be passed to the training step or removed.
RANDOM_SEED = 420

### 1- Reading data

In [3]:
# Load the three baseline splits (train / validation / test) in one unpacking;
# the files are read in the same order as the names on the left.
train_df, val_df, test_df = map(
    pd.read_csv,
    (
        "data/train_baseline.csv",
        "data/validation_baseline.csv",
        "data/test_baseline.csv",
    ),
)

In [4]:
# (rows, columns) of the training split.
train_df.shape

(476608, 30)

In [5]:
# Preview the first rows of the training split.
train_df.head()

Unnamed: 0,step,amount,age_0,age_1,age_2,age_3,age_4,age_5,age_6,age_U,gender_E,gender_F,gender_M,gender_U,category_es_barsandrestaurants,category_es_contents,category_es_fashion,category_es_food,category_es_health,category_es_home,category_es_hotelservices,category_es_hyper,category_es_leisure,category_es_otherservices,category_es_sportsandtoys,category_es_tech,category_es_transportation,category_es_travel,category_es_wellnessandbeauty,fraud
0,-1.858796,-0.303248,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0
1,-1.858796,0.018468,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0
2,-1.858796,-0.098661,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0
3,-1.858796,-0.186943,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0
4,-1.858796,-0.017797,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0


In [6]:
# (rows, columns) of the validation split.
val_df.shape

(60291, 30)

In [7]:
# Preview the first rows of the validation split.
val_df.head()

Unnamed: 0,step,amount,age_0,age_1,age_2,age_3,age_4,age_5,age_6,age_U,gender_E,gender_F,gender_M,gender_U,category_es_barsandrestaurants,category_es_contents,category_es_fashion,category_es_food,category_es_health,category_es_home,category_es_hotelservices,category_es_hyper,category_es_leisure,category_es_otherservices,category_es_sportsandtoys,category_es_tech,category_es_transportation,category_es_travel,category_es_wellnessandbeauty,fraud
0,-1.858796,-0.020728,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0
1,-1.858796,-0.168078,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0
2,-1.858796,-0.071645,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0
3,-1.858796,-0.234106,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0
4,-1.858796,0.622063,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [8]:
# (rows, columns) of the test split.
test_df.shape

(57744, 30)

In [9]:
# Preview the first rows of the test split.
test_df.head()

Unnamed: 0,step,amount,age_0,age_1,age_2,age_3,age_4,age_5,age_6,age_U,gender_E,gender_F,gender_M,gender_U,category_es_barsandrestaurants,category_es_contents,category_es_fashion,category_es_food,category_es_health,category_es_home,category_es_hotelservices,category_es_hyper,category_es_leisure,category_es_otherservices,category_es_sportsandtoys,category_es_tech,category_es_transportation,category_es_travel,category_es_wellnessandbeauty,fraud
0,-1.858796,-0.048201,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0
1,-1.858796,-0.06844,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0
2,-1.858796,-0.184287,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0
3,-1.858796,0.027718,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0
4,-1.858796,0.457954,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0


### 2- Autogluon model training

In [10]:
# Directory passed as `path=` to TabularPredictor in the training cell.
save_path = 'models/baseline-autogluon-tabular-predictor' 

In [11]:
%%time

predictor = TabularPredictor(
    label="fraud",
    problem_type="binary",
    eval_metric="f1",
    path=save_path,
    sample_weight="balance_weight"
).fit(
    train_data=train_df,
    tuning_data=val_df,
    presets="medium_quality",
    verbosity=False,
    use_bag_holdout=True,
    num_cpus=os.cpu_count()
)

CPU times: user 1h 59min 8s, sys: 16.7 s, total: 1h 59min 25s
Wall time: 10min 48s


### 3- Model evaluation

#### 3.1- Evaluation on validation set

In [12]:
%%time

# Score the fitted predictor on the validation split.
# NOTE: val_df was also passed as tuning_data during fit, so these metrics
# come from data the training run already used for tuning.
predictor.evaluate(val_df)

CPU times: user 4.81 s, sys: 132 ms, total: 4.94 s
Wall time: 782 ms


{'f1': 0.7502799552071668,
 'accuracy': 0.9963012721633411,
 'balanced_accuracy': np.float64(0.8318896666499407),
 'mcc': 0.7548261088658379,
 'roc_auc': np.float64(0.9937185075802166),
 'precision': 0.8611825192802056,
 'recall': 0.6646825396825397}

In [13]:
# Per-model leaderboard computed against the validation split,
# ordered from highest to lowest validation score.
leaderboard_val = (
    predictor
    .leaderboard(data=val_df)
    .sort_values(by="score_val", ascending=False)
)

In [14]:
# (models, columns) in the validation leaderboard.
leaderboard_val.shape

(12, 13)

In [15]:
# Show up to 50 leaderboard rows, best validation score first.
leaderboard_val.head(50)

Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L2,0.75028,0.75028,f1,0.756297,0.517537,403.95797,0.008957,0.005664,1.104706,2,True,12
1,NeuralNetTorch,0.740991,0.740991,f1,0.076268,0.075804,230.09496,0.076268,0.075804,230.09496,1,True,10
2,ExtraTreesEntr,0.733634,0.733634,f1,0.253016,0.108214,17.318475,0.253016,0.108214,17.318475,1,True,7
3,RandomForestEntr,0.731597,0.731597,f1,0.130961,0.089236,18.395466,0.130961,0.089236,18.395466,1,True,4
4,ExtraTreesGini,0.721088,0.721088,f1,0.209774,0.098326,17.267387,0.209774,0.098326,17.267387,1,True,6
5,RandomForestGini,0.719637,0.719637,f1,0.148545,0.099371,18.522568,0.148545,0.099371,18.522568,1,True,3
6,NeuralNetFastAI,0.710723,0.710723,f1,0.263383,0.216056,135.919089,0.263383,0.216056,135.919089,1,True,8
7,LightGBMLarge,0.650672,0.650672,f1,3.418545,3.290402,84.004405,3.418545,3.290402,84.004405,1,True,11
8,LightGBM,0.596105,0.596105,f1,3.395416,3.114259,111.320007,3.395416,3.114259,111.320007,1,True,2
9,LightGBMXT,0.191117,0.191117,f1,0.005552,0.004059,0.51752,0.005552,0.004059,0.51752,1,True,1


#### 3.2- Evaluation on test set

In [16]:
%%time

# Score the fitted predictor on the test split, which was not passed to fit().
predictor.evaluate(test_df)

CPU times: user 4.52 s, sys: 75.8 ms, total: 4.6 s
Wall time: 709 ms


{'f1': 0.7526170798898072,
 'accuracy': 0.9922243003602106,
 'balanced_accuracy': np.float64(0.8197742363016195),
 'mcc': 0.7608517415055338,
 'roc_auc': np.float64(0.9927634879160355),
 'precision': 0.9118825100133512,
 'recall': 0.6407129455909943}

In [21]:
# Per-model leaderboard computed against the test split,
# ordered from highest to lowest test score.
leaderboard_test = (
    predictor
    .leaderboard(data=test_df)
    .sort_values(by="score_test", ascending=False)
)

In [22]:
# (models, columns) in the test leaderboard.
leaderboard_test.shape

(12, 13)

In [23]:
# Show up to 50 leaderboard rows, best test score first.
leaderboard_test.head(50)

Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,NeuralNetTorch,0.760965,0.740991,f1,0.083333,0.075804,230.09496,0.083333,0.075804,230.09496,1,True,10
1,WeightedEnsemble_L2,0.752617,0.75028,f1,0.779608,0.517537,403.95797,0.009238,0.005664,1.104706,2,True,12
2,RandomForestEntr,0.741228,0.731597,f1,0.131043,0.089236,18.395466,0.131043,0.089236,18.395466,1,True,4
3,RandomForestGini,0.734694,0.719637,f1,0.137219,0.099371,18.522568,0.137219,0.099371,18.522568,1,True,3
4,ExtraTreesEntr,0.728075,0.733634,f1,0.247767,0.108214,17.318475,0.247767,0.108214,17.318475,1,True,7
5,ExtraTreesGini,0.724574,0.721088,f1,0.210145,0.098326,17.267387,0.210145,0.098326,17.267387,1,True,6
6,LightGBMLarge,0.715336,0.650672,f1,3.493027,3.290402,84.004405,3.493027,3.290402,84.004405,1,True,11
7,LightGBM,0.701803,0.596105,f1,3.280285,3.114259,111.320007,3.280285,3.114259,111.320007,1,True,2
8,NeuralNetFastAI,0.685129,0.710723,f1,0.270921,0.216056,135.919089,0.270921,0.216056,135.919089,1,True,8
9,LightGBMXT,0.33113,0.191117,f1,0.009051,0.004059,0.51752,0.009051,0.004059,0.51752,1,True,1


### 4- Saving predictor

In [20]:
%%time

# Write the fitted predictor to disk. No path is passed here; the predictor was
# created with path=save_path — presumably that is where the artifacts land (confirm).
predictor.save()

CPU times: user 153 ms, sys: 8 ms, total: 161 ms
Wall time: 160 ms
