# Consumer Behavior Analytics - Data Modelling  of `customers_whole`

**Libraries and imports**

In [1]:
# Basic DS libraries
import pandas as pd
import numpy as np

# DataViz libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Statistics Libraries
from scipy import stats
from statsmodels.stats.outliers_influence import variance_inflation_factor as vif

# Data Utils
from sklearn.model_selection import train_test_split, cross_validate, RandomizedSearchCV
from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, f1_score, \
                                                                     recall_score, \
                                                                     precision_score, \
                                                                     accuracy_score, \
                                                                     roc_auc_score, \
                                                                     auc, \
                                                                     plot_confusion_matrix, \
                                                                     plot_roc_curve
                                                                         
from imblearn.over_sampling import SMOTE

# Models
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier 
from xgboost import XGBClassifier

import pickle
import joblib

# Notebook setup
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

In [2]:
# Loading customers exposed
customers_exposed = pd.read_csv('../data/customers_exposed.csv', parse_dates = ['Dt_Customer'])

# Loading customers whole
customers_whole = pd.read_csv('../data/customers_whole.csv', parse_dates = ['Dt_Customer'])

In [3]:
customers_whole.head()

Unnamed: 0,ID,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth,AcceptedCmp3,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Response,Family_Size,Income_PerCap,Total_Spent,Prop_Spending_Income_pc,Total_Puchases,Avg_Ticket,Age
0,5524,1957,Graduation,Single,58138.0,0,0,2012-09-04,58,635,88,546,172,88,88,3,8,10,4,7,0,0,0,0,0,0,1,1,58138.0,1617,0.027813,25,64.68,57
1,2174,1954,Graduation,Single,46344.0,1,1,2014-03-08,38,11,1,6,2,1,6,2,1,1,2,5,0,0,0,0,0,0,0,3,15448.0,27,0.001748,6,4.5,60
2,4141,1965,Graduation,Together,71613.0,0,0,2013-08-21,26,426,49,127,111,21,42,1,8,2,10,4,0,0,0,0,0,0,0,2,35806.5,776,0.021672,21,36.95,49
3,6182,1984,Graduation,Together,26646.0,1,0,2014-02-10,26,11,4,20,10,3,5,2,2,0,4,6,0,0,0,0,0,0,0,3,8882.0,53,0.005967,8,6.62,30
4,5324,1981,PhD,Married,58293.0,1,0,2014-01-19,94,173,43,118,46,27,15,5,5,3,6,5,0,0,0,0,0,0,0,3,19431.0,422,0.021718,19,22.21,33


In [4]:
# Checking class balance (or imballance)
customers_whole['Response'].value_counts(normalize = True)

0    0.850873
1    0.149127
Name: Response, dtype: float64

## Data Preparation

We are going to start preparing the data for modelling regarding both datasets: `customers_whole` and `customers_exposed`, but only until One Hot Encoding.

After that, we will save both one hot encoded dataframes into new csv files and split both analysis in different notebooks. The analysis in this notebook will be for `customers_whole` dataset.

In [5]:
# Defining Sample first_date
first_date = customers_whole['Dt_Customer'].min()

# Transforming datetime feature to numeric feature
for df in [customers_exposed, customers_whole]:
    df['Dt_Customer_InDays'] = df['Dt_Customer'] - first_date
    
    df['Dt_Customer_InDays'] = (df['Dt_Customer_InDays'] / np.timedelta64(1, 'D')).astype(int) + 1
    
    # Dropping unuseful columns for modelling
    df.drop(['ID', 'Dt_Customer'], axis = 1, inplace = True)

In [6]:
customers_whole['Education'].value_counts()

Graduation    1126
PhD            483
Master         369
2n Cycle       201
Basic           54
Name: Education, dtype: int64

In [7]:
# Encoding Education Variable
# customers_whole['Education'] = customers_whole['Education'].map({'Basic': 1,
#                                                                  '2n Cycle': 2,
#                                                                  'Graduation': 3,
#                                                                  'Master': 4, 
#                                                                  'PhD': 5})

In [8]:
# One Hot Encoding categorical feature 'Marital_Status' with pd.get_dummies
customers_exposed_ohe = pd.get_dummies(customers_exposed)
customers_whole_ohe = pd.get_dummies(customers_whole)

customers_whole_ohe

Unnamed: 0,Year_Birth,Income,Kidhome,Teenhome,Recency,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth,AcceptedCmp3,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Response,Family_Size,Income_PerCap,Total_Spent,Prop_Spending_Income_pc,Total_Puchases,Avg_Ticket,Age,Dt_Customer_InDays,Education_2n Cycle,Education_Basic,Education_Graduation,Education_Master,Education_PhD,Marital_Status_Divorced,Marital_Status_Married,Marital_Status_Single,Marital_Status_Together,Marital_Status_Widow
0,1957,58138.0,0,0,58,635,88,546,172,88,88,3,8,10,4,7,0,0,0,0,0,0,1,1,58138.00,1617,0.027813,25,64.68,57,37,0,0,1,0,0,0,0,1,0,0
1,1954,46344.0,1,1,38,11,1,6,2,1,6,2,1,1,2,5,0,0,0,0,0,0,0,3,15448.00,27,0.001748,6,4.50,60,587,0,0,1,0,0,0,0,1,0,0
2,1965,71613.0,0,0,26,426,49,127,111,21,42,1,8,2,10,4,0,0,0,0,0,0,0,2,35806.50,776,0.021672,21,36.95,49,388,0,0,1,0,0,0,0,0,1,0
3,1984,26646.0,1,0,26,11,4,20,10,3,5,2,2,0,4,6,0,0,0,0,0,0,0,3,8882.00,53,0.005967,8,6.62,30,561,0,0,1,0,0,0,0,0,1,0
4,1981,58293.0,1,0,94,173,43,118,46,27,15,5,5,3,6,5,0,0,0,0,0,0,0,3,19431.00,422,0.021718,19,22.21,33,539,0,0,0,0,1,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2228,1967,61223.0,0,1,46,709,43,182,42,118,247,2,9,3,4,5,0,0,0,0,0,0,0,3,20407.67,1341,0.065711,18,74.50,47,319,0,0,1,0,0,0,1,0,0,0
2229,1946,64014.0,2,1,56,406,0,30,0,0,8,7,8,2,5,7,0,0,0,1,0,0,0,5,12802.80,444,0.034680,22,20.18,68,681,0,0,0,0,1,0,0,0,1,0
2230,1981,56981.0,0,0,91,908,48,217,32,12,24,1,2,3,13,6,0,1,0,0,0,0,0,1,56981.00,1241,0.021779,19,65.32,33,545,0,0,1,0,0,1,0,0,0,0
2231,1956,69245.0,0,1,8,428,30,214,80,30,61,2,6,5,10,3,0,0,0,0,0,0,0,3,23081.67,843,0.036522,23,36.65,58,544,0,0,0,1,0,0,0,0,1,0


In [9]:
# Saving One Hot Enconded files into a new csv file
customers_whole_ohe.to_csv('../data/customers_whole_ohe.csv', header = True, index = False)
# pd.read_csv('../data/customers_whole_ohe.csv')

customers_exposed_ohe.to_csv('../data/customers_exposed_ohe.csv', header = True, index = False)
# pd.read_csv('../data/customers_exposed_ohe.csv')

Both files have been saved! We will not need to load the `customers_whole_ohe.csv` into this notebook, but it is aways good to keep a standartd log of actions.

**We will move forward with modelling for the `customers_whole` dataset hereafter.**

Let's start:

### Splitting Data into _Train_, _Validation_ and _Test_ sets

We will split the data according to the following schedule:

- Create a `df_train` and a `df_test`.
- From the previous `df_train` we will once again split it into two: `df_train` and `df_val`.

We also know that _specially_ in this dataset (`_whole`) we have unballanced data. So we will perform a oversampling technique called SMOTE. According to the paper published in _The Journal of Artificial Intelligence Research_ in 2002:

> [With SMOTE] The minority class is over-sampled by taking each minority class sample and introducing synthetic examples along the line segments joining any/all of the $k$ minority class nearest neighbors. Depending upon the amount of over-sampling required, neighbors from the k nearest neighbors are randomly chosen.[$^{SMOTE: \: Synthetic\:Minority\:Over-sampling\:Technique}$](https://arxiv.org/pdf/1106.1813.pdf)

- Finally, we will separate all dfs into `X`'s and `y`, naming respectively accordint to the df they belong to.

Let's start:

In [10]:
# Splitting df_train and df_test for training and testing
df_train, df_test = train_test_split(customers_whole_ohe, test_size = .2, random_state = 7)

In [11]:
# Splitting df_train into df_train and df_val
df_train, df_val = train_test_split(df_train, test_size = .2, random_state = 7)

In [12]:
# Checking target variable balance (or imballance)
df_train['Response'].value_counts()

0    1217
1     211
Name: Response, dtype: int64

In [13]:
# Balancing target variable with SMOTE technique

# Instantiating SMOTER over_sampler
smote = SMOTE(random_state = 7)

# Fitting and resampling data with SMOTE
X_train, y_train = smote.fit_resample(df_train.drop('Response', axis = 1), df_train['Response'])

# Checking target class balance
y_train.value_counts()

0    1217
1    1217
Name: Response, dtype: int64

In [14]:
X_val = df_val.drop('Response', axis = 1)
y_val = df_val['Response']

X_test = df_test.drop('Response', axis = 1)
y_test = df_test['Response']

Let's check if the generated `X`'s and `y`'s are correctly built:

In [15]:
print('X_train, y_train   shapes: ', X_train.shape, y_train.shape)
print('X_val  , y_val     shapes: ', X_val.shape, y_val.shape)
print('X_test , y_test    shapes: ', X_test.shape, y_test.shape)

X_train, y_train   shapes:  (2434, 40) (2434,)
X_val  , y_val     shapes:  (358, 40) (358,)
X_test , y_test    shapes:  (447, 40) (447,)


**All shapes match**. We are good to go on.

In [16]:
rf_model2 = RandomForestClassifier()
param_grid = {
            'n_estimators': [50, 60, 70, 80, 90, 100],
            'max_depth': np.arange(0, 10),
            'min_samples_split': np.arange(0, 10),
            'min_samples_leaf': stats.loguniform(.01, 1),
        }
rf_model_rsearch = RandomizedSearchCV(rf_model2, param_distributions = param_grid, cv = 10, 
                                      n_jobs = -1, scoring = 'f1', random_state = 7, n_iter = 50)

In [17]:
rf_model_rsearch.fit(X_train, y_train)

RandomizedSearchCV(cv=10, estimator=RandomForestClassifier(), n_iter=50,
                   n_jobs=-1,
                   param_distributions={'max_depth': array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
                                        'min_samples_leaf': <scipy.stats._distn_infrastructure.rv_frozen object at 0x12fcbc7c0>,
                                        'min_samples_split': array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
                                        'n_estimators': [50, 60, 70, 80, 90,
                                                         100]},
                   random_state=7, scoring='f1')

In [18]:
rf_model_rsearch.predict(X_val)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1,
       0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [20]:
print(classification_report(y_val, rf_model_rsearch.predict(X_val)))

              precision    recall  f1-score   support

           0       0.93      0.88      0.91       299
           1       0.53      0.66      0.59        59

    accuracy                           0.85       358
   macro avg       0.73      0.77      0.75       358
weighted avg       0.86      0.85      0.85       358



### Analyzing multicolinearity

In the previous notebooks, we have created new variables from pre-existing variables. Therefore we made room for possible multicolinearity.

Some techniques for analyzing multicolinearity are:

- Checking correlation values between variables;
- Checking the Variance Inflation Factor (VIF) and dropping variables with factor $> 10$;
- Performing Principal Component Analysis, to the cost of lesser interpretability;
- Perform regularization such as (Lasso or Ridge) for linear models, such as Logistic Regression;

For the sake of simplicity, let's go foward with `VIF` and drop variables with factor $ >10$:

In [None]:
X_train.columns

In [None]:
# Separating numeric features in a list, except booleans
numeric_features = [
    'Year_Birth', 
#     'Education',
    'Income', 
    'Kidhome',                
    'Teenhome', 
    'Recency', 
    'MntWines', 
    'MntFruits',
    'MntMeatProducts', 
    'MntFishProducts', 
    'MntSweetProducts',
    'MntGoldProds', 
    'NumDealsPurchases', 
    'NumWebPurchases',
    'NumCatalogPurchases', 
    'NumStorePurchases', 
    'NumWebVisitsMonth',
    'Total_Spent',
    'Total_Puchases',
    'Family_Size',
    'Income_PerCap',
    'Prop_Spending_Income_pc', 
    'Avg_Ticket', 
    'Age', 
    'Dt_Customer_InDays'
]

In [None]:
# Creating a list of varibles for VIF analysis

################################################################################################
#   This cell has been iterated "mannually" after checking vif values in the dataframe below   #
################################################################################################

numeric_features_vif_ok = [
#     'Year_Birth', 
#     'Education',
#     'Income', 
    'Kidhome',                
    'Teenhome', 
    'Recency', 
    'MntWines', 
    'MntFruits',
    'MntMeatProducts', 
    'MntFishProducts', 
    'MntSweetProducts',
    'MntGoldProds', 
    'NumDealsPurchases', 
    'NumWebPurchases',
    'NumCatalogPurchases', 
    'NumStorePurchases', 
#     'NumWebVisitsMonth',
#     'Total_Spent',
#     'Total_Puchases',
#     'Family_Size',
    'Income_PerCap',
    'Prop_Spending_Income_pc', 
    'Avg_Ticket', 
#     'Age', 
    'Dt_Customer_InDays'
]

In [None]:
# Creating a dataframe for storing vif and its respective variable
vif_df = pd.DataFrame()

# Calculating vif values and saving it into vif_index columns
vif_df["vif_index"] = [vif(X_train[numeric_features_vif_ok].values, i) \
                               for i in range(X_train[numeric_features_vif_ok].shape[1])]

# Saving variable name into feature column
vif_df["feature"] = X_train[numeric_features_vif_ok].columns

# Checking results
vif_df
# del(vif_df)

All `VIF` factor are now $\le 10$, we can start dealing with the different orders of magnitude in our numeric features. 

In [None]:
to_drop_from_vif = [feature for feature in numeric_features if feature not in numeric_features_vif_ok]
to_drop_from_vif

### Scaling

In order to have the numeric data in the same order of magnite, we will:

- Use RobustScaler for variables with outliers;
- Use StandardScaler for variables with no outliers;


Let's start by listing features with outliers:

**Getting features with ouliers**

In [None]:
# Listing features names if feature has outlier
to_robust_scale = []
for feature in numeric_features:
    
    Q1 = np.percentile(X_train[feature].sort_values(), 25, interpolation = 'midpoint')  
    Q3 = np.percentile(X_train[feature].sort_values(), 75, interpolation = 'midpoint')  

    IQR = Q3 - Q1  
    
    low_lim = Q1 - 1.5 * IQR 
    up_lim = Q3 + 1.5 * IQR 

    if (X_train[feature] > up_lim).any() or (X_train[feature] < low_lim).any(): 
         to_robust_scale.append(feature)

to_robust_scale

And then, from the previous list, we can list the variables that will be standardized:

**Listing features _without_ outliers**

In [None]:
to_standardize = [feature for feature in numeric_features if feature not in to_robust_scale]
to_standardize

**Applying RobustScaler to variables listed in `to_robust_scale` list**

In [None]:
robust_scaler = RobustScaler()
robust_scaler.fit(X_train[to_robust_scale])
X_train[to_robust_scale] = robust_scaler.transform(X_train[to_robust_scale])

**Applying StandardScaler to variables listed in `to_standardize` list**

In [None]:
stand_scaler = StandardScaler()

stand_scaler.fit(X_train[to_standardize])
X_train[to_standardize] = stand_scaler.transform(X_train[to_standardize])

Let's check the `X_train` dataset to see if everything went well:

In [None]:
# Checking statistics from scaled DFs
round(X_train.describe(), 2)

The dataset seems alright.

Aiming to avoid **data leakage**, we've performed the `.fit` method using only the `X_train` dataset. We need now to `.transform` the values from `X_val` and `X_test` datasets so we can use them later to make predictions and evaluate our model.

In [None]:
# Transforming x_val and x_test with scalers from X_train
X_val[to_robust_scale] = robust_scaler.transform(X_val[to_robust_scale])
X_test[to_robust_scale] = robust_scaler.transform(X_test[to_robust_scale])

X_val[to_standardize] = stand_scaler.transform(X_val[to_standardize])
X_test[to_standardize] = stand_scaler.transform(X_test[to_standardize])

In [None]:
# Checking transformed datasets
# X_val.head() # Uncomment to view dataframes
# X_test.head()

## Baseline Model

Let's start with a baseline model.

A baseline model is a good pratice to determine if all sweat put into modelling with different algorithms and hyperparameter tuning is worth the effort.

We can use a simple **Linear Regression**, not tunned, model as our baseline.

### Simple LinearRegression

In [None]:
# Instantiating the model
log_model = LogisticRegression()

# Fitting the model
log_model.fit(X_train, y_train)

In [None]:
# predicting using X_val
y_val_pred = log_model.predict(X_val)
# y_val_pred

In [None]:
# Evaluating metrics with Skelearn Classification Report
print(classification_report(y_val, y_val_pred))

In [None]:
fig, ax = plt.subplots(1, 2, figsize = (10, 4))
plot_roc_curve(log_model, X_val, y_val, ax = ax[0])
plot_confusion_matrix(log_model, X_val, y_val, cmap=plt.cm.Reds, ax = ax[1])
plt.tight_layout()

### Keeping metrics logs in MLFlow

In [None]:
from mlflow.tracking import MlflowClient

EXPERIMENT_NAME = '[v2.2] [customers_whole] [Consumer Behavior Analytics] [Renan Moises]'

client = MlflowClient()
experiment_id = client.create_experiment(EXPERIMENT_NAME)

In [None]:
metrics_names = ['accuracy', 'precision', 'recall', 'f1_score']
metrics = [accuracy_score, precision_score, recall_score, f1_score]

In [None]:
run = client.create_run(experiment_id)

client.log_param(run.info.run_id, 'model', 'LogistRegression-Baseline')

for metric in zip(metrics_names, metrics):
    client.log_metric(run.info.run_id, metric[0], metric[1](y_val, y_val_pred))

## Modelling for Real

- LogisticRegression (tunned)
- KNNClassifier
- SVC
- RFClassifier
- AdaBoost
- GradientBoostClassifier
- XGBoostClassifier

In [None]:
models = {
    'LogisticRegression-Tunned': LogisticRegression(),
    'KNNClassifier': KNeighborsClassifier(),
    'SVC': SVC(),
    'RandomForestClassifier': RandomForestClassifier(),
    'AdaBoostClassifier': AdaBoostClassifier(),
    'GradientBoostingClassifier': GradientBoostingClassifier(),
    'XGboostClassifier': XGBClassifier()
}

In [None]:
for model_key, model_value in models.items():
    X_train_modelling = X_train.copy().drop(to_drop_from_vif, axis = 1)
    X_val_modelling = X_val.copy().drop(to_drop_from_vif, axis = 1)
    
    if model_key == 'LogisticRegression-Tunned':
        # Dropping Multicolinear Features for Logistic Regression
#         X_train_modelling = X_train.drop(to_drop_from_vif, axis = 1)
#         X_val_modelling = X_val.drop(to_drop_from_vif, axis = 1)
        param_grid = {
            'penalty': ['l2', 'l1', 'elasticnet'],
            'tol': stats.loguniform(0.1, 1),
            'C': stats.loguniform(3, 10)
        }
    
    elif model_key == 'KNNClassifier':
        # Dropping Multicolinear Features for KNNClassifier
#         X_train_modelling = X_train.drop(to_drop_from_vif, axis = 1)
#         X_val_modelling = X_val.drop(to_drop_from_vif, axis = 1)
        param_grid = {'n_neighbors':[3, 4, 5, 6, 7]}
    
    elif model_key == 'SVC':
        # Dropping Multicolinear Features for SVC
#         X_train_modelling = X_train.drop(to_drop_from_vif, axis = 1)
#         X_val_modelling = X_val.drop(to_drop_from_vif, axis = 1)
        param_grid = {
            'C': stats.loguniform(3, 10)
        }
    
    elif model_key == 'RandomForestClassifier':
        param_grid = {
            'n_estimators': [50, 60, 70, 80, 90, 100],
            'max_depth': np.arange(0, 10),
            'min_samples_split': np.arange(0, 10),
            'min_samples_leaf': stats.loguniform(.01, 1),
        }
    
    elif model_key == 'AdaBoostClassifier':
        param_grid = {
            'learning_rate': stats.lognorm(.001, 1)
        }
    
    elif model_key == 'GradientBoostingClassifier':
        param_grid = {
            'learning_rate': stats.lognorm(.001, 1),
            'n_estimators': [50, 60, 70, 80, 90, 100],
            'min_samples_split': np.arange(0, 10),
            'min_samples_leaf': stats.loguniform(.01, 1),
            'max_depth': np.arange(0, 10)
        }
    
    else:
        param_grid = {
            'n_estimators': [50, 60, 70, 80, 90, 100],
            'max_depth': np.arange(0, 10),
            'learning_rate': stats.lognorm(.001, 1),
            'gamma': stats.lognorm(.001, 1),
        }   
    
    
    # Running RandomizedSearchCV
    print(model_key, '#'.replace('#', '#'*(61 - len(model_key))))
    model_rsearch = RandomizedSearchCV(model_value, 
                                       param_distributions = param_grid, 
                                       n_iter = 50, 
                                       scoring = 'f1', # Used to update weights
                                       cv = 10, 
                                       n_jobs = -1, 
                                       verbose = 1)
    
    # Fitting the model to the train data
    model_rsearch.fit(X_train_modelling, y_train)
    
    # Saving model as a joblib file
    joblib.dump(model_rsearch, f'../models/{model_key}.joblib')
    
    # Predictions using X_val
    y_val_pred = model_rsearch.predict(X_val_modelling)
    
    # Setting up metrics
    metrics_names = ['accuracy', 'precision', 'recall', 'f1_score']
    metrics = [accuracy_score, precision_score, recall_score, f1_score]
    
    # MLFlow Logs
    run = client.create_run(experiment_id)
    for metric_name, metric in zip(metrics_names, metrics):
        client.log_metric(run.info.run_id, metric_name, metric(y_val, y_val_pred))
    client.log_param(run.info.run_id, "model", model_key)
    client.log_param(run.info.run_id, "params", model_value.get_params())
    client.log_param(run.info.run_id, "features", model_rsearch.columns.tolist())
    
    print(classification_report(y_val, y_val_pred), '\n')

___________________