# MLflow Training

In [22]:
pd.read_csv("../../data/train.csv").describe()

Unnamed: 0,Monthly_Inhand_Salary,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Delay_from_due_date,Num_Credit_Inquiries,Credit_Utilization_Ratio,Total_EMI_per_month
count,84998.0,100000.0,100000.0,100000.0,100000.0,98035.0,100000.0,100000.0
mean,4194.17085,17.09128,22.47443,72.46604,21.06878,27.754251,32.285173,1403.118217
std,3183.686167,117.404834,129.05741,466.422621,14.860104,193.177339,5.116875,8306.04127
min,303.645417,-1.0,0.0,1.0,-5.0,0.0,20.0,0.0
25%,1625.568229,3.0,4.0,8.0,10.0,3.0,28.052567,30.30666
50%,3093.745,6.0,5.0,13.0,18.0,6.0,32.305784,69.249473
75%,5957.448333,7.0,7.0,20.0,28.0,9.0,36.496663,161.224249
max,15204.633333,1798.0,1499.0,5797.0,67.0,2597.0,50.0,82331.0


In [29]:
def train(in_n_estimators, in_max_depth):
    import os
    import warnings
    import sys

    import pandas as pd
    import numpy as np
    from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestClassifier

    import mlflow
    import mlflow.sklearn
    
    import logging
    logging.basicConfig(level=logging.WARN)
    logger = logging.getLogger(__name__)

    def eval_metrics(actual, pred):
        precision = precision_score(actual, pred, average='weighted')
        recall = recall_score(actual, pred, average='weighted')
        accuracy = accuracy_score(actual, pred)
        f1 = f1_score(actual, pred, average='weighted')
        return  precision, recall, accuracy, f1


    warnings.filterwarnings("ignore")
    np.random.seed(40)

    # Read the csv file
    csv_path = "../../data/train.csv"
    try:
        data = pd.read_csv(csv_path)
    except Exception as e:
        logger.exception(
            "Unable to download training & test CSV. Error: %s", e)
        
    data=data[["Interest_Rate", "Credit_Score"]]

    data_x = data.drop(["Credit_Score"], axis=1)
    data_y = data[["Credit_Score"]]
    
    # Split the data into training and test sets. (0.7, 0.3) split.
    train_x, test_x, train_y, test_y = train_test_split(data_x, 
                                                        data_y, 
                                                        test_size=0.3, 
                                                        stratify=data_y, 
                                                        random_state=40, 
                                                        shuffle=True)

    # Set default values if no number estimators is provided
    if float(in_n_estimators) is None:
        n_estimators = 100
    else:
        n_estimators = int(in_n_estimators)
                  
    # Set default values if no max_depth is provided
    if float(in_max_depth) is None:
        max_depth = 3
    else:
        max_depth = int(in_max_depth)
 
    with mlflow.start_run():
        # Execute ElasticNet
        rf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
        rf.fit(train_x, train_y)

        # Evaluate Metrics
        predicted = rf.predict(test_x)
        (precision, recall, accuracy, f1) = eval_metrics(test_y, predicted)

        # Print out metrics
        print("Random Forest model (n_estimators=%f, max_depth=%f):" % (n_estimators, max_depth))
        print("  Precision: %s" % precision)
        print("  Recall: %s" % recall)
        print("  Accuracy: %s" % accuracy)
        print("  F1: %s" % f1)

        # Log parameter, metrics, and model to MLflow
        mlflow.log_param("n_estimators", n_estimators)
        mlflow.log_param("max_depth", max_depth)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric("recall", recall)
        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("f1", accuracy)

        mlflow.sklearn.log_model(rf, "model")

In [30]:
train(100, 2)

Random Forest model (n_estimators=100.000000, max_depth=2.000000):
  Precision: 0.5015166508914135
  Recall: 0.6019666666666666
  Accuracy: 0.6019666666666666
  F1: 0.5353935839485038


In [3]:
train(0.2, 0.2)

Elasticnet model (alpha=0.200000, l1_ratio=0.200000):
  RMSE: 0.7859129997062342
  MAE: 0.6155290394093894
  R2: 0.20224631822892092


In [4]:
train(0.1, 0.1)

Elasticnet model (alpha=0.100000, l1_ratio=0.100000):
  RMSE: 0.7792546522251949
  MAE: 0.6112547988118587
  R2: 0.2157063843066196
