In [1]:
import sys

# Put the parent directory on the import path so the `letcon` package imported
# in the next cell can be resolved (assumes the kernel's working directory is
# the folder containing this notebook — TODO confirm).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate '..' entry each time.
if '..' not in sys.path:
    sys.path.append('..')

In [2]:
from letcon.src.utils.utils import save_artifacts
from letcon.src.data_loader import data_ml
from letcon.src.model import model_ml

import requests
import pandas as pd

Initializing Logger Path...
Logger Path Exists...
Path for Logger-->C:/Users/SPARS/Desktop/LETCON-Conference/letcon2020-ml-workshop/notebooks/Logs/
Initializing Artifacts Path...
Artifacts Path Exists...
Path for Artifacts-->C:/Users/SPARS/Desktop/LETCON-Conference/letcon2020-ml-workshop/notebooks/Artifacts/


In [3]:
# Read the training CSV into a DataFrame; the relative path is resolved
# against the kernel's current working directory.
TrainData = pd.read_csv(
    filepath_or_buffer='data/wine-quality/winequality-white-updated.csv',
)

In [11]:
# Settings handed to data_ml.DataLoader via its `config=` argument.
data_config = {
    'train_data': TrainData,
    # No separate hold-out frame is supplied here.
    'test_data': None,
    'unique_id': None,
    'time_id': None,
    # Feature columns used as model inputs.
    'x_vars': [
        'fixed_acidity',
        'volatile_acidity',
        'citric_acid',
        'residual_sugar',
        'chlorides',
        'free_sulfur_dioxide',
        'total_sulfur_dioxide',
        'density',
        'pH',
        'sulphates',
        'alcohol',
    ],
    'cat_vars': [],
    # Target column; the same column is also named as the stratification key.
    'y_var': 'quality',
    'stratify': 'quality',
    'test_size': 0.20,
    'use_full_dataset': True,
    'encoding_style': 'label_encoding',
    'impute_missing': 'mean',
    'capping_vars': {},
    'task': 'regression',
    'random_state': 42,
}

In [12]:
# Build the project's data loader from the settings in data_config.
# NOTE(review): data_config['test_data'] is None, and the recorded output of
# this cell shows the loader reporting that it sets test_data equal to
# train_data. Any score computed on that "test" set is presumably in-sample —
# confirm how 'test_size' / 'use_full_dataset' interact with this fallback.
data = data_ml.DataLoader(config=data_config)

test_data is none. Setting test_data equal to train_data


In [15]:
# Settings handed to model_ml.Model via its `config=` argument.
modelling_config = {
    'data': data.get_data(),
    'model_type': 'simple',
    'model_name': 'xgboost',
    'model_inputs': {},
    'scoring_function': 'rmse',
    # Starting hyper-parameters for the model.
    'model_initial_params': {
        'verbose': True,
        'boosting': 'gbtree',
        'tree_method': 'exact',
        'n_estimators': 127,
        'max_depth': 9,
        'reg_alpha': 10,
        'reg_lambda': 22,
        'min_child_weight': 1,
        'gamma': 1,
        'learning_rate': 0.4901527567844427,
    },
    # NOTE(review): the key below is spelled 'hyperparmeter_tuning' (sic).
    # It is presumably looked up verbatim by the library, so do not correct
    # the spelling here without checking model_ml first.
    # NOTE(review): in the recorded Optuna log for this run, the trial with the
    # HIGHER value is reported as best (0.7118 preferred over 0.4798). If the
    # objective value is the RMSE named in 'scoring_function', the study would
    # be optimising in the wrong direction — confirm inside model_ml.
    'hyperparmeter_tuning': {
        'enable_tuning': True,
        'optimizer': 'optuna',
        'optimizer_params': {
            'fixed': {'n_trials': 5},
            # Search space: lists are categorical choices, min/max dicts are ranges.
            'varying': {
                'boosting': ['gbtree', 'gblinear'],
                'tree_method': ['exact', 'approx', 'hist'],
                'n_estimators': {'min': 50, 'max': 200},
                'max_depth': {'min': 2, 'max': 10},
                'reg_alpha': {'min': 0, 'max': 10},
                'reg_lambda': {'min': 0, 'max': 25},
                'min_child_weight': {'min': 0, 'max': 1},
                'gamma': {'min': 0, 'max': 50},
                'learning_rate': {'min': 0.3, 'max': 0.5},
            },
        },
    },
    'shap_analysis': {
        'enable_shap': True,
        'use_explainer': 'tree',
    },
    'task': 'regression',
    'random_state': 42,
}

In [16]:
# Instantiate the project's model wrapper from modelling_config, then build it.
# In the recorded run, create_model() is the call that emitted the Optuna
# "Finished trial#N" log lines shown in this cell's output, i.e. the
# hyper-parameter search configured above runs here.
model = model_ml.Model(config=modelling_config)
model.create_model()

[I 2020-08-19 00:04:48,577] Finished trial#0 with value: 0.704573788864112 with parameters: {'boosting': 'gbtree', 'tree_method': 'exact', 'n_estimators': 64, 'max_depth': 9, 'reg_alpha': 4, 'reg_lambda': 6, 'min_child_weight': 1, 'gamma': 18, 'learning_rate': 0.3157188882096635}. Best is trial#0 with value: 0.704573788864112.
[I 2020-08-19 00:04:48,895] Finished trial#1 with value: 0.47977427200818973 with parameters: {'boosting': 'gbtree', 'tree_method': 'exact', 'n_estimators': 149, 'max_depth': 9, 'reg_alpha': 7, 'reg_lambda': 2, 'min_child_weight': 1, 'gamma': 1, 'learning_rate': 0.4338062248425382}. Best is trial#0 with value: 0.704573788864112.
[I 2020-08-19 00:04:49,153] Finished trial#2 with value: 0.711846627544034 with parameters: {'boosting': 'gblinear', 'tree_method': 'approx', 'n_estimators': 179, 'max_depth': 6, 'reg_alpha': 0, 'reg_lambda': 11, 'min_child_weight': 1, 'gamma': 21, 'learning_rate': 0.301084851720929}. Best is trial#2 with value: 0.711846627544034.
[I 2020

In [17]:
# Wrap the artifacts exposed by the data loader and by the model in the
# project's prediction-side classes. Both objects are passed to
# save_artifacts() in the next cell.
# NOTE(review): presumably these are the objects a serving process reloads to
# preprocess and score new records — confirm against data_ml / model_ml.
data_pipeline_object = data_ml.ProcessPredictionData(config = data.get_data_artifacts())
trained_model_object = model_ml.PredictOnNewData(config = model.get_model_artifacts())

In [18]:
# Persist the data-pipeline and trained-model objects with the project helper.
# The destination is not visible in this notebook; presumably it is the
# "Artifacts" path printed when the letcon utilities were imported — confirm.
save_artifacts(data_object=data_pipeline_object,
               model_object=trained_model_object)

In [21]:
# Send one sample record to the prediction endpoint and print the JSON reply.

# Alternative sample record. It is NOT sent by this cell: previously it was
# bound to `to_predict_dict` and then overwritten before use, so it now has its
# own name. Pass it as `json=` in the request below to try it.
alt_predict_dict = {"pH": 0.38,
                    "chlorides": 0.53,
                    "volatile_acidity": 2.0,
                    "citric_acid": 157,
                    "alcohol": 3.0,
                    "total_sulfur_dioxide": 0,
                    "density": 0,
                    "residual_sugar": 0.0,
                    "fixed_acidity": 0.0,
                    "sulphates" : 0.0,
                    "free_sulfur_dioxide" : 1.0}

# Record actually posted; keys match the 'x_vars' feature names in data_config.
to_predict_dict = {"pH": 3.00,
                   "chlorides": 0.045,
                   "volatile_acidity": 0.27,
                   "citric_acid": 0.36,
                   "alcohol": 8.8,
                   "total_sulfur_dioxide": 170.0,
                   "density": 1.0010,
                   "residual_sugar": 20.7,
                   "fixed_acidity": 7.0,
                   "sulphates" : 0.45,
                   "free_sulfur_dioxide" : 45.0}

# Assumes the prediction service is already running locally on port 8000.
url = 'http://127.0.0.1:8000/predict'
# Without a timeout, requests waits indefinitely if the server is not up,
# which would hang the notebook; fail fast instead.
r = requests.post(url, json=to_predict_dict, timeout=10)
# Surface HTTP 4xx/5xx as an exception rather than printing an error payload
# (or failing inside .json()) as if it were a prediction.
r.raise_for_status()

print(r.json())

{'prediction': 5.484379768371582}
