# Basic steps for developing a model with Pycaret

In [3]:
# !pip install pycaret

In [4]:
from pycaret.datasets import get_data
data1 = get_data(dataset = 'traffic')

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,Rush Hour,traffic_volume
0,,288.28,0.0,0.0,40,Clouds,1,5545
1,,289.36,0.0,0.0,75,Clouds,0,4516
2,,289.58,0.0,0.0,90,Clouds,0,4767
3,,290.13,0.0,0.0,90,Clouds,0,5026
4,,291.14,0.0,0.0,75,Clouds,0,4918


In [5]:
from pycaret.regression import *

In [8]:
dataset = setup(data = data1, target = 'traffic_volume',
                session_id = 438, verbose=True)

Unnamed: 0,Description,Value
0,Session id,438
1,Target,traffic_volume
2,Target type,Regression
3,Original data shape,"(48204, 8)"
4,Transformed data shape,"(48204, 29)"
5,Transformed train set shape,"(33742, 29)"
6,Transformed test set shape,"(14462, 29)"
7,Numeric features,5
8,Categorical features,2
9,Preprocess,True


In [9]:
best = compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lightgbm,Light Gradient Boosting Machine,1502.4277,3077063.5115,1754.0547,0.2189,0.9297,2.4635,0.127
gbr,Gradient Boosting Regressor,1530.7769,3148984.75,1774.4289,0.2006,0.9421,2.5603,0.41
ada,AdaBoost Regressor,1577.2863,3282090.0319,1811.574,0.1669,0.973,2.7074,0.245
knn,K Neighbors Regressor,1573.7193,3662877.1551,1913.7609,0.0702,0.9602,2.5605,0.147
rf,Random Forest Regressor,1547.0023,3740449.9683,1933.7739,0.0504,0.9554,2.4869,1.46
omp,Orthogonal Matching Pursuit,1712.8788,3848922.2942,1961.8288,0.0229,1.0224,2.964,0.049
dummy,Dummy Regressor,1743.7632,3939897.8403,1984.8734,-0.0001,1.0318,2.9022,0.078
et,Extra Trees Regressor,1679.3206,4592693.0108,2142.8898,-0.1661,1.0486,2.6657,0.958
dt,Decision Tree Regressor,1749.2798,5217471.6829,2283.9553,-0.3246,1.1263,2.5748,0.088
lar,Least Angle Regression,1604.9581,9624481.6422,2459.2351,-1.4661,0.9639,2.6734,0.045


In [10]:
tuned_best = tune_model(estimator = best)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1486.2811,3054604.889,1747.7428,0.2134,0.9327,5.0459
1,1509.9101,3123112.3214,1767.233,0.2099,0.9357,2.6766
2,1503.57,3098646.524,1760.2973,0.2193,0.9388,2.1946
3,1490.2079,3054327.4922,1747.6634,0.2206,0.9207,3.0757
4,1512.5551,3115128.6707,1764.9727,0.2296,0.9286,1.7317
5,1495.9241,3055230.1784,1747.9217,0.2144,0.9304,2.7295
6,1458.1523,2942615.7706,1715.4054,0.2433,0.9032,1.5192
7,1508.3671,3130499.9015,1769.3219,0.1975,0.9388,2.3379
8,1528.915,3204669.493,1790.1591,0.1847,0.9319,1.5767
9,1531.8013,3182206.2593,1783.8739,0.2078,0.9324,2.1785


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


In [11]:
final_model = finalize_model(estimator = tuned_best) 

In [19]:
# model_path = '../../../mlops/automl_traffic_model'
model_path = './automl_traffic_model'


In [18]:
save_model(model = final_model,model_name=model_path)

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=FastMemory(location=C:\Users\WMYFHCK\AppData\Local\Temp\joblib),
          steps=[('numerical_imputer',
                  TransformerWrapper(include=['temp', 'rain_1h', 'snow_1h',
                                              'clouds_all', 'Rush Hour'],
                                     transformer=SimpleImputer())),
                 ('categorical_imputer',
                  TransformerWrapper(include=['holiday', 'weather_main'],
                                     transformer=SimpleImputer(strategy='most_frequent'))),
                 ('onehot_encoding',
                  TransformerWrapper(include=['holiday', 'weather_main'],
                                     transformer=OneHotEncoder(cols=['holiday',
                                                                     'weather_main'],
                                                               handle_missing='return_nan',
                                                               use_cat_names=True))),

In [2]:
# import os
# os.getcwd()

#### Predictions

In [20]:
saved_model = load_model(model_path)

Transformation Pipeline and Model Successfully Loaded


In [22]:
predictions = predict_model(saved_model, data=data1)
predictions.head()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Light Gradient Boosting Machine,1473.1839,2963948.3758,1721.6121,0.2492,0.9181,2.4328


Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,Rush Hour,traffic_volume,prediction_label
0,,288.279999,0.0,0.0,40,Clouds,1,5545,4667.723418
1,,289.359985,0.0,0.0,75,Clouds,0,4516,3332.933375
2,,289.579987,0.0,0.0,90,Clouds,0,4767,3256.430729
3,,290.130005,0.0,0.0,90,Clouds,0,5026,3234.240455
4,,291.140015,0.0,0.0,75,Clouds,0,4918,3370.561625
