In [1]:
import pandas as pd 
import numpy as np 
from pathlib import Path
from pycaret.regression import *

In [4]:
data = np.load(Path().resolve().parents[1] / f"data/aus_data.npy", allow_pickle=True)[()]
df = pd.DataFrame(data['X'].astype('float64'), columns=data['features'])
df['CT_RT'] = data['y'].astype('float64')
del data

In [6]:
df.to_csv(Path().resolve().parents[1] / "data/aus.csv")

In [3]:
exp = setup(data = df, 
            target = 'CT_RT', 
            session_id=123,
            normalize = True, 
            transformation = True, 
            #transform_target = True, 
            combine_rare_levels = True, 
            rare_level_threshold = 0.05,
            remove_multicollinearity = True, 
            multicollinearity_threshold = 0.95, 
            #train_size=0.8,
            log_experiment = True, 
            fold=5,
            experiment_name = 'aus')

Unnamed: 0,Description,Value
0,session_id,123
1,Target,CT_RT
2,Original Data,"(466, 24)"
3,Missing Values,False
4,Numeric Features,23
5,Categorical Features,0
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(326, 19)"


In [4]:
compare_models()


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
et,Extra Trees Regressor,12052.9698,475049330.9464,21304.9661,0.6782,1.7787,15.1054,0.05
catboost,CatBoost Regressor,12460.1675,506351488.2575,21835.5657,0.6624,1.7381,9.6778,0.69
lightgbm,Light Gradient Boosting Machine,14890.4283,579652922.4267,23572.5597,0.599,2.0694,20.6233,0.036
gbr,Gradient Boosting Regressor,14750.3355,643671580.9616,24627.4903,0.5586,1.9636,15.2726,0.022
xgboost,Extreme Gradient Boosting,14325.6664,700807180.8,25801.8227,0.5175,1.6557,7.8422,0.316
rf,Random Forest Regressor,15806.8571,734249893.17,26669.6544,0.4871,2.0261,20.2171,0.06
knn,K Neighbors Regressor,16250.2182,828307603.2,28336.2598,0.4304,2.1043,18.2598,0.008
br,Bayesian Ridge,20638.2984,838242496.0058,28550.8059,0.4208,2.5943,56.2677,0.006
ridge,Ridge Regression,20986.5988,845287577.6,28669.3738,0.4152,2.6266,62.2346,0.29
llar,Lasso Least Angle Regression,21123.0435,852099898.2457,28782.964,0.4099,2.6482,64.4012,0.006


ExtraTreesRegressor(bootstrap=False, ccp_alpha=0.0, criterion='mse',
                    max_depth=None, max_features='auto', max_leaf_nodes=None,
                    max_samples=None, min_impurity_decrease=0.0,
                    min_impurity_split=None, min_samples_leaf=1,
                    min_samples_split=2, min_weight_fraction_leaf=0.0,
                    n_estimators=100, n_jobs=-1, oob_score=False,
                    random_state=123, verbose=0, warm_start=False)

In [5]:
ct = create_model('catboost', fold = 5)


Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,10369.9773,198506926.1121,14089.2486,0.7716,1.9252,11.5583
1,10053.251,333198696.6683,18253.731,0.7143,1.8186,12.3464
2,15766.0163,890479122.0315,29840.8968,0.5804,1.5357,5.9734
3,12530.0427,477875894.0696,21860.3727,0.7209,1.5929,7.161
4,13581.5502,631696802.4061,25133.5792,0.5248,1.8182,11.3498
Mean,12460.1675,506351488.2575,21835.5657,0.6624,1.7381,9.6778
SD,2114.3869,240344132.4226,5436.8705,0.0935,0.1483,2.5888


In [6]:
tuned_ct = tune_model(ct)


Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,10507.8052,222339040.518,14911.0375,0.7442,1.9389,12.3467
1,12078.7291,463538607.6168,21529.9468,0.6026,1.842,9.7586
2,17245.7345,997776219.7876,31587.596,0.5299,1.8789,10.3944
3,13917.7705,617314262.9235,24845.8098,0.6394,1.6891,6.6182
4,15218.2664,662155706.0776,25732.3863,0.5018,1.7243,7.688
Mean,13793.6611,592624767.3847,23721.3553,0.6036,1.8146,9.3612
SD,2353.7016,254309487.8979,5470.1071,0.0858,0.0941,2.023


In [7]:
predict_model(tuned_ct)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,CatBoost Regressor,10363.9447,282291201.9621,16801.5238,0.6828,2.1807,28.5897


Unnamed: 0,C,Cr,Mn,Si,Co,Mo,Nb,P,Cu,Ti,...,B,N,S,AGS No.,CT_Temp,CT_EL,CT_RA,log_CT_CS,CT_RT,Label
0,-2.210903,-1.425674,-1.429766,-1.939183,-0.945170,0.108128,-1.469381,-1.336728,1.503929,1.258959,...,-0.211940,-1.063697,-1.565190,0.975326,0.930476,0.052951,-0.267493,-1.241886,2889.000000,22174.432321
1,1.793959,0.438234,0.196628,-0.485400,-1.819383,-0.762986,1.162289,1.105836,-1.293876,-1.546636,...,-1.416130,-2.246065,-1.565190,2.032121,0.930476,0.961575,1.163163,-0.398283,385.600006,4173.450211
2,-0.686701,-0.194687,0.622673,1.480119,0.411143,-0.446481,0.304834,-0.063834,0.568354,-0.447475,...,-1.019383,0.076407,0.540785,-0.460828,-1.387317,-1.210697,-1.461063,0.297778,25568.400391,40370.275208
3,1.793959,0.438234,0.196628,-0.485400,-1.819383,-0.762986,1.162289,1.105836,-1.293876,-1.546636,...,-1.416130,-2.246065,-1.565190,2.032121,-0.584373,0.558572,0.686125,0.985627,6276.000000,6403.304082
4,-0.472198,0.022580,1.042898,0.660584,0.282596,-0.117400,1.073701,1.306274,1.261157,-0.396671,...,1.049922,0.166542,0.540785,0.296488,-1.387317,-0.407491,0.360700,2.032795,363.600006,-5581.269350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,0.008899,-1.260165,-1.328064,-0.895770,-0.945170,0.478120,-0.923127,-1.522957,-0.801109,1.844025,...,-1.280658,-0.001783,1.495670,1.393985,-1.387317,1.589510,1.062534,1.941503,39.099998,2926.043528
136,0.008899,-1.566252,-1.391201,-0.999076,-1.447807,-0.669716,-1.504588,-1.202039,-1.042619,1.561947,...,0.625984,-1.682426,-1.289720,-2.326754,-1.387317,1.657422,1.556308,0.057142,17416.300781,35308.026575
137,-0.686701,0.244642,0.769023,1.336661,0.561935,-0.277748,0.304834,0.913405,0.568354,-0.346862,...,-0.893675,-0.343730,-0.121339,-1.113986,-1.387317,-1.378518,-1.537699,0.297778,23114.400391,25324.091759
138,-0.686701,-0.194687,0.622673,1.480119,0.411143,-0.446481,0.304834,-0.063834,0.568354,-0.447475,...,-1.019383,0.076407,0.540785,-0.460828,-0.584373,-0.971661,-0.819469,-0.269141,26011.400391,53699.594042
