In [None]:
pip install bluemist

In [1]:
from sklearn import datasets

from bluemist.environment import initialize
from bluemist.datasource import get_data_from_filesystem
from bluemist.preprocessing import preprocess_data
from bluemist.regression import train_test_evaluate

In [2]:
# Set up the bluemist environment before using any other bluemist function.
initialize()

# Auto MPG dataset as CSV. The URL embeds a commit hash, so the file contents
# stay fixed between runs. Note that the loader is handed a URL here even
# though its name mentions the filesystem.
auto_mpg_url = 'https://raw.githubusercontent.com/plotly/datasets/3aa08e58607d1f36159efc4cca9d0d073bbf57bb/auto-mpg.csv'
data = get_data_from_filesystem(auto_mpg_url)

# Preview the first rows to confirm the columns loaded as expected.
data.head()

[34m
██████╗ ██╗     ██╗   ██╗███████╗███╗   ███╗██╗███████╗████████╗     █████╗ ██╗
██╔══██╗██║     ██║   ██║██╔════╝████╗ ████║██║██╔════╝╚══██╔══╝    ██╔══██╗██║
██████╔╝██║     ██║   ██║█████╗  ██╔████╔██║██║███████╗   ██║       ███████║██║
██╔══██╗██║     ██║   ██║██╔══╝  ██║╚██╔╝██║██║╚════██║   ██║       ██╔══██║██║
██████╔╝███████╗╚██████╔╝███████╗██║ ╚═╝ ██║██║███████║   ██║       ██║  ██║██║                                                                        
                                (version 0.1.1)
    [0m
Bluemist path :: /home/shashank-agrawal/anaconda3/envs/bluemist-test-2/lib/python3.9/site-packages/bluemist
System platform :: posix, Linux, 5.19.0-31-generic, linux-x86_64, ('64bit', 'ELF')


Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year
0,18.0,8,307.0,130.0,3504,12.0,70
1,15.0,8,350.0,165.0,3693,11.5,70
2,18.0,8,318.0,150.0,3436,11.0,70
3,16.0,8,304.0,150.0,3433,12.0,70
4,17.0,8,302.0,140.0,3449,10.5,70


In [3]:
# Prepare train/test features and target: 'mpg' is the value to predict,
# numeric columns are scaled with StandardScaler, and 'model_year' is treated
# as categorical and one-hot encoded.
# drop_categories_one_hot_encoder='first' leaves out the first category, which
# is why the preview below has model_year_71..model_year_82 but no model_year_70.
X_train, X_test, y_train, y_test = preprocess_data(
    data,
    target_variable='mpg',
    data_scaling_strategy='StandardScaler',
    categorical_features=['model_year'],
    categorical_encoder='OneHotEncoder',
    drop_categories_one_hot_encoder='first',
)
X_train.head()

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration,model_year_71,model_year_72,model_year_73,model_year_74,model_year_75,model_year_76,model_year_77,model_year_78,model_year_79,model_year_80,model_year_81,model_year_82
0,-0.83336,-0.907666,-0.849167,-1.339614,-1.222913,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-0.83336,-0.829817,-1.062608,-0.870397,-0.263143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,-0.83336,-1.024438,-0.875847,-1.19524,0.41225,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,-0.83336,-0.907666,-0.395605,-0.793398,1.194285,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-0.83336,-0.683851,-0.395605,0.398895,2.225149,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
# Train and compare models: the progress bar below counts 46 estimators, and
# the resulting table lists mean_absolute_error, mean_squared_error and
# r2_score for each one.
# NOTE(review): presumably the metrics are computed on the X_test / y_test
# split passed in here -- confirm against the bluemist documentation.
train_test_evaluate(X_train, X_test, y_train, y_test)

Training TweedieRegressor: 100%|[34m██████████[0m| 46/46 [00:26<00:00,  1.73it/s]             


Unnamed: 0_level_0,mean_absolute_error,mean_squared_error,r2_score
Estimator,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ARDRegression,2.256667,8.19463,0.849847
AdaBoostRegressor,2.589514,11.544786,0.78846
BaggingRegressor,2.0755,8.978287,0.835487
BayesianRidge,2.305722,8.464032,0.84491
CCA,2.666047,11.255903,0.793754
DecisionTreeRegressor,2.57,11.3566,0.791908
DummyRegressor,6.386631,55.425677,-0.015587
ElasticNet,3.027509,15.35826,0.718584
ElasticNetCV,2.327361,8.477487,0.844664
ExtraTreeRegressor,2.475,12.4825,0.771278


In [5]:
# Hyperparameter tuning
# Only the estimators listed in tune_models appear in this run
# (the progress bar below counts 2 and the table has two rows).
models_to_tune = ['PLSCanonical', 'KernelRidge']
train_test_evaluate(X_train, X_test, y_train, y_test, tune_models=models_to_tune)

Training PLSCanonical: 100%|[34m██████████[0m| 2/2 [00:37<00:00, 18.94s/it]


Unnamed: 0_level_0,mean_absolute_error,mean_squared_error,r2_score
Estimator,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KernelRidge,1.762374,5.959747,0.890797
PLSCanonical,4.928449,33.450465,0.387074


In [None]:
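# After hyperparameter tuning, the error metrics (mean_absolute_error and
# mean_squared_error) decreased for both KernelRidge and PLSCanonical compared
# with the untuned comparison run above.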