In [1]:
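# Install or upgrade bluemist if it is not already available (uncomment to run).
# The outputs recorded in this notebook were produced with bluemist 0.1.1.
# pip install -U bluemist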

In [2]:
# Import bluemist modules

from bluemist.environment import initialize
from bluemist.datasource import get_data_from_filesystem
from bluemist.eda import perform_eda
from bluemist.preprocessing import preprocess_data
from bluemist.regression import train_test_evaluate, deploy_model

In [3]:
# Initialize bluemist.
# In the recorded run this call printed the bluemist banner with its version,
# the package install path and the system platform details.

initialize()

[34m
██████╗ ██╗     ██╗   ██╗███████╗███╗   ███╗██╗███████╗████████╗     █████╗ ██╗
██╔══██╗██║     ██║   ██║██╔════╝████╗ ████║██║██╔════╝╚══██╔══╝    ██╔══██╗██║
██████╔╝██║     ██║   ██║█████╗  ██╔████╔██║██║███████╗   ██║       ███████║██║
██╔══██╗██║     ██║   ██║██╔══╝  ██║╚██╔╝██║██║╚════██║   ██║       ██╔══██║██║
██████╔╝███████╗╚██████╔╝███████╗██║ ╚═╝ ██║██║███████║   ██║       ██║  ██║██║                                                                        
                                (version 0.1.1)
    [0m
Bluemist path :: /home/shashank-agrawal/anaconda3/envs/bluemist-test-2/lib/python3.9/site-packages/bluemist
System platform :: posix, Linux, 5.19.0-31-generic, linux-x86_64, ('64bit', 'ELF')


In [4]:
# Read the Auto MPG CSV (URL pinned to a fixed commit of plotly/datasets)
# and preview the first 5 rows

AUTO_MPG_CSV_URL = 'https://raw.githubusercontent.com/plotly/datasets/3aa08e58607d1f36159efc4cca9d0d073bbf57bb/auto-mpg.csv'
data = get_data_from_filesystem(AUTO_MPG_CSV_URL)
data.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year
0,18.0,8,307.0,130.0,3504,12.0,70
1,15.0,8,350.0,165.0,3693,11.5,70
2,18.0,8,318.0,150.0,3436,11.0,70
3,16.0,8,304.0,150.0,3433,12.0,70
4,17.0,8,302.0,140.0,3449,10.5,70


In [5]:
# Visualize the dataset to perform exploratory data analysis.
# In the recorded run this wrote a pandas-profiling HTML report under the bluemist
# artifacts/eda directory and announced that the report opens in the browser once
# the analysis completes; the whole step took well under a minute.

perform_eda(data)

Output file :: /home/shashank-agrawal/anaconda3/envs/bluemist-test-2/lib/python3.9/site-packages/bluemist/artifacts/eda/pandas-profiling.html
Output file will be opened in the browser after analysis is completed !!


Summarize dataset: 100%|██████████| 52/52 [00:25<00:00,  2.00it/s, Completed]                         
Generate report structure: 100%|██████████| 1/1 [00:11<00:00, 11.65s/it]
Render HTML: 100%|██████████| 1/1 [00:05<00:00,  5.05s/it]
Export report to file: 100%|██████████| 1/1 [00:00<00:00, 77.45it/s]


In [6]:
# Prepare train/test features and targets for predicting 'mpg':
# numeric features are scaled with StandardScaler and 'model_year' is one-hot
# encoded, dropping its first category

preprocessing_options = {
    'target_variable': 'mpg',
    'data_scaling_strategy': 'StandardScaler',
    'categorical_features': ['model_year'],
    'categorical_encoder': 'OneHotEncoder',
    'drop_categories_one_hot_encoder': 'first',
}
X_train, X_test, y_train, y_test = preprocess_data(data, **preprocessing_options)
X_train.head()

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration,model_year_71,model_year_72,model_year_73,model_year_74,model_year_75,model_year_76,model_year_77,model_year_78,model_year_79,model_year_80,model_year_81,model_year_82
0,-0.841887,-0.911719,-0.982357,-0.976343,0.898008,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,-0.841887,-0.835195,-0.77604,-0.876589,-0.383807,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.33046,0.312658,-0.260248,0.948311,1.264241,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.33046,0.551795,-0.1313,0.413163,-0.017574,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.502808,1.202245,1.15818,1.73695,-0.75004,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Train and compare ML models.
# Presumably each estimator is fitted on the train split and scored on the test
# split -- confirm against the bluemist documentation.
# In the recorded run the progress bar counted 46 steps and the displayed table,
# indexed by estimator name, reported mean_absolute_error, mean_squared_error
# and r2_score.

train_test_evaluate(X_train, X_test, y_train, y_test)

Training TweedieRegressor: 100%|[34m██████████[0m| 46/46 [00:38<00:00,  1.19it/s]             


Unnamed: 0_level_0,mean_absolute_error,mean_squared_error,r2_score
Estimator,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ARDRegression,2.76199,13.373264,0.77307
AdaBoostRegressor,2.854687,16.562742,0.718949
BaggingRegressor,2.5182,14.296642,0.757402
BayesianRidge,2.845257,13.626966,0.768765
CCA,2.835868,13.67974,0.76787
DecisionTreeRegressor,3.182,23.7786,0.596503
DummyRegressor,6.377987,59.045147,-0.001931
ElasticNet,3.442778,21.383647,0.637143
ElasticNetCV,2.832844,13.590949,0.769377
ExtraTreeRegressor,3.059,18.8999,0.67929


In [None]:
# Deploy the model as RESTful API.
# NOTE: the recorded output consists of HTTP access logs from 127.0.0.1 for
# GET /docs, GET /openapi.json and POST /predict, and the cell has no execution
# count, so this call appears to block while the server is running -- interrupt
# the kernel to stop it (TODO confirm).
# NOTE(review): 'HistGradientBoostingRegressor' is not among the comparison rows
# displayed by the previous cell, which shows only part of the results;
# presumably it was one of the trained estimators -- verify.

deploy_model(estimator_name='HistGradientBoostingRegressor')

INFO:     127.0.0.1:58982 - "GET /docs HTTP/1.1" 200 OK
INFO:     127.0.0.1:58982 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     127.0.0.1:58986 - "GET /docs HTTP/1.1" 200 OK
INFO:     127.0.0.1:58986 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     127.0.0.1:58888 - "POST /predict HTTP/1.1" 200 OK
