In [10]:
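# Run once to install bluemist into the kernel's environment
# (the outputs in this notebook were produced with bluemist version 0.1.1):
# %pip install -U bluemist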

In [11]:
# Import bluemist modules

from bluemist.environment import initialize
from bluemist.datasource import get_data_from_filesystem
from bluemist.eda import perform_eda
from bluemist.preprocessing import preprocess_data
from bluemist.regression import train_test_evaluate, deploy_model

In [12]:
# Initialize bluemist.
# As the output below shows, the call prints the bluemist banner with the
# installed version, the package install path and the system platform.

initialize()

[34m
██████╗ ██╗     ██╗   ██╗███████╗███╗   ███╗██╗███████╗████████╗     █████╗ ██╗
██╔══██╗██║     ██║   ██║██╔════╝████╗ ████║██║██╔════╝╚══██╔══╝    ██╔══██╗██║
██████╔╝██║     ██║   ██║█████╗  ██╔████╔██║██║███████╗   ██║       ███████║██║
██╔══██╗██║     ██║   ██║██╔══╝  ██║╚██╔╝██║██║╚════██║   ██║       ██╔══██║██║
██████╔╝███████╗╚██████╔╝███████╗██║ ╚═╝ ██║██║███████║   ██║       ██║  ██║██║                                                                        
                                (version 0.1.1)
    [0m
Bluemist path :: /home/shashank-agrawal/anaconda3/envs/bluemist-test-2/lib/python3.9/site-packages/bluemist
System platform :: posix, Linux, 5.19.0-31-generic, linux-x86_64, ('64bit', 'ELF')


In [13]:
# Load the auto-mpg data set and preview its first 5 rows.
# Despite the loader's name, it is given an HTTPS URL here; the URL is pinned
# to a specific commit of the plotly/datasets repository.

auto_mpg_url = 'https://raw.githubusercontent.com/plotly/datasets/3aa08e58607d1f36159efc4cca9d0d073bbf57bb/auto-mpg.csv'
data = get_data_from_filesystem(auto_mpg_url)
data.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year
0,18.0,8,307.0,130.0,3504,12.0,70
1,15.0,8,350.0,165.0,3693,11.5,70
2,18.0,8,318.0,150.0,3436,11.0,70
3,16.0,8,304.0,150.0,3433,12.0,70
4,17.0,8,302.0,140.0,3449,10.5,70


In [5]:
# Visualize dataset to perform data analysis.
# As the output below shows, this writes a pandas-profiling HTML report under
# bluemist's artifacts/eda directory and opens it in the browser once the
# analysis has completed.

perform_eda(data)

Output file :: /home/shashank-agrawal/anaconda3/envs/bluemist-test-2/lib/python3.9/site-packages/bluemist/artifacts/eda/pandas-profiling.html
Output file will be opened in the browser after analysis is completed !!


Summarize dataset: 100%|██████████| 52/52 [00:26<00:00,  1.94it/s, Completed]                         
Generate report structure: 100%|██████████| 1/1 [00:11<00:00, 11.72s/it]
Render HTML: 100%|██████████| 1/1 [00:04<00:00,  4.80s/it]
Export report to file: 100%|██████████| 1/1 [00:00<00:00, 71.11it/s]


In [14]:
# Preprocess the data for modelling and unpack the train/test splits:
#   - 'mpg' is the target variable
#   - StandardScaler is the requested scaling strategy
#   - 'model_year' is treated as categorical and one-hot encoded, with the
#     first category dropped (so no model_year_70 column appears below)

X_train, X_test, y_train, y_test = preprocess_data(
    data,
    target_variable='mpg',
    data_scaling_strategy='StandardScaler',
    categorical_features=['model_year'],
    categorical_encoder='OneHotEncoder',
    drop_categories_one_hot_encoder='first',
)
X_train.head()

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration,model_year_71,model_year_72,model_year_73,model_year_74,model_year_75,model_year_76,model_year_77,model_year_78,model_year_79,model_year_80,model_year_81,model_year_82
0,-0.857474,-1.032359,-0.882257,-1.139566,0.537188,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-0.857474,-0.824944,-0.38135,-0.63263,-0.378194,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.508519,1.16435,0.996146,1.337507,-0.671116,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,-0.857474,-0.909796,-0.556667,-1.041635,0.13442,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,-0.857474,-0.560962,-0.531622,-0.788167,-1.440036,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [15]:
# Train and compare ML models.
# The call trains a series of regressors (progress bar below) and displays a
# table with one row per estimator reporting mean_absolute_error,
# mean_squared_error and r2_score.
# NOTE(review): the metrics are presumably computed on the X_test / y_test
# split passed in here -- confirm against the bluemist documentation.

train_test_evaluate(X_train, X_test, y_train, y_test)

Training TweedieRegressor: 100%|[34m██████████[0m| 46/46 [00:27<00:00,  1.67it/s]             


Unnamed: 0_level_0,mean_absolute_error,mean_squared_error,r2_score
Estimator,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ARDRegression,2.399001,9.320664,0.825313
AdaBoostRegressor,2.811313,13.047338,0.755468
BaggingRegressor,2.3221,10.244341,0.808001
BayesianRidge,2.398702,9.306243,0.825583
CCA,2.42097,9.894391,0.81456
DecisionTreeRegressor,2.965,16.1625,0.697084
DummyRegressor,6.247638,53.361075,-8.9e-05
ElasticNet,3.146627,16.821015,0.684742
ElasticNetCV,2.396276,9.280701,0.826062
ExtraTreeRegressor,2.389,10.2091,0.808662


In [9]:
# Deploy the model as RESTful API.
# Per the log output below, this starts a Uvicorn server on
# http://localhost:8000 that answers GET /docs and POST /predict.
# NOTE(review): the cell appears to keep running until the server is stopped
# (the log says "Press CTRL+C to quit") -- interrupt the kernel to shut it
# down; confirm against the bluemist documentation.

deploy_model(estimator_name='HistGradientBoostingRegressor')

INFO:     Started server process [73263]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://localhost:8000 (Press CTRL+C to quit)


INFO:     127.0.0.1:40818 - "GET /docs HTTP/1.1" 200 OK
INFO:     127.0.0.1:40818 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     127.0.0.1:40820 - "POST /predict HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [73263]


In [None]:
# While the server started by deploy_model() is running, the interactive API
# docs are available at:
#   http://localhost:8000/docs
#
# Example request body (presumably for the POST /predict endpoint seen in the
# server log -- confirm in the /docs page). Its keys are the feature columns
# of the raw data set, i.e. every column except the 'mpg' target.
# The URL above is kept as a comment because a bare URL is not valid Python
# and would make this cell fail with a SyntaxError when run.
sample_request = {
  "cylinders": 8,
  "displacement": 307,
  "horsepower": 130,
  "weight": 3504,
  "acceleration": 12,
  "model_year": "70"
}
sample_request