In [1]:
!python --version

Python 3.9.17


In [2]:
import os
import pickle
import pandas as pd
import numpy as np
from datetime import datetime

from sklearn.feature_extraction import DictVectorizer

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.model_selection import train_test_split

import xgboost as xgb

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

import mlflow
from mlflow.entities import ViewType
from mlflow.tracking import MlflowClient

In [3]:
from sklearn.pipeline import make_pipeline

In [4]:
from sklearn.linear_model import LinearRegression

In [5]:
from google.cloud import storage
# Google Cloud Storage client. It gets its own name so that it is not confused
# with the MlflowClient that this notebook later assigns to `client`.
gcs_client = storage.Client()

In [29]:
# Connection and data settings. Host, port and data path can be overridden with
# environment variables; the defaults are the values this notebook was run with.
TRACKING_SERVER_HOST = os.environ.get("MLFLOW_TRACKING_SERVER_HOST", "34.16.191.116")
TRACKING_SERVER_PORT = os.environ.get("MLFLOW_TRACKING_SERVER_PORT", "5000")
DATA_PATH = os.environ.get("BIKE_SHARING_DATA_PATH", "~/data/day.csv")
EXPERIMENT_NAME = "bike-sharing-regression"

In [12]:
# Base URL of the MLflow tracking server, assembled from the host and port settings.
TRACKING_URI=f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}"

In [15]:
def load_data(filename):
    """Read the comma-separated file at ``filename`` into a DataFrame.

    The columns 'instant', 'dteday', 'yr', 'casual' and 'registered' are not
    required for training and are removed; all other columns are returned
    unchanged.
    """
    unused_columns = ['instant', 'dteday', 'yr', 'casual', 'registered']
    raw = pd.read_csv(filename, sep=',')
    return raw.drop(columns=unused_columns)

In [8]:
def split_data(df):
    """Split ``df`` into a training part and a validation part.

    20% of the rows go to the validation part (test_size=0.2). The split uses
    a fixed random_state of 42 and is stratified on the 'season' and 'weekday'
    columns of ``df``.

    Returns:
        tuple: ``(df_train, df_val)``.
    """
    strata = df[['season', 'weekday']]
    parts = train_test_split(
        df,
        test_size=0.2,
        random_state=42,
        stratify=strata,
    )
    train_part, val_part = parts
    return train_part, val_part

In [9]:
def prepare_dictionaries(df: pd.DataFrame):
    """Return the feature columns of ``df`` as a list with one dict per row.

    Only the ten feature columns listed below are kept; any other column of
    ``df`` is left out of the resulting records.
    """
    feature_columns = [
        'season', 'mnth', 'holiday', 'weekday', 'workingday', 'weathersit',
        'temp', 'atemp', 'hum', 'windspeed',
    ]
    selected = df[feature_columns]
    records = selected.to_dict(orient='records')
    return records

In [10]:
def train_model_rf_search(dict_train, dict_val, y_train, y_val, max_evals=200):
    """Tune a DictVectorizer + RandomForestRegressor pipeline with hyperopt (TPE).

    Each trial fits the pipeline on the training data, scores it on the
    validation data and records itself as a separate MLflow run with:

    * tag ``model=rf`` and param ``train_data`` (the module-level DATA_PATH),
    * metrics ``rmse`` and ``r2_score`` computed on the validation data,
    * the fitted pipeline, logged under the ``models`` artifact path.

    sklearn autologging is enabled as well, so every run additionally carries
    whatever MLflow autologs for the ``fit`` call.

    Args:
        dict_train: Training feature records, passed to ``pipeline.fit``.
        dict_val: Validation feature records, passed to ``pipeline.predict``.
        y_train: Training targets.
        y_val: Validation targets.
        max_evals: Number of hyperopt trials to run. Defaults to 200.

    Returns:
        The result of ``hyperopt.fmin`` for the search.
        NOTE(review): fmin presumably reports the raw sampled values (before
        the ``scope.int`` cast) - confirm before reusing them as estimator
        arguments.
    """
    mlflow.sklearn.autolog()

    def objective(params):
        with mlflow.start_run():
            mlflow.set_tag("model", "rf")
            mlflow.log_param("train_data", DATA_PATH)

            pipeline = make_pipeline(
                DictVectorizer(),
                RandomForestRegressor(**params, n_jobs=-1)
            )

            pipeline.fit(dict_train, y_train)

            y_pred = pipeline.predict(dict_val)
            # Take the square root explicitly instead of passing squared=False:
            # that keyword is deprecated in newer scikit-learn releases, while
            # this form gives the same value on every version.
            rmse = float(np.sqrt(mean_squared_error(y_val, y_pred)))
            r2score = r2_score(y_val, y_pred)
            mlflow.log_metric("rmse", rmse)
            mlflow.log_metric("r2_score", r2score)

            mlflow.sklearn.log_model(pipeline, artifact_path="models")

        # hyperopt minimises 'loss', so the search looks for the lowest validation rmse.
        return {'loss': rmse, 'status': STATUS_OK}

    search_space = {
        'n_estimators': scope.int(hp.uniform('n_estimators', 10, 150)),
        'max_depth': scope.int(hp.uniform('max_depth', 1, 40)),
        'min_samples_leaf': scope.int(hp.uniform('min_samples_leaf', 1, 10)),
        'min_samples_split': scope.int(hp.uniform('min_samples_split', 2, 10)),
        'random_state': 42
    }

    rstate = np.random.default_rng(42)  # for reproducible results
    best_result = fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        max_evals=max_evals,
        trials=Trials(),
        rstate=rstate
    )
    return best_result

In [7]:
# Point MLflow at the tracking server (reusing TRACKING_URI instead of rebuilding
# the URL) and select the experiment that the runs are logged to.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

<Experiment: artifact_location='gs://mlflow-assignment-mj/bike-sharing-prediction/3', creation_time=1691221899736, experiment_id='3', last_update_time=1691221899736, lifecycle_stage='active', name='bike-sharing-regression', tags={}>

In [12]:
df = load_data(DATA_PATH)

In [13]:
df_train, df_val = split_data(df)

In [14]:
dict_train = prepare_dictionaries(df_train)
dict_val = prepare_dictionaries(df_val)

In [15]:
target = 'cnt'
y_train = df_train[target].values
y_val = df_val[target].values

In [17]:
train_model_rf_search(dict_train, dict_val, y_train, y_val)

  0%|          | 0/200 [00:00<?, ?trial/s, best loss=?]






  0%|          | 1/200 [00:09<31:00,  9.35s/trial, best loss: 1192.221603892011]






  1%|          | 2/200 [00:18<29:54,  9.06s/trial, best loss: 1192.221603892011]






  2%|▏         | 3/200 [00:27<30:41,  9.35s/trial, best loss: 1192.221603892011]






  2%|▏         | 4/200 [00:37<30:18,  9.28s/trial, best loss: 1192.221603892011]






  2%|▎         | 5/200 [00:46<30:00,  9.24s/trial, best loss: 1192.221603892011]






  3%|▎         | 6/200 [00:55<30:03,  9.30s/trial, best loss: 1192.221603892011]






  4%|▎         | 7/200 [01:04<29:57,  9.31s/trial, best loss: 1192.221603892011]






  4%|▍         | 8/200 [01:13<29:13,  9.13s/trial, best loss: 1192.221603892011]






  4%|▍         | 9/200 [01:23<29:34,  9.29s/trial, best loss: 1192.221603892011]






  5%|▌         | 10/200 [01:34<31:11,  9.85s/trial, best loss: 1192.221603892011]






  6%|▌         | 11/200 [01:43<30:15,  9.61s/trial, best loss: 1192.221603892011]






  6%|▌         | 12/200 [01:53<30:14,  9.65s/trial, best loss: 1192.221603892011]






  6%|▋         | 13/200 [02:02<29:30,  9.47s/trial, best loss: 1192.221603892011]






  7%|▋         | 14/200 [02:11<28:50,  9.31s/trial, best loss: 1192.221603892011]






  8%|▊         | 15/200 [02:20<28:25,  9.22s/trial, best loss: 1192.221603892011]






  8%|▊         | 16/200 [02:29<28:36,  9.33s/trial, best loss: 1192.221603892011]






  8%|▊         | 17/200 [02:39<28:45,  9.43s/trial, best loss: 1192.015418064363]






  9%|▉         | 18/200 [02:48<28:18,  9.33s/trial, best loss: 1192.015418064363]






 10%|▉         | 19/200 [02:57<28:04,  9.31s/trial, best loss: 1192.015418064363]






 10%|█         | 20/200 [03:06<27:42,  9.24s/trial, best loss: 1187.4476830955357]






 10%|█         | 21/200 [03:16<27:30,  9.22s/trial, best loss: 1187.4476830955357]






 11%|█         | 22/200 [03:25<27:37,  9.31s/trial, best loss: 1187.4476830955357]






 12%|█▏        | 23/200 [03:34<27:21,  9.27s/trial, best loss: 1187.4476830955357]






 12%|█▏        | 24/200 [03:44<27:28,  9.36s/trial, best loss: 1187.4476830955357]






 12%|█▎        | 25/200 [03:53<27:12,  9.33s/trial, best loss: 1187.4476830955357]






 13%|█▎        | 26/200 [04:02<26:47,  9.24s/trial, best loss: 1187.4476830955357]






 14%|█▎        | 27/200 [04:11<26:32,  9.20s/trial, best loss: 1187.4476830955357]






 14%|█▍        | 28/200 [04:20<26:17,  9.17s/trial, best loss: 1187.4476830955357]






 14%|█▍        | 29/200 [04:29<26:03,  9.14s/trial, best loss: 1187.4476830955357]






 15%|█▌        | 30/200 [04:39<26:00,  9.18s/trial, best loss: 1187.4476830955357]






 16%|█▌        | 31/200 [04:48<25:37,  9.10s/trial, best loss: 1187.4476830955357]






 16%|█▌        | 32/200 [04:57<25:35,  9.14s/trial, best loss: 1187.4476830955357]






 16%|█▋        | 33/200 [05:06<25:28,  9.16s/trial, best loss: 1187.4476830955357]






 17%|█▋        | 34/200 [05:15<25:03,  9.06s/trial, best loss: 1187.4476830955357]






 18%|█▊        | 35/200 [05:24<24:55,  9.06s/trial, best loss: 1187.4476830955357]






 18%|█▊        | 36/200 [05:33<24:41,  9.03s/trial, best loss: 1187.4476830955357]






 18%|█▊        | 37/200 [05:42<24:37,  9.07s/trial, best loss: 1187.4476830955357]






 19%|█▉        | 38/200 [05:51<24:38,  9.13s/trial, best loss: 1187.4476830955357]






 20%|█▉        | 39/200 [06:00<24:18,  9.06s/trial, best loss: 1187.4476830955357]






 20%|██        | 40/200 [06:09<24:04,  9.03s/trial, best loss: 1187.4476830955357]






 20%|██        | 41/200 [06:18<24:04,  9.08s/trial, best loss: 1187.4476830955357]






 21%|██        | 42/200 [06:28<24:14,  9.20s/trial, best loss: 1187.4476830955357]






 22%|██▏       | 43/200 [06:37<24:01,  9.18s/trial, best loss: 1187.4476830955357]






 22%|██▏       | 44/200 [06:46<23:39,  9.10s/trial, best loss: 1187.4476830955357]






 22%|██▎       | 45/200 [06:55<23:35,  9.13s/trial, best loss: 1187.4476830955357]






 23%|██▎       | 46/200 [07:04<23:10,  9.03s/trial, best loss: 1187.4476830955357]






 24%|██▎       | 47/200 [07:13<23:12,  9.10s/trial, best loss: 1187.4476830955357]






 24%|██▍       | 48/200 [07:22<22:50,  9.02s/trial, best loss: 1187.4476830955357]






 24%|██▍       | 49/200 [07:32<23:06,  9.18s/trial, best loss: 1187.4476830955357]






 25%|██▌       | 50/200 [07:41<23:05,  9.24s/trial, best loss: 1187.4476830955357]






 26%|██▌       | 51/200 [07:50<22:41,  9.14s/trial, best loss: 1187.4476830955357]






 26%|██▌       | 52/200 [07:59<22:22,  9.07s/trial, best loss: 1187.4476830955357]






 26%|██▋       | 53/200 [08:08<22:09,  9.05s/trial, best loss: 1187.4476830955357]






 27%|██▋       | 54/200 [08:16<21:42,  8.92s/trial, best loss: 1187.4476830955357]






 28%|██▊       | 55/200 [08:26<22:06,  9.15s/trial, best loss: 1187.4476830955357]






 28%|██▊       | 56/200 [08:36<22:14,  9.27s/trial, best loss: 1187.4476830955357]






 28%|██▊       | 57/200 [08:45<21:53,  9.19s/trial, best loss: 1187.4476830955357]






 29%|██▉       | 58/200 [08:54<22:02,  9.31s/trial, best loss: 1187.4476830955357]






 30%|██▉       | 59/200 [09:03<21:33,  9.18s/trial, best loss: 1187.4476830955357]






 30%|███       | 60/200 [09:12<21:16,  9.12s/trial, best loss: 1187.4476830955357]






 30%|███       | 61/200 [09:22<21:25,  9.25s/trial, best loss: 1187.4476830955357]






 31%|███       | 62/200 [09:31<21:11,  9.21s/trial, best loss: 1187.4476830955357]






 32%|███▏      | 63/200 [09:40<20:57,  9.18s/trial, best loss: 1187.4476830955357]






 32%|███▏      | 64/200 [09:49<20:41,  9.13s/trial, best loss: 1187.4476830955357]






 32%|███▎      | 65/200 [09:58<20:29,  9.11s/trial, best loss: 1187.4476830955357]






 33%|███▎      | 66/200 [10:07<20:23,  9.13s/trial, best loss: 1187.4476830955357]






 34%|███▎      | 67/200 [10:16<20:11,  9.11s/trial, best loss: 1187.4476830955357]






 34%|███▍      | 68/200 [10:26<20:18,  9.23s/trial, best loss: 1187.4476830955357]






 34%|███▍      | 69/200 [10:35<20:12,  9.25s/trial, best loss: 1187.4476830955357]






 35%|███▌      | 70/200 [10:45<20:18,  9.37s/trial, best loss: 1187.4476830955357]






 36%|███▌      | 71/200 [10:54<20:04,  9.34s/trial, best loss: 1187.4476830955357]






 36%|███▌      | 72/200 [11:04<20:11,  9.46s/trial, best loss: 1187.4476830955357]






 36%|███▋      | 73/200 [11:13<19:56,  9.42s/trial, best loss: 1187.4476830955357]






 37%|███▋      | 74/200 [11:23<19:54,  9.48s/trial, best loss: 1187.4476830955357]






 38%|███▊      | 75/200 [11:32<19:40,  9.44s/trial, best loss: 1187.4476830955357]






 38%|███▊      | 76/200 [11:41<19:22,  9.37s/trial, best loss: 1187.4476830955357]






 38%|███▊      | 77/200 [11:51<19:14,  9.38s/trial, best loss: 1187.4476830955357]






 39%|███▉      | 78/200 [12:00<18:52,  9.28s/trial, best loss: 1187.4476830955357]






 40%|███▉      | 79/200 [12:09<18:46,  9.31s/trial, best loss: 1187.4476830955357]






 40%|████      | 80/200 [12:18<18:33,  9.28s/trial, best loss: 1187.4476830955357]






 40%|████      | 81/200 [12:28<18:38,  9.40s/trial, best loss: 1187.4476830955357]






 41%|████      | 82/200 [12:37<18:09,  9.23s/trial, best loss: 1187.4476830955357]






 42%|████▏     | 83/200 [12:46<17:54,  9.18s/trial, best loss: 1187.4476830955357]






 42%|████▏     | 84/200 [12:55<17:32,  9.07s/trial, best loss: 1187.4476830955357]






 42%|████▎     | 85/200 [13:04<17:32,  9.15s/trial, best loss: 1187.4476830955357]






 43%|████▎     | 86/200 [13:14<17:39,  9.30s/trial, best loss: 1187.4476830955357]






 44%|████▎     | 87/200 [13:23<17:33,  9.32s/trial, best loss: 1187.4476830955357]






 44%|████▍     | 88/200 [13:32<17:30,  9.38s/trial, best loss: 1187.4476830955357]






 44%|████▍     | 89/200 [13:42<17:13,  9.31s/trial, best loss: 1187.4476830955357]






 45%|████▌     | 90/200 [13:51<17:01,  9.29s/trial, best loss: 1187.4476830955357]






 46%|████▌     | 91/200 [14:00<16:51,  9.28s/trial, best loss: 1187.4476830955357]






 46%|████▌     | 92/200 [14:09<16:36,  9.23s/trial, best loss: 1187.4476830955357]






 46%|████▋     | 93/200 [14:18<16:19,  9.16s/trial, best loss: 1187.4476830955357]






 47%|████▋     | 94/200 [14:27<16:14,  9.19s/trial, best loss: 1187.4476830955357]






 48%|████▊     | 95/200 [14:36<15:58,  9.13s/trial, best loss: 1187.4476830955357]






 48%|████▊     | 96/200 [14:45<15:42,  9.07s/trial, best loss: 1187.4476830955357]






 48%|████▊     | 97/200 [14:55<15:51,  9.24s/trial, best loss: 1187.4476830955357]






 49%|████▉     | 98/200 [15:04<15:39,  9.21s/trial, best loss: 1187.4476830955357]






 50%|████▉     | 99/200 [15:13<15:11,  9.03s/trial, best loss: 1187.4476830955357]






 50%|█████     | 100/200 [15:22<14:57,  8.97s/trial, best loss: 1187.4476830955357]






 50%|█████     | 101/200 [15:30<14:41,  8.91s/trial, best loss: 1187.4476830955357]






 51%|█████     | 102/200 [15:39<14:37,  8.96s/trial, best loss: 1187.4476830955357]






 52%|█████▏    | 103/200 [15:48<14:28,  8.95s/trial, best loss: 1187.4476830955357]






 52%|█████▏    | 104/200 [15:57<14:23,  9.00s/trial, best loss: 1187.4476830955357]






 52%|█████▎    | 105/200 [16:07<14:30,  9.17s/trial, best loss: 1187.4476830955357]






 53%|█████▎    | 106/200 [16:16<14:19,  9.14s/trial, best loss: 1187.4476830955357]






 54%|█████▎    | 107/200 [16:25<14:08,  9.12s/trial, best loss: 1187.4476830955357]






 54%|█████▍    | 108/200 [16:35<14:20,  9.35s/trial, best loss: 1187.4476830955357]






 55%|█████▍    | 109/200 [16:44<14:05,  9.29s/trial, best loss: 1187.4476830955357]






 55%|█████▌    | 110/200 [16:54<13:55,  9.29s/trial, best loss: 1187.4476830955357]






 56%|█████▌    | 111/200 [17:04<14:12,  9.58s/trial, best loss: 1187.4476830955357]






 56%|█████▌    | 112/200 [17:13<13:50,  9.43s/trial, best loss: 1187.4476830955357]






 56%|█████▋    | 113/200 [17:22<13:26,  9.27s/trial, best loss: 1187.4476830955357]






 57%|█████▋    | 114/200 [17:31<13:15,  9.26s/trial, best loss: 1187.4476830955357]






 57%|█████▊    | 115/200 [17:40<13:04,  9.23s/trial, best loss: 1187.4476830955357]






 58%|█████▊    | 116/200 [17:49<12:49,  9.16s/trial, best loss: 1187.4476830955357]






 58%|█████▊    | 117/200 [17:58<12:40,  9.16s/trial, best loss: 1187.4476830955357]






 59%|█████▉    | 118/200 [18:08<12:38,  9.25s/trial, best loss: 1187.4476830955357]






 60%|█████▉    | 119/200 [18:17<12:33,  9.30s/trial, best loss: 1187.4476830955357]






 60%|██████    | 120/200 [18:26<12:10,  9.13s/trial, best loss: 1187.4476830955357]






 60%|██████    | 121/200 [18:35<11:52,  9.02s/trial, best loss: 1187.4476830955357]






 61%|██████    | 122/200 [18:44<11:52,  9.14s/trial, best loss: 1187.4476830955357]






 62%|██████▏   | 123/200 [18:53<11:42,  9.13s/trial, best loss: 1187.4476830955357]






 62%|██████▏   | 124/200 [19:03<11:38,  9.19s/trial, best loss: 1187.4476830955357]






 62%|██████▎   | 125/200 [19:11<11:22,  9.10s/trial, best loss: 1187.4476830955357]






 63%|██████▎   | 126/200 [19:21<11:13,  9.10s/trial, best loss: 1187.4476830955357]






 64%|██████▎   | 127/200 [19:30<11:04,  9.11s/trial, best loss: 1187.4476830955357]






 64%|██████▍   | 128/200 [19:39<10:53,  9.08s/trial, best loss: 1187.4476830955357]






 64%|██████▍   | 129/200 [19:47<10:36,  8.97s/trial, best loss: 1187.4476830955357]






 65%|██████▌   | 130/200 [19:56<10:27,  8.97s/trial, best loss: 1187.4476830955357]






 66%|██████▌   | 131/200 [20:05<10:15,  8.92s/trial, best loss: 1187.4476830955357]






 66%|██████▌   | 132/200 [20:14<10:15,  9.05s/trial, best loss: 1187.4476830955357]






 66%|██████▋   | 133/200 [20:23<10:05,  9.04s/trial, best loss: 1187.4476830955357]






 67%|██████▋   | 134/200 [20:33<09:56,  9.04s/trial, best loss: 1187.4476830955357]






 68%|██████▊   | 135/200 [20:42<09:48,  9.06s/trial, best loss: 1187.4476830955357]






 68%|██████▊   | 136/200 [20:51<09:36,  9.01s/trial, best loss: 1187.4476830955357]






 68%|██████▊   | 137/200 [21:00<09:29,  9.03s/trial, best loss: 1187.4476830955357]






 69%|██████▉   | 138/200 [21:09<09:20,  9.04s/trial, best loss: 1187.4476830955357]






 70%|██████▉   | 139/200 [21:18<09:17,  9.14s/trial, best loss: 1187.4476830955357]






 70%|███████   | 140/200 [21:28<09:14,  9.25s/trial, best loss: 1187.4476830955357]






 70%|███████   | 141/200 [21:36<08:59,  9.15s/trial, best loss: 1187.4476830955357]






 71%|███████   | 142/200 [21:46<08:49,  9.13s/trial, best loss: 1187.4476830955357]






 72%|███████▏  | 143/200 [21:55<08:40,  9.12s/trial, best loss: 1187.4476830955357]






 72%|███████▏  | 144/200 [22:04<08:32,  9.15s/trial, best loss: 1187.4476830955357]






 72%|███████▎  | 145/200 [22:13<08:17,  9.04s/trial, best loss: 1187.4476830955357]






 73%|███████▎  | 146/200 [22:22<08:11,  9.10s/trial, best loss: 1187.4476830955357]






 74%|███████▎  | 147/200 [22:31<08:05,  9.15s/trial, best loss: 1187.4476830955357]






 74%|███████▍  | 148/200 [22:40<07:58,  9.20s/trial, best loss: 1187.4476830955357]






 74%|███████▍  | 149/200 [22:50<07:52,  9.27s/trial, best loss: 1187.4476830955357]






 75%|███████▌  | 150/200 [22:59<07:42,  9.25s/trial, best loss: 1187.4476830955357]






 76%|███████▌  | 151/200 [23:08<07:33,  9.25s/trial, best loss: 1187.4476830955357]






 76%|███████▌  | 152/200 [23:17<07:21,  9.21s/trial, best loss: 1187.4476830955357]






 76%|███████▋  | 153/200 [23:27<07:13,  9.22s/trial, best loss: 1187.4476830955357]






 77%|███████▋  | 154/200 [23:36<07:09,  9.33s/trial, best loss: 1187.4476830955357]






 78%|███████▊  | 155/200 [23:46<06:59,  9.31s/trial, best loss: 1187.4476830955357]






 78%|███████▊  | 156/200 [23:55<06:51,  9.35s/trial, best loss: 1187.4476830955357]






 78%|███████▊  | 157/200 [24:05<06:44,  9.41s/trial, best loss: 1187.4476830955357]






 79%|███████▉  | 158/200 [24:14<06:31,  9.33s/trial, best loss: 1187.4476830955357]






 80%|███████▉  | 159/200 [24:23<06:17,  9.20s/trial, best loss: 1187.4476830955357]






 80%|████████  | 160/200 [24:32<06:07,  9.19s/trial, best loss: 1187.4476830955357]






 80%|████████  | 161/200 [24:41<05:56,  9.15s/trial, best loss: 1187.4476830955357]






 81%|████████  | 162/200 [24:50<05:48,  9.16s/trial, best loss: 1187.4476830955357]






 82%|████████▏ | 163/200 [24:59<05:39,  9.19s/trial, best loss: 1187.4476830955357]






 82%|████████▏ | 164/200 [25:09<05:31,  9.20s/trial, best loss: 1187.4476830955357]






 82%|████████▎ | 165/200 [25:18<05:20,  9.16s/trial, best loss: 1187.4476830955357]






 83%|████████▎ | 166/200 [25:27<05:09,  9.09s/trial, best loss: 1187.4476830955357]






 84%|████████▎ | 167/200 [25:35<04:58,  9.03s/trial, best loss: 1187.4476830955357]






 84%|████████▍ | 168/200 [25:45<04:51,  9.09s/trial, best loss: 1187.4476830955357]






 84%|████████▍ | 169/200 [25:54<04:39,  9.03s/trial, best loss: 1187.4476830955357]






 85%|████████▌ | 170/200 [26:02<04:29,  9.00s/trial, best loss: 1187.4476830955357]






 86%|████████▌ | 171/200 [26:12<04:23,  9.07s/trial, best loss: 1187.4476830955357]






 86%|████████▌ | 172/200 [26:21<04:13,  9.05s/trial, best loss: 1187.4476830955357]






 86%|████████▋ | 173/200 [26:30<04:04,  9.06s/trial, best loss: 1187.4476830955357]






 87%|████████▋ | 174/200 [26:39<03:56,  9.10s/trial, best loss: 1187.4476830955357]






 88%|████████▊ | 175/200 [26:48<03:48,  9.12s/trial, best loss: 1187.4476830955357]






 88%|████████▊ | 176/200 [26:57<03:39,  9.15s/trial, best loss: 1187.4476830955357]






 88%|████████▊ | 177/200 [27:07<03:33,  9.27s/trial, best loss: 1187.4476830955357]






 89%|████████▉ | 178/200 [27:16<03:25,  9.34s/trial, best loss: 1187.4476830955357]






 90%|████████▉ | 179/200 [27:26<03:17,  9.38s/trial, best loss: 1187.4476830955357]






 90%|█████████ | 180/200 [27:35<03:06,  9.30s/trial, best loss: 1187.4476830955357]






 90%|█████████ | 181/200 [27:44<02:54,  9.19s/trial, best loss: 1187.4476830955357]






 91%|█████████ | 182/200 [27:53<02:46,  9.26s/trial, best loss: 1187.4476830955357]






 92%|█████████▏| 183/200 [28:03<02:36,  9.23s/trial, best loss: 1187.4476830955357]






 92%|█████████▏| 184/200 [28:12<02:29,  9.36s/trial, best loss: 1187.4476830955357]






 92%|█████████▎| 185/200 [28:21<02:18,  9.26s/trial, best loss: 1187.4476830955357]






 93%|█████████▎| 186/200 [28:30<02:09,  9.23s/trial, best loss: 1187.4476830955357]






 94%|█████████▎| 187/200 [28:40<02:00,  9.23s/trial, best loss: 1187.4476830955357]






 94%|█████████▍| 188/200 [28:48<01:49,  9.09s/trial, best loss: 1187.4476830955357]






 94%|█████████▍| 189/200 [28:58<01:40,  9.16s/trial, best loss: 1187.4476830955357]






 95%|█████████▌| 190/200 [29:11<01:43, 10.39s/trial, best loss: 1187.4476830955357]






 96%|█████████▌| 191/200 [29:21<01:32, 10.28s/trial, best loss: 1187.4476830955357]






 96%|█████████▌| 192/200 [29:30<01:20, 10.02s/trial, best loss: 1187.4476830955357]






 96%|█████████▋| 193/200 [29:40<01:08,  9.76s/trial, best loss: 1187.4476830955357]






 97%|█████████▋| 194/200 [29:48<00:57,  9.51s/trial, best loss: 1187.4476830955357]






 98%|█████████▊| 195/200 [29:58<00:47,  9.44s/trial, best loss: 1187.4476830955357]






 98%|█████████▊| 196/200 [30:07<00:37,  9.45s/trial, best loss: 1187.4476830955357]






 98%|█████████▊| 197/200 [30:17<00:28,  9.44s/trial, best loss: 1187.4476830955357]






 99%|█████████▉| 198/200 [30:26<00:18,  9.30s/trial, best loss: 1187.4476830955357]






100%|█████████▉| 199/200 [30:35<00:09,  9.22s/trial, best loss: 1187.4476830955357]






100%|██████████| 200/200 [30:44<00:00,  9.22s/trial, best loss: 1187.4476830955357]


In [8]:
# Client for querying runs and managing the model registry on the tracking server.
client = MlflowClient(tracking_uri=TRACKING_URI)

In [11]:
def register_best_model(tracking_uri, experiment_name, model_registry_name):
    """Register the lowest-rmse run of an experiment and move it to "Production".

    The active run with the smallest ``rmse`` metric is looked up, its model is
    registered under ``model_registry_name``, the new version is transitioned
    to the "Production" stage (archiving existing versions), and the
    descriptions of both the model version and the registered model are
    updated.

    Args:
        tracking_uri: URI of the MLflow tracking server used for the lookups
            and registry updates made through ``MlflowClient``.
        experiment_name: Name of the experiment whose runs are searched.
        model_registry_name: Registered-model name to create a version under.

    Raises:
        ValueError: If the experiment does not exist, has no active runs, or
            its best-ranked run carries no ``rmse`` metric.
    """
    client = MlflowClient(tracking_uri=tracking_uri)

    experiment = client.get_experiment_by_name(experiment_name)
    if experiment is None:
        raise ValueError(
            f"MLflow experiment '{experiment_name}' was not found at {tracking_uri}"
        )

    runs = client.search_runs(
        experiment_ids=[experiment.experiment_id],
        run_view_type=ViewType.ACTIVE_ONLY,
        max_results=1,
        order_by=["metrics.rmse ASC"]
    )
    if not runs:
        raise ValueError(
            f"MLflow experiment '{experiment_name}' has no active runs to register"
        )
    best_run = runs[0]

    if 'rmse' not in best_run.data.metrics:
        raise ValueError(
            f"Run {best_run.info.run_id} has no 'rmse' metric; cannot pick a best model"
        )

    # register the best model
    run_id = best_run.info.run_id
    # NOTE(review): "model" differs from the artifact_path ("models") used by the
    # explicit log_model call in train_model_rf_search; presumably this resolves
    # to the artifact written by sklearn autologging - confirm.
    model_uri = f"runs:/{run_id}/model"
    model_rmse = best_run.data.metrics['rmse']
    # NOTE(review): this call is not given tracking_uri, so it presumably relies
    # on the URI configured globally via mlflow.set_tracking_uri - confirm that
    # it matches the tracking_uri argument.
    model_details = mlflow.register_model(model_uri=model_uri, name=model_registry_name)

    date = datetime.today().date()

    # transition of our best model in "Production"
    client.transition_model_version_stage(
        name=model_details.name,
        version=model_details.version,
        stage="Production",
        archive_existing_versions=True
    )

    client.update_model_version(
        name=model_details.name,
        version=model_details.version,
        description=f"The model version {model_details.version} was transitioned to Production on {date}"
    )

    client.update_registered_model(
        name=model_details.name,
        description=f"Current model version in production: {model_details.version}, rmse: {model_rmse}"
    )

In [13]:
model_name = "random-forest-regressor"

In [14]:
register_best_model(TRACKING_URI, EXPERIMENT_NAME, model_name)

Registered model 'random-forest-regressor' already exists. Creating a new version of this model...
2023/08/07 17:52:53 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: random-forest-regressor, version 2
Created version '2' of model 'random-forest-regressor'.


In [9]:
# Look up the experiment, then fetch its single active run with the lowest rmse.
experiment = client.get_experiment_by_name(EXPERIMENT_NAME)
lowest_rmse_first = ["metrics.rmse ASC"]
top_runs = client.search_runs(
    experiment_ids=experiment.experiment_id,
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=1,
    order_by=lowest_rmse_first,
)
best_run = top_runs[0]
best_run

<Run: data=<RunData: metrics={'r2_score': 0.632749344020741,
 'rmse': 1187.4476830955357,
 'training_mean_absolute_error': 839.3195837883158,
 'training_mean_squared_error': 984957.1543581459,
 'training_r2_score': 0.7355399970094947,
 'training_root_mean_squared_error': 992.4500765066956,
 'training_score': 0.7355399970094947}, params={'dictvectorizer': 'DictVectorizer()',
 'dictvectorizer__dtype': "<class 'numpy.float64'>",
 'dictvectorizer__separator': '=',
 'dictvectorizer__sort': 'True',
 'dictvectorizer__sparse': 'True',
 'memory': 'None',
 'randomforestregressor': 'RandomForestRegressor(max_depth=9, '
                          'min_samples_leaf=9, min_samples_split=7,\n'
                          '                      n_estimators=72, n_jobs=-1, '
                          'random_state=42)',
 'randomforestregressor__bootstrap': 'True',
 'randomforestregressor__ccp_alpha': '0.0',
 'randomforestregressor__criterion': 'squared_error',
 'randomforestregressor__max_depth': '9',
 'r

In [28]:
# Build the runs:/ URI of the best run's pipeline. "models" is the artifact_path
# passed to mlflow.sklearn.log_model in train_model_rf_search.
run_id = best_run.info.run_id
model_uri=f"runs:/{run_id}/models"


In [23]:
print(model_uri)

runs:/79934c79a98f4932aade316cce6e61a0/models


In [24]:
mlflow.register_model(model_uri=model_uri, name=model_name)

Successfully registered model 'random-forest-regressor'.
2023/08/05 13:18:49 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: random-forest-regressor, version 1
Created version '1' of model 'random-forest-regressor'.


<ModelVersion: aliases=[], creation_timestamp=1691241529170, current_stage='None', description='', last_updated_timestamp=1691241529170, name='random-forest-regressor', run_id='79934c79a98f4932aade316cce6e61a0', run_link='', source='gs://mlflow-assignment-mj/bike-sharing-prediction/3/79934c79a98f4932aade316cce6e61a0/artifacts/models', status='READY', status_message='', tags={}, user_id='', version='1'>

In [29]:
client.transition_model_version_stage(
    name=model_name,
    version=1,
    stage="Production",
    archive_existing_versions=True
)

<ModelVersion: aliases=[], creation_timestamp=1691241529170, current_stage='Production', description='', last_updated_timestamp=1691241730726, name='random-forest-regressor', run_id='79934c79a98f4932aade316cce6e61a0', run_link='', source='gs://mlflow-assignment-mj/bike-sharing-prediction/3/79934c79a98f4932aade316cce6e61a0/artifacts/models', status='READY', status_message='', tags={}, user_id='', version='1'>

In [30]:
latest_versions = client.get_latest_versions(name=model_name)

In [37]:
# Registry URI of the version that currently holds the "Production" stage.
# ("latest" would resolve to the newest version regardless of its stage.)
prod_model_uri = f"models:/{model_name}/Production"

In [38]:
# Load the registered model as a PyFuncModel through its registry URI, rather
# than through the run-artifact URI of one specific run.
loaded_model = mlflow.pyfunc.load_model(prod_model_uri)

In [40]:
# testing prediction
bike_data = {
    'season': 1,
    'mnth': 1,
    'holiday': 0,
    'weekday': 6, 
    'workingday': 0, 
    'weathersit': 2,
    'temp': 0.344167, 
    'atemp': 0.363625,
    'hum': 0.805833,
    'windspeed': 0.160446
}

In [41]:
loaded_model.predict(bike_data)

array([2475.51869788])