In [1]:
import numpy as np
import pandas as pd
import mlflow
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor,AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR,LinearSVR
from sklearn.tree import DecisionTreeRegressor
from urllib.parse import urlparse

In [2]:
# Training split: features and target. The first CSV column is used as the index.
x_train, y_train = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '../../data/split/train/x_train.csv',
        '../../data/split/train/y_train.csv',
    )
)

# Test split, read the same way.
x_test, y_test = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '../../data/split/test/x_test.csv',
        '../../data/split/test/y_test.csv',
    )
)

In [3]:
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score

In [4]:
# Regressors to compare, keyed by a short name. The training loop below uses
# the key as the MLflow run name and as the logged model's artifact path.
models_dic = {
    'linear': LinearRegression(),
    'lasso': Lasso(alpha=0.2, random_state=5),
    'ridge': Ridge(alpha=0.5, random_state=5),
}

In [5]:
# Accumulator filled after the training loop: 'number' receives the experiment
# number and 'metrics' receives the per-model metric dict.
experiment = dict(number=[], metrics=[])


In [6]:
# Column-oriented results table: one list per column, one entry appended per
# trained model. Each key gets its own independent empty list.
metric = {
    column: []
    for column in (
        'name',
        'train_mse',
        'train_mae',
        'train_r2',
        'train_rmse',
        'test_mse',
        'test_mae',
        'test_r2',
        'test_rmse',
        'metrics',
    )
}

In [7]:
# Prerequisite: a tracking server must already be running. Start it with:
# run : mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./artifacts --host 0.0.0.0
# server : http://0.0.0.0:5000
remote_server_uri = 'http://0.0.0.0:5000'
# Point every subsequent MLflow call in this notebook at that server.
mlflow.set_tracking_uri(remote_server_uri)
# Select 'restaurant7' as the active experiment (the log output below shows it
# being created when it does not exist yet). As the last expression of the
# cell, the returned Experiment object is displayed.
mlflow.set_experiment('restaurant7')


2022/12/29 04:58:30 INFO mlflow.tracking.fluent: Experiment with name 'restaurant7' does not exist. Creating a new experiment.


<Experiment: artifact_location='./artifacts/1', creation_time=1672270110502, experiment_id='1', last_update_time=1672270110502, lifecycle_stage='active', name='restaurant7', tags={}>

In [16]:
# Train every estimator in `models_dic`, score it on the train and test splits,
# and record the results twice: in the in-notebook `metric` table and in MLflow
# (one run per model, named after its key).
#
# NOTE: this cell appends to `metric` / `experiment`, so re-running it without
# re-running the cells that create those dicts adds duplicate rows.

number = 1

# Autologging only needs to be enabled once, not once per model.
mlflow.sklearn.autolog()

# The model registry is provided by the tracking backend, so the decision to
# register a model is based on the tracking URI scheme (a plain "file" store
# is treated as having no registry). This does not change between runs.
tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

for keys, lr in models_dic.items():

    with mlflow.start_run(run_name=keys):

        print(keys,"\n")
        lr.fit(x_train,y_train)

        # Despite its name, the 'metrics' column holds the estimator's
        # hyper-parameters as returned by get_params().
        metric['metrics'].append(lr.get_params())
        y_train_pred = lr.predict(x_train)
        y_test_pred = lr.predict(x_test)

        mse_train = mean_squared_error(y_train,y_train_pred)
        mae_train = mean_absolute_error(y_train,y_train_pred)

        mse_test = mean_squared_error(y_test,y_test_pred)
        mae_test = mean_absolute_error(y_test,y_test_pred)

        r2_train = r2_score(y_train,y_train_pred)
        r2_test = r2_score(y_test,y_test_pred)

        rmse_train = np.sqrt(mse_train)
        rmse_test = np.sqrt(mse_test)

        # Single source of truth for the scores: the same mapping feeds both
        # the local table and MLflow, so a value can no longer land under the
        # wrong key (the test MAE used to be appended to 'test_mse').
        scores = {
            'train_mse': mse_train,
            'test_mse': mse_test,
            'train_mae': mae_train,
            'test_mae': mae_test,
            'train_r2': r2_train,
            'test_r2': r2_test,
            'train_rmse': rmse_train,
            'test_rmse': rmse_test,
        }

        metric['name'].append(keys)
        for score_name, score_value in scores.items():
            metric[score_name].append(score_value)

        mlflow.log_metrics(scores)

        if tracking_url_type_store != "file":
            # Log the model and register it under the model's key.
            mlflow.sklearn.log_model(
                lr,
                keys,
                registered_model_name=keys)
        else:
            mlflow.sklearn.log_model(lr, keys)

        print(f' train = mse : {mse_train} mae : {mae_train} r2 : {r2_train} \n \
                test = mse : {mse_test} mae : {mae_test} r2 : {r2_test} ')
        print("================================================================")

# Store the filled metric table (by reference) under this experiment number.
experiment['number'].append(number)
experiment['metrics'].append(metric)

linear 



Registered model 'linear' already exists. Creating a new version of this model...
2022/12/29 05:08:44 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: linear, version 2
Created version '2' of model 'linear'.


 train = mse : 0.07804743368387873 mae : 0.20078302960235664 r2 : 0.5916809246863175 
                 test = mse : 0.08298500852408149 mae : 0.20473000644178632 r2 : 0.5860407000126586 
lasso 



Registered model 'lasso' already exists. Creating a new version of this model...
2022/12/29 05:09:03 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: lasso, version 2
Created version '2' of model 'lasso'.


 train = mse : 0.1520564669641754 mae : 0.3015614903363699 r2 : 0.20448946165538995 
                 test = mse : 0.160223469676195 mae : 0.3074980893270784 r2 : 0.2007472611218265 
ridge 



Registered model 'ridge' already exists. Creating a new version of this model...
2022/12/29 05:09:26 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: ridge, version 2


 train = mse : 0.07805752606482656 mae : 0.20085450794844528 r2 : 0.5916281245946062 
                 test = mse : 0.08283527466074668 mae : 0.20462295143309067 r2 : 0.5867876267932046 


Created version '2' of model 'ridge'.


In [16]:
#run: mlflow ui

In [10]:
import sqlite3

In [11]:
# Open the SQLite file that the `mlflow server` command above uses as its
# backend store (sqlite:///mlflow.db). The path is relative to the current
# working directory. The connection is closed in a later cell.
conn = sqlite3.connect('mlflow.db')

In [12]:
# List the tables in the database by querying SQLite's sqlite_master catalog.
pd.read_sql_query('''select name from sqlite_master
            where type='table';''',conn)

Unnamed: 0,name
0,experiments
1,alembic_version
2,experiment_tags
3,tags
4,registered_models
5,runs
6,registered_model_tags
7,model_version_tags
8,model_versions
9,latest_metrics


In [13]:
# Show the contents of the `params` table: one row per logged parameter
# (key, value) together with the run_uuid of the run it belongs to.
pd.read_sql_query('''
select * from params

''',conn)

Unnamed: 0,key,value,run_uuid
0,copy_X,True,aa733b4b3bdf4882b9f4a557b658811c
1,fit_intercept,True,aa733b4b3bdf4882b9f4a557b658811c
2,n_jobs,,aa733b4b3bdf4882b9f4a557b658811c
3,normalize,deprecated,aa733b4b3bdf4882b9f4a557b658811c
4,positive,False,aa733b4b3bdf4882b9f4a557b658811c
5,alpha,0.2,147efad8267849fd87e82f0163be33e8
6,copy_X,True,147efad8267849fd87e82f0163be33e8
7,fit_intercept,True,147efad8267849fd87e82f0163be33e8
8,max_iter,1000,147efad8267849fd87e82f0163be33e8
9,normalize,deprecated,147efad8267849fd87e82f0163be33e8


In [14]:
# Show the contents of the `experiments` table: id, name, artifact location,
# lifecycle stage and timestamps of each experiment.
pd.read_sql_query('''
select * from experiments

''',conn)

Unnamed: 0,experiment_id,name,artifact_location,lifecycle_stage,creation_time,last_update_time
0,0,Default,./artifacts/0,active,1672269993280,1672269993280
1,1,restaurant7,./artifacts/1,active,1672270110502,1672270110502


In [15]:
# Release the SQLite connection opened above; `conn` cannot be queried after this.
conn.close()

In [18]:
# NOTE(review): `boost` is not defined in any cell visible in this notebook;
# presumably a fitted boosting regressor left over from a removed or external
# cell. On a fresh kernel this cell raises NameError -- TODO restore the cell
# that creates and fits `boost`.
y_pred = boost.predict(x_test)
# R^2 of those predictions on the test split (displayed as the cell output).
r2_score(y_test,y_pred)

0.8727651513495669

In [19]:
# Root-mean-squared error of `y_pred` on the test split (square root of the MSE).
np.sqrt(mean_squared_error(y_test,y_pred))

0.1597070318104302

In [20]:
# Summary statistics of the test target; presumably shown to put the RMSE
# above in the context of the target's range and spread.
y_test.describe()

Unnamed: 0,rate
count,8253.0
mean,3.695892
std,0.447762
min,1.8
25%,3.4
50%,3.7
75%,4.0
max,4.9
