In [11]:
!pip install google-cloud-storage
!pip install google-cloud

Collecting google-cloud-storage
  Using cached google_cloud_storage-2.7.0-py2.py3-none-any.whl (110 kB)
Collecting google-auth<3.0dev,>=1.25.0
  Using cached google_auth-2.16.0-py2.py3-none-any.whl (177 kB)
Collecting google-resumable-media>=2.3.2
  Using cached google_resumable_media-2.4.1-py2.py3-none-any.whl (77 kB)
Collecting google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5
  Using cached google_api_core-2.11.0-py3-none-any.whl (120 kB)
Collecting google-cloud-core<3.0dev,>=2.3.0
  Using cached google_cloud_core-2.3.2-py2.py3-none-any.whl (29 kB)
Collecting googleapis-common-protos<2.0dev,>=1.56.2
  Using cached googleapis_common_protos-1.58.0-py2.py3-none-any.whl (223 kB)
Collecting cachetools<6.0,>=2.0.0
  Using cached cachetools-5.3.0-py3-none-any.whl (9.3 kB)
Collecting google-crc32c<2.0dev,>=1.0
  Using cached google_crc32c-1.5.0-cp38-cp38-win_amd64.whl (27 kB)
Installing collected packages: cachetools, googleapis-common-protos, google-auth, google-crc32c, goo



Collecting google-cloud
  Using cached google_cloud-0.34.0-py2.py3-none-any.whl (1.8 kB)
Installing collected packages: google-cloud
Successfully installed google-cloud-0.34.0




In [12]:
import os 
os.environ['GOOGLE_APPLICATION_CREDENTIALS']='unique-grid-374316-833ea25bc3c7.json'

In [13]:
import os
import argparse
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from urllib.parse import urlparse
import mlflow
import mlflow.sklearn

In [2]:
#mlflow server --backend-store-uri sqlite:///mlflow.db
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("Wine-Quality")

<Experiment: artifact_location='gs://mlflow-bs/1', creation_time=1675587273057, experiment_id='1', last_update_time=1675587273057, lifecycle_stage='active', name='Wine-Quality', tags={}>

In [14]:
def get_data():
    url = r"C:\Users\shubham\Documents\winequality-red.csv"

    try:
        df = pd.read_csv(url,sep=";")
        return df   
    except Exception as e:
        raise e

In [15]:
     
def evaluate(y,pred):
    rmse = np.sqrt(mean_squared_error(y,pred))
    mae = mean_absolute_error(y,pred)
    r2 = r2_score(y,pred)

    return rmse, mae, r2

In [16]:
df = get_data()
train,test = train_test_split(df,random_state=42)
train_x = train.drop(["quality"],axis=1)
test_x = test.drop(["quality"],axis=1)

train_y = train[["quality"]]
test_y = test[["quality"]]

In [17]:
alpha=0.6
l1_ratio = 0.9

with mlflow.start_run():
    
    mlflow.set_tag("delevoper","karndeep")
    mlflow.set_tag("model","elastic-net")
    
    mlflow.log_param("alpha",alpha)
    mlflow.log_param("l1_ratio",l1_ratio)
    
    lr = ElasticNet(alpha=alpha, l1_ratio=alpha)
    lr.fit(train_x,train_y)

    pred = lr.predict(test_x)

    rmse,mae,r2 = evaluate(test_y,pred)
    
    mlflow.log_metric("rmse",rmse)
    mlflow.log_metric("mae",mae)
    mlflow.log_metric("r2",r2)
    
    mlflow.sklearn.log_model(lr,"elastic-net-lr")


    print(f"Elastic net Params: alpha: {alpha}, l1_ratio: {l1_ratio}")
    print(f"Elastic net metric: rmse:{rmse}, mae:{mae},r2:{r2}")



Elastic net Params: alpha: 0.6, l1_ratio: 0.9
Elastic net metric: rmse:0.7772257709002306, mae:0.6365170497744915,r2:0.023462653879936957


In [7]:
mlflow.sklearn.autolog()
with mlflow.start_run() :
    lr = ElasticNet(alpha=alpha, l1_ratio=alpha)
    lr.fit(train_x,train_y)

    pred = lr.predict(test_x)

    rmse,mae,r2 = evaluate(test_y,pred)

    print(f"Elastic net Params: alpha: {alpha}, l1_ratio: {l1_ratio}")
    print(f"Elastic net metric: rmse:{rmse}, mae:{mae},r2:{r2}")



Elastic net Params: alpha: 0.6, l1_ratio: 0.9
Elastic net metric: rmse:0.7772257709002306, mae:0.6365170497744915,r2:0.023462653879936957


## HYPERPARAMETER TUNING AND TRACKING USING MLFLOW


In [8]:
!pip install hyperopt



In [9]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [10]:
def objective(params):
    with mlflow.start_run():
        mlflow.set_tag("model", "Elasticnet")
        mlflow.log_params(params)
        
        lr = ElasticNet(**params)
        lr.fit(train_x,train_y)

        pred = lr.predict(test_x)

        rmse,mae,r2 = evaluate(test_y,pred)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mae",mae)
        mlflow.log_metric("r2",r2)

    return {'loss': rmse, 'status': STATUS_OK}

In [11]:
search_space = { "alpha": hp.loguniform('alpha',0.01,1),
                  "l1_ratio": hp.uniform('l1_ratio',0,1)}

In [12]:
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=10,
    trials=Trials()
)

100%|████████████████████████████████████████████████| 10/10 [00:34<00:00,  3.48s/trial, best loss: 0.7132064466235608]


In [13]:
params= {"alpha":1.43098798581676,
"copy_X":True,
"fit_intercept":True,
"l1_ratio":0.003137093485394149,
"max_iter":1000,
"normalize":"deprecated",
"positive":False,
"precompute":False,
"random_state":None,
"selection":"cyclic",
"tol":0.0001,
"warm_start":False}

In [14]:
mlflow.sklearn.autolog()
with mlflow.start_run() :
    lr = ElasticNet(**params)
    lr.fit(train_x,train_y)

    pred = lr.predict(test_x)

    rmse,mae,r2 = evaluate(test_y,pred)

    print(f"Elastic net Params: alpha: {alpha}, l1_ratio: {l1_ratio}")
    print(f"Elastic net metric: rmse:{rmse}, mae:{mae},r2:{r2}")

Elastic net Params: alpha: 0.6, l1_ratio: 0.9
Elastic net metric: rmse:0.715046298073532, mae:0.5735208349754253,r2:0.17346205261746928


## Model Registry

In [34]:
from mlflow.tracking import MlflowClient
MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

### Interacting with MLflow Tracking Server to extract run ids with higher accuracy

In [35]:
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

client.list_experiments()

[<Experiment: artifact_location='./mlruns/0', experiment_id='0', lifecycle_stage='active', name='Default', tags={}>,
 <Experiment: artifact_location='./mlruns/1', experiment_id='1', lifecycle_stage='active', name='Wine-Quality', tags={}>]

In [36]:
from mlflow.entities import ViewType

runs = client.search_runs(
    experiment_ids='1',
    filter_string="metrics.rmse >0.7",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.rmse DESC"]
)

In [37]:
for run in runs:
    print(f"run id: {run.info.run_id}, rmse: {run.data.metrics['rmse']:.4f}")

run id: da14fd8975f84c1392c3e794c6d6330f, rmse: 0.7851
run id: d4b8c19f7e0f4afab7b94ca06d8e9985, rmse: 0.7851
run id: da82670e2463467b8c8557834e980ad0, rmse: 0.7850
run id: 45df2569b727451c8ed48ba0dafb30d0, rmse: 0.7849
run id: 42611b028730423e8c26a9756bee5bff, rmse: 0.7849


## Interacting with the Model Registry

In [23]:
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [38]:
run_id = "da82670e2463467b8c8557834e980ad0"
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(model_uri=model_uri, name="wine_quality")

Registered model 'wine_quality' already exists. Creating a new version of this model...
2023/01/15 19:09:40 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: wine_quality, version 3
Created version '3' of model 'wine_quality'.


<ModelVersion: creation_timestamp=1673789980538, current_stage='None', description=None, last_updated_timestamp=1673789980538, name='wine_quality', run_id='da82670e2463467b8c8557834e980ad0', run_link=None, source='./mlruns/1/da82670e2463467b8c8557834e980ad0/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [39]:
model_name = "wine_quality"
latest_versions = client.get_latest_versions(name=model_name)

for version in latest_versions:
    print(f"version: {version.version}, stage: {version.current_stage}")

version: 1, stage: Staging
version: 2, stage: Production
version: 3, stage: None


In [40]:
model_version = 3
new_stage = "Production"
client.transition_model_version_stage(
    name=model_name,
    version=model_version,
    stage=new_stage,
    archive_existing_versions=True
)

<ModelVersion: creation_timestamp=1673789980538, current_stage='Production', description=None, last_updated_timestamp=1673790152817, name='wine_quality', run_id='da82670e2463467b8c8557834e980ad0', run_link=None, source='./mlruns/1/da82670e2463467b8c8557834e980ad0/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [41]:
model_name = "wine_quality"
latest_versions = client.get_latest_versions(name=model_name)

for version in latest_versions:
    print(f"version: {version.version}, stage: {version.current_stage}")

version: 1, stage: Staging
version: 2, stage: Archived
version: 3, stage: Production


## TEST THE MODEL IN PRODUCTION 

In [42]:
df = get_data()
train,test = train_test_split(df,random_state=42)
train_x = train.drop(["quality"],axis=1)
test_x = test.drop(["quality"],axis=1)

train_y = train[["quality"]]
test_y = test[["quality"]]

In [43]:
def test_model(name, stage, X_test, y_test):
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    return {"rmse": mean_squared_error(y_test, y_pred, squared=False)}

In [44]:
test_model(name=model_name, stage="Production", X_test=test_x, y_test=test_y)

{'rmse': 0.7850412845660397}

In [46]:
test_model(name=model_name, stage="Archived", X_test=test_x, y_test=test_y)

{'rmse': 0.7306820887248511}

## Following setps involved to setup MLFlow on GCP:


1. Create a PostgreSQL DB for storing model metadata.
2. Create a Google Cloud Storage Bucket for storing artifacts.
3. Create a Compute Engine instance to install MLFlow and run the MLFlow server
4. SSH into Compute machine using the UI and run following commands:

- sudo apt update
- pip3 install mlflow psycopg2-binary
- mlflow server -h 0.0.0.0 -p 5000 --backend-store-uri postgresql://DB_USER:DB_PASSWORD@DB_ENDPOINT:5432/DB_NAME --default-artifact-root gs://GS_BUCKET_NAME '''

In [10]:
#mlflow server --backend-store-uri sqlite:///mlflow.db
mlflow.set_tracking_uri("http://34.135.17.37:5000/")
mlflow.set_experiment("Wine-Quality")

2023/02/05 19:15:06 INFO mlflow.tracking.fluent: Experiment with name 'Wine-Quality' does not exist. Creating a new experiment.


<Experiment: artifact_location='gs://mlflow-as/1', creation_time=1675604707770, experiment_id='1', last_update_time=1675604707770, lifecycle_stage='active', name='Wine-Quality', tags={}>