## Launch the MLflow server
###### Run in cmd
mlflow server --backend-store-uri sqlite:///backend.db --default-artifact-root ./mlruns

* `--backend-store-uri`: use a local SQLite database as the backend store. The backend store is where the MLflow Tracking Server stores experiment and run metadata, as well as params, metrics, and tags for runs. MLflow supports two types of backend stores: file store and database-backed store.

* `--default-artifact-root`: sets the default location of the server's artifact store.


## Configure mlflow

In [1]:
import mlflow

# Point the MLflow client at the locally running tracking server.
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")
# Activate the experiment used by this notebook; MLflow creates it when it does not exist yet.
mlflow.set_experiment(experiment_name="digits-classification-experiment")

2023/03/07 22:26:49 INFO mlflow.tracking.fluent: Experiment with name 'digits-classification-experiment' does not exist. Creating a new experiment.


<Experiment: artifact_location='./mlruns/1', creation_time=1678220809435, experiment_id='1', last_update_time=1678220809435, lifecycle_stage='active', name='digits-classification-experiment', tags={}>

## import libraries

In [2]:
from sklearn import datasets
from sklearn import metrics
import requests
import json
import ast

import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from xgboost import XGBClassifier
from sklearn.metrics import auc ,accuracy_score ,roc_curve, roc_auc_score , f1_score
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType
from mlflow.store.artifact.runs_artifact_repo import RunsArtifactRepository

## load dataset 


In [3]:

# Load the scikit-learn digits dataset and keep its feature matrix and labels.
digits = datasets.load_digits()
x, y = digits.data, digits.target

In [4]:
# Put the pixel features and the label side by side in one frame, label in the last column.
df = pd.DataFrame(
    data=np.column_stack([digits['data'], digits['target']]),
    columns=[*digits['feature_names'], 'target'],
)
df.head()

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7,target
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1.0
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2.0
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3.0
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4.0


In [5]:
# Show the class labels present in the dataset.
[label for label in digits.target_names]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [6]:
# Number of samples per class.
df.loc[:, 'target'].value_counts()

3.0    183
1.0    182
5.0    182
4.0    181
6.0    181
9.0    180
7.0    179
0.0    178
2.0    177
8.0    174
Name: target, dtype: int64

In [7]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    df.loc[:, digits['feature_names']],
    df.loc[:, 'target'],
    test_size=0.2,
    random_state=42,
)

## Hyperparameter tuning using Hyperopt

In [8]:
# Candidate values per XGBoost hyperparameter. Each entry becomes an
# hp.choice node labelled with the parameter's own name.
search_space = {
    name: hp.choice(name, options)
    for name, options in {
        'learning_rate': [0.0005, 0.001, 0.01, 0.5, 1],
        'max_depth': range(3, 21, 3),
        'gamma': [i / 10.0 for i in range(0, 5)],
        'colsample_bytree': [i / 10.0 for i in range(3, 10)],
        'reg_alpha': [1e-5, 1e-2, 0.1, 1, 10, 100],
        'reg_lambda': [1e-5, 1e-2, 0.1, 1, 10, 100],
        'seed': [0, 7, 42],
    }.items()
}
# Second name bound to the same dictionary.
space = search_space

def objective(params):
    """Train and evaluate one XGBoost candidate for a Hyperopt trial.

    Each call opens its own MLflow run, logs the sampled hyperparameters,
    the accuracy and micro-averaged F1 score on the test split, and logs the
    fitted model under the ``mlruns`` artifact path while registering it as a
    new version of the ``xgboost`` registered model.

    Reads ``x_train``, ``y_train``, ``x_test`` and ``y_test`` from the
    notebook namespace.

    Args:
        params: Keyword arguments for ``XGBClassifier``, as sampled from the
            search space.

    Returns:
        dict: ``loss`` (negative accuracy) and ``status`` (``STATUS_OK``).
    """
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)

        # early_stopping_rounds is an estimator parameter; passing it to fit()
        # was deprecated in XGBoost 1.6 and removed in 2.0.
        clf = XGBClassifier(early_stopping_rounds=10, **params)
        # NOTE(review): the test split is used both for early stopping and for
        # the metrics reported below, so those metrics are likely optimistic.
        evaluation = [(x_train, y_train), (x_test, y_test)]
        clf.fit(x_train, y_train, eval_set=evaluation, verbose=False)

        y_pred = clf.predict(x_test)

        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)
        f1 = f1_score(y_test, y_pred, average='micro')
        mlflow.log_metric("f1_score", f1)

        # Every trial is registered as its own model version so the best run
        # can later be matched to a version through its run id.
        mlflow.xgboost.log_model(
            xgb_model=clf,
            artifact_path="mlruns",
            registered_model_name="xgboost",
        )

    # The loss is the negated accuracy so that a lower loss means a better model.
    return {'loss': -accuracy, 'status': STATUS_OK}
    

In [9]:
# Run 5 TPE-guided trials of `objective` over `search_space`.
best_result = fmin(objective, search_space, algo=tpe.suggest, max_evals=5, trials=Trials())

  0%|                                                                            | 0/5 [00:00<?, ?trial/s, best loss=?]



Successfully registered model 'xgboost'.
2023/03/07 23:04:55 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: xgboost, version 1



 20%|█████████▊                                       | 1/5 [00:05<00:22,  5.60s/trial, best loss: -0.9111111111111111]

Created version '1' of model 'xgboost'.

Registered model 'xgboost' already exists. Creating a new version of this model...
2023/03/07 23:05:00 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: xgboost, version 2



 40%|███████████████████▌                             | 2/5 [00:10<00:15,  5.19s/trial, best loss: -0.9166666666666666]

Created version '2' of model 'xgboost'.

Registered model 'xgboost' already exists. Creating a new version of this model...
2023/03/07 23:05:04 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: xgboost, version 3



 60%|█████████████████████████████▍                   | 3/5 [00:14<00:09,  4.83s/trial, best loss: -0.9166666666666666]

Created version '3' of model 'xgboost'.

Registered model 'xgboost' already exists. Creating a new version of this model...
2023/03/07 23:05:08 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: xgboost, version 4



 80%|███████████████████████████████████████▏         | 4/5 [00:18<00:04,  4.46s/trial, best loss: -0.9166666666666666]

Created version '4' of model 'xgboost'.

Registered model 'xgboost' already exists. Creating a new version of this model...
2023/03/07 23:05:13 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: xgboost, version 5



100%|█████████████████████████████████████████████████| 5/5 [00:23<00:00,  4.74s/trial, best loss: -0.9611111111111111]


Created version '5' of model 'xgboost'.


## compare models and get best one 

In [10]:
# Client bound to the local tracking server, used for run and model-registry queries.
client = MlflowClient("http://127.0.0.1:5000")

In [11]:

# Resolve the experiment by name instead of hard-coding its id, which depends
# on how many experiments already exist in the backend store.
experiment = client.get_experiment_by_name("digits-classification-experiment")
if experiment is None:
    raise RuntimeError(
        "Experiment 'digits-classification-experiment' was not found on the tracking server."
    )

# Reuse the configured client and fetch only the top run by accuracy.
best_runs = client.search_runs(
    experiment_ids=[experiment.experiment_id],
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=1,
    order_by=["metrics.accuracy DESC"],
)
if not best_runs:
    raise RuntimeError(
        "No active runs found in experiment 'digits-classification-experiment'."
    )
run = best_runs[0]

In [12]:
run_id = run.info.run_id
# The model is logged under the "mlruns" artifact path (see the log_model call
# in `objective`), so the run-relative URI must use that path.
model_uri = f"runs:/{run_id}/mlruns"

In [13]:
# Resolve the run-relative model URI to its underlying artifact location.
model_src = RunsArtifactRepository.get_underlying_uri(model_uri)

# Find the registered model version(s) created from the selected run and keep
# the version of the first match.
filter_string = f"run_id='{run_id}'"
results = client.search_model_versions(filter_string)
first_match = results[0]
model_version = first_match.version

## Promote best model to production stage

In [14]:
new_stage = "Production"
# Archive versions already in Production: "models:/xgboost/Production"
# resolves to the latest version in that stage, so leaving older promotions in
# place could make the server load a version other than the one selected here.
client.transition_model_version_stage(
    name="xgboost",
    version=model_version,
    stage=new_stage,
    archive_existing_versions=True,
)

<ModelVersion: creation_timestamp=1678223113311, current_stage='Production', description='', last_updated_timestamp=1678228365436, name='xgboost', run_id='d00c053e15124fbf83db2527ca021ff4', run_link='', source='./mlruns/1/d00c053e15124fbf83db2527ca021ff4/artifacts/mlruns', status='READY', status_message='', tags={}, user_id='', version='5'>

## serve production model
### in cmd run 
* set MLFLOW_TRACKING_URI=http://127.0.0.1:5000
* mlflow models serve -m "models:/xgboost/Production" -p 8080

## Send a request to the served model

In [15]:
# Take the test sample at position 100 as a {column: value} record. Position
# 100 of x_test lines up with y_test.iloc[100], so the prediction can be
# checked against the true label. json.loads is used because to_json() emits
# JSON, which is not always a valid Python literal (e.g. null/true/false).
row = json.loads(x_test.iloc[100].to_json())
#row

In [16]:
# Label stored at position 100 of the test targets.
y_test.iat[100]

4.0

In [17]:

# Address of the model server started with `mlflow models serve`.
host = '127.0.0.1'
port = '8080'
url = f'http://{host}:{port}/invocations'
headers = {'Content-Type': 'application/json',}

# The scoring request carries the row(s) to score as a list of records.
input_data = {
    "dataframe_records": [row]
}

# A timeout keeps the cell from hanging forever when the server is not running.
r = requests.post(url=url, headers=headers, data=json.dumps(input_data), timeout=30)
if not r.ok:
    # Surface server-side failures instead of printing an error body as if it were a prediction.
    raise requests.HTTPError(
        f'Scoring request failed with status {r.status_code}: {r.text}', response=r
    )
print(f'Predictions: {r.text}')

Predictions: {"predictions": [4]}
