# Evaluate & Register Model

In [1]:
import os

import mlflow
import ray
from sklearn import metrics

In [2]:
# MLflow connection settings. The standard MLflow environment variables win when
# they are set, so the notebook can target another server or experiment without
# being edited; otherwise the local development defaults are used.
MLFLOW_TRACKING_URI = os.environ.get('MLFLOW_TRACKING_URI', 'http://127.0.0.1:5000')
MLFLOW_EXPERIMENT_NAME = os.environ.get('MLFLOW_EXPERIMENT_NAME', 'openfoodfacts-nova-dev')

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
# Last expression of the cell, so the resolved Experiment is displayed.
mlflow.set_experiment(experiment_name=MLFLOW_EXPERIMENT_NAME)

<Experiment: artifact_location='mlflow-artifacts:/1', experiment_id='1', lifecycle_stage='active', name='openfoodfacts-nova-dev', tags={}>

In [3]:
# Resume the existing run with this ID (passing run_id re-opens a run instead of
# creating a new one). The metrics logged below and the model registered at the
# end are attached to this run.
RUN_ID = 'a0652b7d994e4c38b516f674289f89b0'
mlflow.start_run(run_id=RUN_ID)

<ActiveRun: >

In [12]:
# Evaluation inputs. Both locations can be overridden with an environment
# variable of the same name, so the notebook is not tied to one machine's
# directory layout; the defaults are the original development paths.
# Parquet data holding the ground-truth labels (read into truth_ds below).
INPUT_GROUND_TRUTH_PATH = os.environ.get(
    'INPUT_GROUND_TRUTH_PATH',
    '/Users/rgareev/data/openfoodfacts/wrk/20220831-dev/test.parquet',
)
# Parquet data holding the model output (read into predicted_ds below).
INPUT_MODEL_OUT_PATH = os.environ.get(
    'INPUT_MODEL_OUT_PATH',
    '/Users/rgareev/projects/mlops-openfoodfacts/wrk/testings/20220831-dev/model',
)
# model name in the registry
MODEL_NAME = 'openfoodfacts-nova-dev'

In [7]:
# Load ground truth and model output, giving both datasets the same number of
# blocks -- presumably so the zip() in the next step lines up block by block.
NUM_BLOCKS = 10
truth_ds = ray.data.read_parquet(INPUT_GROUND_TRUTH_PATH).repartition(NUM_BLOCKS)
predicted_ds = ray.data.read_parquet(INPUT_MODEL_OUT_PATH).repartition(NUM_BLOCKS)

2022-09-11 22:08:37,370	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8267 [39m[22m
Read: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.90it/s]
Repartition: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 42.07it/s]
Read: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 69.93it/s]
Repartition: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 329.53it/s]


In [8]:
# Drop the text columns from both sides, then zip the two datasets so labels and
# predictions sit side by side.
# NOTE(review): zip() pairs rows by position, so this assumes both inputs hold
# the same rows in the same order -- confirm against the prediction job.
TEXT_COLUMNS = ['product_name', 'ingredients_list']
eval_ds = (
    truth_ds.drop_columns(TEXT_COLUMNS)
    .zip(predicted_ds.drop_columns(TEXT_COLUMNS))
)

Map_Batches: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 95.30it/s]
Map_Batches: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 165.43it/s]


In [9]:
# Materialise the zipped dataset as a pandas DataFrame. The exact row count is
# passed as the limit so that every row is converted.
n_rows = eval_ds.count()
eval_df = eval_ds.to_pandas(n_rows)
# Summarise columns, dtypes and null counts as a quick sanity check.
eval_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 135135 entries, 0 to 135134
Data columns (total 2 columns):
 #   Column       Non-Null Count   Dtype
---  ------       --------------   -----
 0   nova_group   135135 non-null  int8 
 1   predictions  135135 non-null  int8 
dtypes: int8(2)
memory usage: 264.1 KB


In [10]:
y_true = eval_df.nova_group
y_pred = eval_df.predictions

# Per-class F1, logged as `f1_<label>`.
# The label list is taken from the data and passed to f1_score explicitly, so the
# i-th score is guaranteed to belong to the i-th label. Relying on a fixed
# range(4) would attach scores to the wrong metric name whenever the label values
# are not exactly 0..3 or a class is missing from both columns.
class_labels = sorted(set(y_true.unique()) | set(y_pred.unique()))
per_class_f1 = metrics.f1_score(y_true, y_pred, labels=class_labels, average=None)
mlflow.log_metrics({
    f'f1_{int(label)}': float(score)
    for label, score in zip(class_labels, per_class_f1)
})

In [11]:
# Aggregate quality metrics over all classes, logged to the active run.
overall_accuracy = metrics.accuracy_score(y_true, y_pred)
micro_f1 = metrics.f1_score(y_true, y_pred, average='micro')
macro_f1 = metrics.f1_score(y_true, y_pred, average='macro')

aggregate_metrics = {
    'accuracy': overall_accuracy,
    'f1_micro': micro_f1,
    'f1_macro': macro_f1,
}
mlflow.log_metrics(aggregate_metrics)

In [13]:
# Register the `model` artifact of the currently active run under MODEL_NAME.
# The call is the last expression, so the resulting ModelVersion is displayed.
active_run_id = mlflow.active_run().info.run_id
model_uri = f'runs:/{active_run_id}/model'
mlflow.register_model(model_uri=model_uri, name=MODEL_NAME)

Successfully registered model 'openfoodfacts-nova-dev'.
2022/09/11 22:11:58 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: openfoodfacts-nova-dev, version 1
Created version '1' of model 'openfoodfacts-nova-dev'.


<ModelVersion: creation_timestamp=1662959518614, current_stage='None', description='', last_updated_timestamp=1662959518614, name='openfoodfacts-nova-dev', run_id='a0652b7d994e4c38b516f674289f89b0', run_link='', source='mlflow-artifacts:/1/a0652b7d994e4c38b516f674289f89b0/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

In [14]:
# Close the run opened by mlflow.start_run() earlier in the notebook.
mlflow.end_run()