In [10]:
import shutil
from pathlib import Path

import mlflow
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score,roc_auc_score
from sklearn.model_selection import train_test_split


In [3]:
# Select the best nested run (highest test AUC) of the most recent finished sweep.
#
# The tracking URI is set here, before the first MLflow call, so this cell
# queries the tracking server on a fresh kernel (Restart Kernel -> Run All)
# instead of whatever default store the client would otherwise fall back to.
mlflow.set_tracking_uri(uri="http://localhost:8080")

experiment = mlflow.get_experiment_by_name('MLflow Depression')
if experiment is None:
    # An unknown experiment name yields None; fail with a readable message
    # instead of an AttributeError on `.experiment_id`.
    raise ValueError("MLflow experiment 'MLflow Depression' was not found on the tracking server")
id_experiment = experiment.experiment_id

# Newest runs first, so position 0 below is the most recently started run.
runs_all = mlflow.search_runs(
    [id_experiment],
    order_by=['start_time DESC']
)

parent_col = 'tags.mlflow.parentRunId'
if parent_col not in runs_all.columns:
    # The column is absent when no returned run carries a parent-run tag.
    raise ValueError("No nested runs found in experiment 'MLflow Depression'")

# Drop every run whose parent still has at least one RUNNING child, so a
# sweep that is still in progress is never selected.
running_parents = runs_all.loc[runs_all['status'] == 'RUNNING', parent_col]
runs = runs_all[~runs_all[parent_col].isin(running_parents)]

# Only rows with a parent id are nested runs. Skipping rows without one keeps
# a top-level run from being picked as "latest", which would leave the
# selection below empty and make `.iloc[0]` raise IndexError.
nested_runs = runs[runs[parent_col].notna()]
if nested_runs.empty:
    raise ValueError("No finished nested runs available to choose a model from")

latest_parent_run = nested_runs.iloc[0][parent_col]
latest_nested_runs = nested_runs[nested_runs[parent_col] == latest_parent_run]

# Highest test AUC first; runs without the metric (NaN) sort last.
best_latest_run = latest_nested_runs.sort_values('metrics.test_auc', ascending=False).iloc[0]
best_latest_run

run_id                                            3231a1b66cdf47ca8e409072e4b27e88
experiment_id                                                   439831194545044561
status                                                                    FINISHED
artifact_uri                     mlflow-artifacts:/439831194545044561/3231a1b66...
start_time                                        2024-12-21 17:31:37.170000+00:00
end_time                                          2024-12-21 17:35:56.978000+00:00
metrics.train_auc                                                         0.621791
metrics.test_auc                                                          0.618505
params.n_estimators                                                            250
params.max_depth                                                                 4
tags.mlflow.user                                                             nithi
tags.mlflow.source.name                                        src/train_mlflow.py
tags

In [5]:
# Point the MLflow client at the tracking server before resolving the model.
mlflow.set_tracking_uri(uri="http://localhost:8080")

# Build the `runs:/<run_id>/depression_model` URI for the selected run and
# load the scikit-learn model stored under it.
best_model = 'runs:/{}/depression_model'.format(best_latest_run.run_id)
load_model = mlflow.sklearn.load_model(model_uri=best_model)

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 7/7 [02:58<00:00, 25.49s/it]


In [6]:
# Bare last expression: the notebook renders the loaded model object as the
# cell output, as a quick check of what was loaded.
load_model

In [None]:
# Load the processed dataset and score the loaded model on a train/test split.
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# produced by this project.
data_processed = pd.read_pickle('data/processed_data.pkl')

# 'label_c' is the target; both label columns are removed from the features.
# drop() returns a new frame, so the loaded data is not mutated in place.
y = data_processed['label_c']
X = data_processed.drop(columns=['label', 'label_c'])

# Split the data into training and test sets
# NOTE(review): assumes the training script used the same test_size and
# random_state; otherwise "train"/"test" here do not match the rows the model
# was actually fitted on -- confirm against src/train_mlflow.py.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

train_accuracy = accuracy_score(y_train, load_model.predict(X_train))

test_accuracy = accuracy_score(y_test, load_model.predict(X_test))

# Displayed as (test, train).
test_accuracy,train_accuracy

(0.48366242115184765, 0.48730869389210124)

In [8]:
# Copy the selected run's pickled model from the local artifact directory to
# models/model.pkl.
# The experiment id is read from the run itself rather than hard-coded, so the
# path stays correct if the experiment is recreated under a new id.
model_src = 'mlartifacts/{}/{}/artifacts/depression_model/model.pkl'.format(
    best_latest_run.experiment_id,
    best_latest_run.run_id,
)
model_dst = 'models/model.pkl'

# copyfile does not create missing directories; make sure the target exists.
Path(model_dst).parent.mkdir(parents=True, exist_ok=True)

# The destination is passed as a str so the cell output stays 'models/model.pkl'.
shutil.copyfile(model_src, model_dst)

'models/model.pkl'