In [None]:
import mlflow
import pandas as pd
import shutil
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score,roc_auc_score
from sklearn.model_selection import train_test_split

In [None]:
# Select the best nested run (highest test AUC) from the most recent finished sweep
# of the 'MLflow Depression' experiment.

# Query the same tracking server the model is loaded from later in the notebook;
# without this, a fresh kernel would search whatever the default tracking URI is.
mlflow.set_tracking_uri(uri="http://localhost:8080")

experiment = mlflow.get_experiment_by_name('MLflow Depression')
if experiment is None:
    # get_experiment_by_name returns None (it does not raise) for an unknown name.
    raise ValueError("MLflow experiment 'MLflow Depression' was not found on the tracking server")
id_experiment = experiment.experiment_id

runs_all = mlflow.search_runs(
    [id_experiment],
    order_by=['start_time DESC']
)

PARENT_RUN_TAG = 'tags.mlflow.parentRunId'
if PARENT_RUN_TAG not in runs_all.columns:
    # The tag column only exists when at least one returned run is nested.
    raise ValueError("No nested runs found in experiment 'MLflow Depression'")

# A sweep is still in progress when one of its child runs is RUNNING, or when the
# parent run itself is RUNNING (e.g. between two child trials). Skip both cases.
running_runs = runs_all[runs_all['status'] == 'RUNNING']
has_running_sibling = runs_all[PARENT_RUN_TAG].isin(running_runs[PARENT_RUN_TAG])
has_running_parent = runs_all[PARENT_RUN_TAG].isin(running_runs['run_id'])
runs = runs_all[~(has_running_sibling | has_running_parent)]

# Only rows that carry a parent tag are nested runs; top-level rows have a null
# tag and would otherwise make the equality filter below match nothing.
nested_runs = runs.dropna(subset=[PARENT_RUN_TAG])
if nested_runs.empty:
    raise ValueError("No finished nested runs available to select a model from")

# Rows are ordered newest-first, so the first nested row belongs to the latest sweep.
latest_parent_run = nested_runs.iloc[0][PARENT_RUN_TAG]
latest_nested_runs = nested_runs[nested_runs[PARENT_RUN_TAG] == latest_parent_run]

best_latest_run = latest_nested_runs.sort_values('metrics.test_auc', ascending=False).iloc[0]
best_latest_run

In [None]:
# Point the MLflow client at the tracking server on localhost:8080.
mlflow.set_tracking_uri(uri="http://localhost:8080")
# Build a runs:/ URI for the 'depression_model' artifact of the run selected
# earlier (best_latest_run) and load it through MLflow's scikit-learn flavor.
best_model = f'runs:/{best_latest_run.run_id}/depression_model'
load_model = mlflow.sklearn.load_model(model_uri=best_model)

In [None]:
# Show the loaded model object as the cell output.
load_model

In [None]:
# Load the processed dataset and separate the target column from the features.
# NOTE: read_pickle unpickles arbitrary objects - only use it on trusted files.
processed = pd.read_pickle('data/processed_data.pkl')
y = processed['label_c']
X = processed.drop(columns=['label', 'label_c'])

# Hold out 20% of the rows with a fixed seed.
# NOTE(review): presumably this mirrors the split used at training time - confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Score the loaded model on both partitions.
train_predictions = load_model.predict(X_train)
train_accuracy = accuracy_score(y_train, train_predictions)

test_predictions = load_model.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)

(test_accuracy, train_accuracy)

In [None]:
# Copy the selected run's pickled model out of the local MLflow artifact store
# into models/model.pkl. The experiment id is taken from the selected run itself
# rather than hard-coded, so the path stays valid if the experiment is recreated.
shutil.copyfile(
    'mlartifacts/{}/{}/artifacts/depression_model/model.pkl'.format(
        best_latest_run.experiment_id,
        best_latest_run.run_id,
    ),
    'models/model.pkl'
)