In [1]:
import mlflow
from mlflow import MlflowClient
import pandas as pd
import xgboost as xgb
import sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn import metrics
import pickle

In [2]:
TRACKING_SERVER_HOST = "35.217.20.109"
# mlflow.set_tracking_uri("app/sqlite:///mlflow.db")

mlflow.set_tracking_uri(f"http://{TRACKING_SERVER_HOST}:5000")
mlflow.set_experiment("telco-customers-churn")

<Experiment: artifact_location='gs://mlflow-zoomcamp-bucket/mlruns/1', creation_time=1692174508631, experiment_id='1', last_update_time=1692174508631, lifecycle_stage='active', name='telco-customers-churn', tags={}>

In [8]:
f"http://{TRACKING_SERVER_HOST}:5000"

'http://35.217.20.109:5000'

In [3]:
client = MlflowClient()

In [5]:
experiment = mlflow.get_experiment_by_name("telco-customers-churn")
runs = mlflow.search_runs(experiment.experiment_id)
best_run = runs.loc[runs['metrics.test_f1_score'].idxmax()]

In [6]:
model_uri = f"runs:/{best_run.run_id}/model"
model_uri

'runs:/a22fd2ebe68e4a6698560b2ceee2bea9/model'

In [7]:
# Load the best model using the MLflow API
model_pyfunc = mlflow.pyfunc.load_model(model_uri)
model_pyfunc

mlflow.pyfunc.loaded_model:
  artifact_path: model
  flavor: mlflow.lightgbm
  run_id: a22fd2ebe68e4a6698560b2ceee2bea9

In [8]:
df = pd.read_csv("data/test.csv")

In [9]:
num_features = ["tenure", "MonthlyCharges", "TotalCharges"]
cat_features = [
     'gender',
     'SeniorCitizen',
     'Partner',
     'Dependents',
     'PhoneService',
     'MultipleLines',
     'InternetService',
     'OnlineSecurity',
     'OnlineBackup',
     'DeviceProtection',
     'TechSupport',
     'StreamingTV',
     'StreamingMovies',
     'Contract',
     'PaperlessBilling',
     'PaymentMethod'
               ]

In [10]:
with open("app/models/min_max_scaler.bin", "rb") as f:
    scaler = pickle.load(f)
    
with open("app/models/ohe.bin", "rb") as f:
    ohe = pickle.load(f)

def prepare_features(df):
    df.TotalCharges = pd.to_numeric(df.TotalCharges, errors='coerce')
    df = df.iloc[:,1:]
    df = df.dropna()
    df = df.reset_index(drop=True)

    df['Churn'] = df['Churn'].replace(to_replace='Yes', value=1)
    df['Churn'] = df['Churn'].replace(to_replace='No',  value=0)

    df_dummies = ohe.fit_transform(df[cat_features])
    X = pd.DataFrame(df_dummies, columns=ohe.get_feature_names_out())
    X = pd.concat([X, df[num_features]], axis=1)
    
    features = X.columns.values
    scaler = MinMaxScaler(feature_range = (0,1))
    
    X = pd.DataFrame(scaler.fit_transform(X))
    X.columns = features
    return X, df.Churn.values

In [11]:
X, y = prepare_features(df)

In [12]:
preds = model_pyfunc.predict(X)

In [13]:
metrics.f1_score(y, preds)

0.5583333333333335

In [5]:
model_name = 'churn_model'


In [14]:

# Register model
mlflow.register_model(model_uri, model_name)

Successfully registered model 'churn_model'.
2023/08/16 12:13:40 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: churn_model, version 1
Created version '1' of model 'churn_model'.


<ModelVersion: aliases=[], creation_timestamp=1692188020334, current_stage='None', description='', last_updated_timestamp=1692188020334, name='churn_model', run_id='a22fd2ebe68e4a6698560b2ceee2bea9', run_link='', source='gs://mlflow-zoomcamp-bucket/mlruns/1/a22fd2ebe68e4a6698560b2ceee2bea9/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

In [15]:
client.transition_model_version_stage(name=model_name, version=1, stage="Production")

<ModelVersion: aliases=[], creation_timestamp=1692188020334, current_stage='Production', description='', last_updated_timestamp=1692188031702, name='churn_model', run_id='a22fd2ebe68e4a6698560b2ceee2bea9', run_link='', source='gs://mlflow-zoomcamp-bucket/mlruns/1/a22fd2ebe68e4a6698560b2ceee2bea9/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

In [6]:
stage = 'Production'

model_registry_path = f'models:/{model_name}/{stage}'
production_model = mlflow.pyfunc.load_model(model_registry_path)

In [7]:
model_registry_path

'models:/churn_model/Production'

In [17]:
preds = production_model.predict(X)

In [18]:
metrics.f1_score(y, preds)

0.5583333333333335

In [34]:
mlflow.__version__

'2.5.0'