# Setup

Install Docker and create a Conda environment: `conda env create -f environment.yml`


In [None]:
import mlflow

## Kill

In [None]:
# Stop the Postgres DB container (it is started with `--rm`, so stopping it also removes the container)
!docker stop pg_modelregistry

In [None]:
# Kill the MLFlow Server
# NOTE(review): this kills every process whose `ps aux` line contains "mlflow",
# not only the tracking server - confirm nothing else on this machine matches.
!ps aux | grep 'mlflow' | grep -v 'grep' | awk '{print $2}' | xargs kill

In [None]:
# Kill the FastAPI-Endpoint
# The server is started as `uvicorn api:app`, so that is the string to look for in the process list.
!ps aux | grep 'api:app' | grep -v 'grep' | awk '{print $2}' | xargs kill

## Start

In [None]:
# Start a throwaway Postgres container for the MLflow backend store (user, password and database are all "mlflow").
!docker run --name pg_modelregistry -e POSTGRES_USER=mlflow -e POSTGRES_DB=mlflow -e POSTGRES_PASSWORD=mlflow -p 5432:5432 -d --rm postgres

In [None]:
%%script bash --bg

# Launch the MLflow tracking server as a background process of this cell.
# Run metadata goes to the Postgres database on localhost:5432, artifacts to ./mlruns on the local disk.
# NOTE(review): --host 0.0.0.0 listens on all network interfaces - confirm this is intended.
mlflow server \
--backend-store-uri "postgresql://mlflow:mlflow@localhost:5432/mlflow" \
--default-artifact-root file:./mlruns \
--host 0.0.0.0

# Write Program

In [None]:
%%writefile custom_code/model_preperation.py
from sklearn.base import TransformerMixin
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline 
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.preprocessing import label_binarize
import pandas as pd

def get_title(names):
    """Extract the honorific from each passenger name and collapse it into a few groups.

    The title is the run of letters that follows a space and ends with a dot
    (e.g. "Mr" in "Braund, Mr. Owen"). Uncommon titles become 'Rare', and the
    variants 'Mlle'/'Ms' and 'Mme' are folded into 'Miss' and 'Mrs'.

    Parameters
    ----------
    names : pandas.Series of str

    Returns
    -------
    pandas.Series
        One title per name; entries without a matching title are missing values.
    """
    # None of the replacement values is itself a key, so a single mapping is
    # equivalent to applying the substitutions one after another.
    canonical = {
        uncommon: 'Rare'
        for uncommon in ('Lady', 'Countess', 'Capt', 'Col', 'Don', 'Dr',
                         'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona')
    }
    canonical.update({'Mlle': 'Miss', 'Ms': 'Miss', 'Mme': 'Mrs'})

    extracted = names.str.extract(r' ([A-Za-z]+)\.', expand=False)
    return extracted.replace(canonical)

class ModelPreperation(TransformerMixin):
    """Feature preparation step for the passenger data frame.

    `fit` learns the fill values for missing Age/Fare/Embarked, the set of
    Embarked categories and the title encoding. `transform` applies them and
    returns a new frame with engineered columns (`has_cabin`, `title`,
    dummy-encoded `Embarked`) and without the Cabin/Name/Ticket/PassengerId columns.
    """

    def __init__(self):
        # Encoder that maps the title strings from get_title() to integer codes.
        self.title_encoder = LabelEncoder()

    def fit(self, X, y=None):
        """Learn imputation defaults and encodings from X; returns self. `y` is ignored."""
        self.default_age = X.Age.median()
        self.default_price = X.Fare.mean()
        self.default_harbor = X.Embarked.mode()[0]
        # Remember the categories seen at fit time so transform() always
        # produces the same dummy columns, whatever values a batch contains.
        self.embarked_categories = list(X.Embarked.dropna().unique())
        self.title_encoder.fit(get_title(X.Name))
        return self

    def transform(self, df):
        """Return a prepared copy of `df`; the input frame is left untouched.

        NOTE(review): the title encoder is expected to reject titles it did not
        see during fit - confirm this is acceptable for serving.
        """
        df = df.copy()
        # Assign the filled column back instead of `df[col].fillna(..., inplace=True)`:
        # the chained in-place form operates on a temporary object and is not
        # guaranteed to write through to `df`.
        df["Age"] = df["Age"].fillna(self.default_age)
        df["Fare"] = df["Fare"].fillna(self.default_price)
        df["Embarked"] = df["Embarked"].fillna(self.default_harbor)
        # `classes` is passed by keyword; positional use is not accepted by current scikit-learn.
        df['Sex'] = label_binarize(df.Sex, classes=['male', 'female'])
        df["has_cabin"] = ~df.Cabin.isna()
        df["title"] = self.title_encoder.transform(get_title(df.Name))
        # errors='ignore' tolerates inputs in which some of these columns are absent.
        df = df.drop(columns=["Cabin","Name","Ticket","PassengerId"], errors='ignore')
        df["Embarked"] = df.Embarked.astype("category").cat.set_categories(self.embarked_categories)
        df = pd.get_dummies(df, ["Embarked"], columns=["Embarked"], drop_first=True)
        return df


In [None]:
%%writefile train.py
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import cross_val_score
from custom_code import ModelPreperation

import logging
logging.basicConfig(level=logging.WARN)
logger = logging.getLogger(__name__)

if __name__ == "__main__":
    # Seed NumPy's global random state before any training happens.
    np.random.seed(40)

    train = pd.read_csv("./data/titanic/train.csv")
    X = train.drop(columns="Survived")
    y = np.ravel(train[['Survived']])

    gb_params = {'n_estimators': 200,'min_samples_split': 16,'min_samples_leaf': 16,'max_features': 5,'max_depth': 3,'learning_rate': 0.25}
    mlflow.set_tracking_uri('http://localhost:5000')
    with mlflow.start_run():
        pipe = Pipeline(steps=[("prepare",ModelPreperation()), ("clr",GradientBoostingClassifier(**gb_params))])

        # 5-fold cross-validation provides the metrics that are logged for this run.
        cv_result = cross_val_score(pipe, X, y = y, scoring = "accuracy", cv = 5, n_jobs=-1)
        mlflow.log_metric("mean accuracy", cv_result.mean())
        mlflow.log_metric("std accuracy", cv_result.std())

        # Fit once on the full data set; this fitted pipeline is the model that gets logged.
        pipe.fit(X, y)
        for k, v in gb_params.items():
            mlflow.log_param(k, v)

        # An empty artifact path puts the model files at the root of the run's artifacts;
        # the custom_code folder is logged next to them.
        mlflow.sklearn.log_model(pipe, "")
        mlflow.log_artifacts("custom_code", "custom_code")

In [None]:
from time import monotonic, sleep
from urllib.error import HTTPError
from urllib.request import urlopen

print("wait until server is ready")

# Poll the tracking server instead of sleeping for a fixed time: a fixed delay is
# too short on a slow start and wasted time on a fast one.
deadline = monotonic() + 60
while True:
    try:
        urlopen("http://localhost:5000", timeout=2).close()
        break
    except HTTPError:
        # Any HTTP response, even an error status, means the server is accepting requests.
        break
    except OSError as err:
        # Connection refused / timed out: the server is not listening yet.
        if monotonic() >= deadline:
            raise TimeoutError("MLflow server at http://localhost:5000 did not come up within 60 s") from err
        sleep(1)

In [None]:
# Execute the training script written above; it logs the run to the tracking server at http://localhost:5000.
%run train.py

# Check Models

In [None]:
from mlflow.tracking import MlflowClient
# Client bound to the local tracking server; the following cells reuse it.
mlflow_client = MlflowClient("http://localhost:5000")
mlflow_client.list_experiments()

In [None]:
# List the runs recorded under experiment 0 (presumably the server's default experiment - confirm).
mlflow_client.list_run_infos(experiment_id=0)

In [None]:
# Create the registry entry that the download cells below look up by name.
# NOTE(review): re-running this cell presumably fails once the name exists - confirm.
mlflow_client.create_registered_model("my_cool_model")

Go to the [MLflow UI](http://localhost:5000), open the run that was just logged, and register its model under the name `my_cool_model`.

In [None]:
# Manual step: check that the model has been registered in the MLflow UI (see the instruction above).

## Create API

In [None]:
# Start from an empty api folder; -f avoids an error on the first run, when the folder does not exist yet.
!rm -rf api

!mkdir -p api

This only works from this folder, since there is no real artifact store configured:

In [None]:
def download_most_recent_model(model_name, download_path, mlflow_client=None):
    """Download all artifacts of the newest version of a registered model.

    Parameters
    ----------
    model_name : str
        Name of the model in the MLflow model registry.
    download_path : str
        Local directory that receives the run's artifacts.
    mlflow_client : optional
        Client to use; when omitted, a client for http://localhost:5000 is created.

    Raises
    ------
    IndexError
        If the registered model has no versions.
    """
    if mlflow_client is None:
        # Imported only when needed so a caller-supplied client works on its own.
        from mlflow.tracking import MlflowClient
        mlflow_client = MlflowClient("http://localhost:5000")

    versions = mlflow_client.get_latest_versions(model_name)
    if not versions:
        raise IndexError(f"registered model {model_name!r} has no versions to download")

    # The list holds the latest version per stage; take the highest version
    # number rather than whichever entry happens to come first.
    newest = max(versions, key=lambda model_version: int(model_version.version))
    mlflow_client.download_artifacts(newest.run_id, "", download_path)

# Fetch the artifacts of the registered model into the local "api" folder.
download_most_recent_model(model_name="my_cool_model", download_path="api")

In [None]:
%%writefile api/api.py

import cloudpickle
from fastapi import FastAPI
from pydantic import BaseModel
from custom_code import ModelPreperation

from sklearn.pipeline import FeatureUnion, Pipeline 
import numpy as np 
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier

def download_most_recent_model(model_name, download_path, mlflow_client=None):
    """Download all artifacts of the newest version of a registered model.

    Parameters
    ----------
    model_name : str
        Name of the model in the MLflow model registry.
    download_path : str
        Local directory that receives the run's artifacts.
    mlflow_client : optional
        Client to use; when omitted, a client for http://localhost:5000 is created.

    Raises
    ------
    IndexError
        If the registered model has no versions.
    """
    if mlflow_client is None:
        # Imported only when needed so a caller-supplied client works on its own.
        from mlflow.tracking import MlflowClient
        mlflow_client = MlflowClient("http://localhost:5000")

    versions = mlflow_client.get_latest_versions(model_name)
    if not versions:
        raise IndexError(f"registered model {model_name!r} has no versions to download")

    # The list holds the latest version per stage; take the highest version
    # number rather than whichever entry happens to come first.
    newest = max(versions, key=lambda model_version: int(model_version.version))
    # The artifact path is a relative path, not a glob: "" selects the whole artifact root.
    mlflow_client.download_artifacts(newest.run_id, "", download_path)

class Passenger(BaseModel):
    # Request body of the prediction endpoint: one passenger record.
    # Every field has a default, so a partial payload is accepted.
    PassengerId: float = 1
    # Declared as int so the annotation agrees with the default value.
    Pclass: int = 3
    # The default name carries the title "Rare", one of the groups produced by get_title().
    Name: str = 'Nico, Rare. Kreiling'
    # The pipeline binarizes Sex against the labels 'male' / 'female'; any other value
    # (such as 'F') matches neither label.
    Sex: str = 'female'
    # float rather than int so fractional ages are accepted as well.
    Age: float = 30
    SibSp: float = 0
    Parch: float = 3
    Ticket: str = ''
    Fare: float = 100
    Cabin: str = ''
    Embarked: str = 'C'
        

# ASGI application object; `uvicorn api:app` refers to this name.
app = FastAPI()
        
# Minimal endpoint that returns a fixed greeting.
@app.get("/")
async def root():
    return {"message": "Hello World"}

# Prediction endpoint: turns the posted passenger into a one-row frame, runs the
# pickled pipeline on it and answers with a human-readable verdict.
@app.post("/survived/custom")
async def predict(input_data: Passenger):
    # The pipeline is read from the working directory on every request.
    with open("./model.pkl", "rb") as model_file:
        model = cloudpickle.load(model_file)

    # One row, one column per Passenger field.
    passenger_frame = pd.Series(dict(input_data)).to_frame().transpose()
    outcome = model.predict(passenger_frame)

    verdict = "Sorry, you die!" if outcome[0] == 0 else "Yeaaah, you will survive :)"
    return {"message": verdict}

## Important

Start the FastAPI server from the `api` directory with: `uvicorn api:app --port=8000`

# Use

In [None]:
import requests

# Keys must match the field names of the API's Passenger model; a misspelled key
# is not picked up and the field's default is used instead.
form_data = {
    "PassengerId": 1,
    "Pclass": 3, 
    "Name": 'Nico, Mrs. Kreiling',
    # The model binarizes Sex against the labels 'male' / 'female'.
    "Sex": 'female',
    "Age": 30,
    "SibSp": 4,
    "Parch": 0,
    "Ticket": "abc",
    "Fare": 100,
    'Cabin': "ab",
    "Embarked": 'C'
}
r = requests.post('http://127.0.0.1:8000/survived/custom', json=form_data)
# Surface HTTP errors (e.g. a validation failure) instead of failing later on a missing "message" key.
r.raise_for_status()
r.json()["message"]

# Other

In [None]:
from pathlib import Path
from mlflow.tracking import MlflowClient
def download_most_recent_model(model_name, download_path="downloaded_artifact"):
    """Download the "model" artifact folder of a registered model's run.

    Uses the first entry returned by `get_latest_versions` for `model_name` and
    downloads the "model" artifact path of that version's run into
    `download_path`, creating the directory first if necessary.

    Parameters
    ----------
    model_name : str
        Name of the model in the MLflow model registry.
    download_path : str
        Local target directory (default "downloaded_artifact").
    """
    mlc = MlflowClient("http://localhost:5000")
    recent_model = mlc.get_latest_versions(model_name)[0]
    # The download target must exist before artifacts are written into it.
    Path(download_path).mkdir(parents=True, exist_ok=True)
    mlc.download_artifacts(recent_model.run_id, "model",download_path)

# Download the "model" artifacts of my_cool_model into the api folder.
download_most_recent_model("my_cool_model", "api")

## REST

In [None]:
# Same information as the Python client, fetched straight from the REST API: list the experiments.
!curl -XGET http://localhost:5000/api/2.0/mlflow/experiments/list

In [None]:
# List the registered models through the REST API (preview endpoint).
!curl -XGET http://localhost:5000/api/2.0/preview/mlflow/registered-models/list

In [None]:
# List the artifacts of one run. The run_id is specific to one tracking server - replace it with an id from your own runs.
# The URL is quoted so the shell does not treat the "?" as a glob character.
!curl -XGET "http://localhost:5000/api/2.0/mlflow/artifacts/list?run_id=880e9b44f79f4ab2b33d6fef4324e54d"