# Import relevant libraries

# In [1]:
import os

import mlflow
import pandas as pd
import xgboost as xgb
from evidently.dashboard import Dashboard
from evidently.tabs import ClassificationPerformanceTab
from sklearn.model_selection import train_test_split

# Get reference dataset

# In [2]:
# The CSV is read without a header row, so the column labels are supplied
# here: fourteen feature columns day0..day13 followed by the target column.
column_names = [f"day{i}" for i in range(14)] + ["target"]
reference_data = pd.read_csv("training_data.csv", header=None, names=column_names)

# Every column except the last is a feature; the last column is the label.
X = reference_data.iloc[:, :-1]
Y = reference_data.iloc[:, -1]

# Hold out 33% of the rows as the "production" sample. The split is stratified
# on the label and seeded so it is reproducible.
reference, production, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=4284,
    stratify=Y,
)

# DMatrix wrappers: unlabeled ones for prediction, a labeled one for training.
dreference = xgb.DMatrix(reference)
dproduction = xgb.DMatrix(production)
reference_train = xgb.DMatrix(reference, label=y_train)


# Train your model

# In [3]:
# Enable MLflow's XGBoost autologging for the training call below.
mlflow.xgboost.autolog()
EXPERIMENT_NAME = "reports_model_performance"
mlflow.set_experiment(EXPERIMENT_NAME)
# Scores strictly above this value are labelled 1.0, everything else 0.0.
threshold = 0.5
with mlflow.start_run() as run:
    # With empty params xgboost falls back to its default squared-error
    # regression objective, whose raw outputs are not probabilities.
    # binary:logistic makes the predictions probabilities, so the 0.5
    # cut-off below is meaningful.
    # NOTE(review): assumes the target column is binary 0/1 - confirm.
    model = xgb.train(
        dtrain=reference_train,
        params={"objective": "binary:logistic"},
    )

    train_proba_predict = model.predict(dreference)
    test_proba_predict = model.predict(dproduction)

    # Vectorised thresholding on the prediction arrays.
    train_predictions = (train_proba_predict > threshold).astype(float)
    test_predictions = (test_proba_predict > threshold).astype(float)

    # Build new frames instead of writing columns into the frames returned by
    # train_test_split, which can trigger pandas' SettingWithCopyWarning.
    reference = reference.assign(target=y_train, prediction=train_predictions)
    production = production.assign(target=y_test, prediction=test_predictions)

    classification_performance = Dashboard(tabs=[ClassificationPerformanceTab])
    classification_performance.calculate(reference, production)

    # Make sure the output directory exists before saving the report into it.
    report_path = '.reports/' + EXPERIMENT_NAME + '.html'
    os.makedirs(os.path.dirname(report_path), exist_ok=True)

    classification_performance.save(report_path)
    mlflow.log_artifact(report_path)




