# Logistic Regression

In [None]:
import pandas
import numpy as np

import mlflow

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, confusion_matrix

from data_utils import get_train_test_split_for_stock
from config import *

In [None]:
# `os` is not imported explicitly in this notebook's import cell; it would only
# be in scope if `from config import *` happens to re-export it. Import it here
# so this cell does not depend on that side effect.
import os

# Print the kernel's current working directory.
print(os.getcwd())

# Retrieve data

In [None]:
# Fetch the four partitions returned by the project's split helper for the
# path held in PATH_TO_DATA_FILE.
(X_train, X_test, y_train, y_test) = get_train_test_split_for_stock(PATH_TO_DATA_FILE)

# Cell output: the shape of every partition, in the same order as above.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

# Set experiment

In [None]:
# Make "SP_EXP_Modelling" the active MLflow experiment for the runs started below.
mlflow.set_experiment(experiment_name="SP_EXP_Modelling")

# Create model

In [None]:
# Enable MLflow's scikit-learn autologging before the model is fitted, so that
# params, metrics and tags are recorded without explicit logging calls.
# https://www.mlflow.org/docs/latest/tracking.html#automatic-logging
mlflow.sklearn.autolog()

# Logistic regression classifier created with no arguments, i.e. with
# scikit-learn's default hyperparameters.
model = LogisticRegression()

# Run the model

In [None]:
# Train and evaluate inside a named MLflow run so that everything logged
# below is attached to that run.
with mlflow.start_run(run_name="logistic regression") as run:
    # Fit on the training partition, then predict on the test partition.
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    # Binarise at 0.5 so the same cell also works for models whose predict()
    # yields scores such as 0.7 (e.g. xgboost) rather than 0/1 labels.
    y_pred = np.where(preds > 0.5, 1, 0)

    f1 = f1_score(y_true=y_test, y_pred=y_pred)

    # Explicit additions on top of whatever autologging records.
    mlflow.set_tag(key="Model_name", value="Logistic regression")
    mlflow.log_metric("testing_f1_score", f1)

In [None]:
# Display the F1 score computed on the test-set predictions.
f1

# Show
- confusion matrix
- model meta-data

## Build Docker container to be used later in the demo

In [None]:
# In the HOST terminal

# Setup your own path to the file in the HOST environment

# If a conda environment error occurs, allocate more RAM to Docker

In [None]:
# Absolute path to a logged model's artifact directory on the author's HOST
# machine; replace it with the corresponding path from your own environment.
PATH_TO_MODEL = '/Users/ksatola/git/cerebro-agh/data/artifacts/3/df2f6a41c8fd4c53809b9905a418b418/artifacts/model'

In [None]:
# Run in the HOST terminal: builds a Docker image named "mlflow_demo" from the model stored at the given path.
mlflow models build-docker -m '/Users/ksatola/git/cerebro-agh/data/artifacts/3/df2f6a41c8fd4c53809b9905a418b418/artifacts/model' -n "mlflow_demo"

In [None]:
/data/artifacts/3/4f7e27c86241437e99e9018fff857fb3/artifacts/model