# Import dependencies

In [1]:
import pandas
import numpy as np
import mlflow
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, confusion_matrix
from sklearn.model_selection import train_test_split

# Retrieve Data

In [2]:
# Load the training table; every column except the last is treated as a
# feature and the last column as the target.
pandas_df = pandas.read_csv("training_data.csv")
feature_columns = pandas_df.columns[:-1]
target_column = pandas_df.columns[-1]
X = pandas_df.iloc[:, :len(feature_columns)]
Y = pandas_df.iloc[:, -1]

# Hold out 33% of the rows for evaluation. The split is seeded for
# reproducibility and stratified on the target column.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=4284,
    stratify=Y,
)

# Set Experiment

In [3]:

# Select the MLflow experiment that subsequent runs are recorded under.
mlflow.set_experiment(experiment_name="Baseline_Predictions")

# Create model

In [4]:
# Turn on MLflow's scikit-learn autologging before the estimator is fitted.
mlflow.sklearn.autolog()
# Baseline classifier: logistic regression with scikit-learn's default settings.
model = LogisticRegression()

# Run the model

In [5]:
# Fit the baseline inside a named MLflow run and score it on the held-out split.
with mlflow.start_run(run_name='logistic_regression_model_baseline') as run:
    model.fit(X_train, y_train)
    # LogisticRegression.predict returns hard class labels, not probabilities,
    # so the labels are scored as-is. Thresholding them at 0.5 was a no-op for
    # 0/1 labels and would silently rewrite predictions under any other label
    # encoding (e.g. {1, 2} or {-1, 1}).
    preds = model.predict(X_test)
    y_pred = preds
    f1 = f1_score(y_test, y_pred)
    # Record the held-out F1 explicitly under its own metric key.
    mlflow.log_metric(key="f1_experiment_score", value=f1)

2021/07/02 19:57:56 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'd9a2deb74126423f978bada44a59ca5a'


In [6]:
# Display the F1 score computed on the held-out test split.
f1

0.6633954857703631