In [None]:
%pip install mlflow pandas python-dotenv sklearn

In [None]:
import getpass
import os

import pandas as pd
import dotenv
import sklearn
import mlflow
# Turn on MLflow autologging up front, before any model is trained below.
mlflow.autolog()

import mlflow.sklearn
from sklearn import linear_model, model_selection, metrics

# Load settings from the local `.env` file into the process environment.
dotenv.load_dotenv()

# Guard clause: stop immediately when the tracking password is missing,
# otherwise confirm that the configuration was picked up.
if "MLFLOW_TRACKING_PASSWORD" not in os.environ:
    raise ValueError("You need to properly configure the `.env` file")
print("good work! mlflow ready to go")

# Select the experiment named in the environment, or "Default" when unset.
mlflow.set_experiment(
    os.getenv("MLFLOW_EXPERIMENT_NAME", default="Default")
)

In [None]:
# Read the training table from the working directory and preview the first rows.
df = pd.read_csv(filepath_or_buffer="train.csv")
df.head(n=5)

In [None]:
# Numeric columns used as model inputs, and the label column to predict.
feature_cols = ["Age", "RoomService", "FoodCourt", "ShoppingMall", "Spa", "VRDeck"]
target_col = "Transported"

# Drop only the rows that are missing a value in a column the model actually
# uses. A bare `dropna()` would also discard rows whose only gaps are in
# columns that never reach the model, shrinking the training set for no reason.
no_na = df.dropna(subset=[*feature_cols, target_col])

X = no_na[feature_cols]
y = no_na[target_col]

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
train_X, test_X, train_y, test_y = model_selection.train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Train a logistic-regression baseline inside an MLflow run tagged with the
# current OS user, then record validation metrics on the held-out split.
with mlflow.start_run(tags=dict(who=getpass.getuser())):
    model = linear_model.LogisticRegression()
    model.fit(train_X, train_y)

    # `mlflow.sklearn.eval_and_log_metrics` was removed in MLflow 2.0, so the
    # validation metrics are computed explicitly and logged with the same
    # `val_` prefix the old call used.
    pred_y = model.predict(test_X)
    proba_y = model.predict_proba(test_X)
    mlflow.log_metrics(
        {
            "val_accuracy_score": metrics.accuracy_score(test_y, pred_y),
            "val_precision_score": metrics.precision_score(test_y, pred_y, average="weighted"),
            "val_recall_score": metrics.recall_score(test_y, pred_y, average="weighted"),
            "val_f1_score": metrics.f1_score(test_y, pred_y, average="weighted"),
            "val_log_loss": metrics.log_loss(test_y, proba_y),
            # NOTE(review): assumes a binary target, so column 1 holds the
            # positive-class probability -- confirm if the label ever changes.
            "val_roc_auc_score": metrics.roc_auc_score(test_y, proba_y[:, 1]),
        }
    )