In [0]:
# train / test split, LogReg, MLflow logging
# mimic_mod/train
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import mlflow
import mlflow.sklearn

def train_logreg_tfidf(X, y, test_size=0.2, random_state=42, C=1.0, max_iter=1000, pos_label=1):
    """Train a logistic-regression classifier and record the run in MLflow.

    The data is split into train/test partitions stratified on ``y``, a
    ``LogisticRegression`` is fitted on the training part, and a
    classification report is computed on the held-out part. Hyperparameters,
    split settings, F1 metrics and the fitted model are logged inside a
    single MLflow run.

    Args:
        X: Feature matrix accepted by ``train_test_split`` and
            ``LogisticRegression.fit`` (presumably TF-IDF features, going by
            the function name -- confirm against the caller).
        y: Target labels; also used to stratify the split.
        test_size: Passed to ``train_test_split`` as the held-out share.
        random_state: Seed passed to ``train_test_split``.
        C: Inverse regularisation strength for ``LogisticRegression``.
        max_iter: Iteration cap for the solver.
        pos_label: Label whose F1 score is logged as ``f1_anxiety``. The
            report is keyed by the stringified label, so the default ``1``
            looks up ``"1"``.

    Returns:
        A ``(clf, report)`` tuple: the fitted classifier and the
        classification report as a nested dict.
    """
    import warnings

    Xtr, Xte, ytr, yte = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    with mlflow.start_run():
        clf = LogisticRegression(C=C, max_iter=max_iter, n_jobs=-1)
        clf.fit(Xtr, ytr)
        yhat = clf.predict(Xte)
        report = classification_report(yte, yhat, output_dict=True)

        # Log the split settings alongside the model hyperparameters so the
        # run can be reproduced from its MLflow record alone.
        mlflow.log_params(
            {
                "C": C,
                "max_iter": max_iter,
                "test_size": test_size,
                "random_state": random_state,
            }
        )

        # Per-class entries are keyed by str(label). A missing key must not
        # abort the run, otherwise the fitted model below is never logged.
        pos_key = str(pos_label)
        pos_stats = report.get(pos_key)
        if isinstance(pos_stats, dict):
            mlflow.log_metric("f1_anxiety", pos_stats["f1-score"])
        else:
            warnings.warn(
                f"label {pos_key!r} not found in classification report; "
                "skipping 'f1_anxiety' metric",
                stacklevel=2,
            )
        mlflow.log_metric("f1_macro", report["macro avg"]["f1-score"])
        mlflow.sklearn.log_model(clf, "model")

    return clf, report
