# Logistic Regression

In [1]:
import sys
import os

# Put the directory two levels above the working directory on the import path;
# the `src.*` imports in the following cell are expected to resolve from there.
cur_dir = os.getcwd()
# Build the path with os.path helpers so it is absolute, free of ".." segments
# and independent of the platform's path separator.
root_dir = os.path.abspath(os.path.join(cur_dir, os.pardir, os.pardir))
# Guard the append so re-running this cell does not stack duplicate entries.
if root_dir not in sys.path:
    sys.path.append(root_dir)

In [2]:
# import the necessary packages
import pandas as pd
from src.data.load_data import load_data
from src.features.feature_labeling import label_encode, one_hot_encode
from src.data.data_scaling import standardize_data, normalize_data
from src.data.split_data import split_data
from src.models.Logistic import train_Logistic
from src.models.evaluate import evaluate_model

In [3]:
# Load the dataset twice: once with load_data's defaults (df_wo) and once with
# weight=True (df_w).
# NOTE(review): presumably "wo"/"w" mean without/with the weight column — confirm
# against load_data. df_w is not used by any cell visible in this notebook.
df_wo, df_w = load_data(), load_data(weight=True)

In [4]:
# Encode the frame in one pass:
#   1. label-encode "readmitted" as a binary target: 'NO' -> 0, while both
#      '>30' and '<30' collapse to 1;
#   2. one-hot encode the categorical columns of the result.
# NOTE(review): df_wo is overwritten, so re-running this cell feeds the already
# encoded frame back into label_encode — restart from the load cell instead.
df_wo = one_hot_encode(
    label_encode(df_wo, "readmitted", {'NO': 0, '>30': 1, '<30': 1})
)

In [5]:
# Separate features from the "readmitted" target and divide them into
# train and test partitions (split parameters are split_data's defaults).
(
    X_train_wo,
    X_test_wo,
    y_train_wo,
    y_test_wo,
) = split_data(df_wo, "readmitted")

In [6]:
# Scale the feature matrices; each split is passed through standardize_data
# on its own.
# NOTE(review): because the test split is handed over separately, it can only be
# scaled with the training statistics if the helper keeps them internally —
# confirm standardize_data does not recompute them from the test data.
X_train_wo = standardize_data(X_train_wo)
X_test_wo = standardize_data(X_test_wo)

In [7]:
# Fit the logistic regression model on the standardized training split,
# with the iteration cap set to 2000.
logreg = train_Logistic(
    X_train_wo,
    y_train_wo,
    max_iter=2000,
)

In [8]:
# Score the fitted model on both splits. As the recorded output shows, the call
# prints the train and test accuracy and returns them as a
# (train accuracy, test accuracy) tuple, which the notebook displays.
evaluate_model(
    logreg,
    X_train_wo,
    X_test_wo,
    y_train_wo,
    y_test_wo,
)

Train accuracy:  0.6499943848615869
Test accuracy:  0.6251555846708156


(0.6499943848615869, 0.6251555846708156)

In [9]:
import mlflow
import mlflow.sklearn


# Record the already-fitted model, its hyperparameters and its accuracy in a
# new MLflow run. The estimator is not refit here; it comes from the training
# cell above.
with mlflow.start_run():
    # Log model
    mlflow.sklearn.log_model(logreg, "Logistic Regression Model")

    # Log params — read the estimator's parameter dict once and reuse it
    model_params = logreg.get_params()
    mlflow.log_param("C", model_params["C"])
    mlflow.log_param("penalty", model_params["penalty"])
    # max_iter was set explicitly when the model was trained, so record it too;
    # guarded because not every estimator exposes this parameter.
    if "max_iter" in model_params:
        mlflow.log_param("max_iter", model_params["max_iter"])

    # Evaluate the model and log metrics.
    # evaluate_model prints both accuracies and returns them as a
    # (train accuracy, test accuracy) pair — see the recorded output above.
    score = evaluate_model(logreg, X_train_wo, X_test_wo, y_train_wo, y_test_wo)
    train_accuracy, test_accuracy = score
    # "accuracy" is kept under its original name (it holds the test accuracy)
    # so existing runs and dashboards stay comparable.
    mlflow.log_metric("accuracy", test_accuracy)
    # Explicitly named metrics make the train/test gap visible in the MLflow UI.
    mlflow.log_metric("train_accuracy", train_accuracy)
    mlflow.log_metric("test_accuracy", test_accuracy)



Train accuracy:  0.6499943848615869
Test accuracy:  0.6251555846708156


In [2]:
# Execute the standalone Decision Tree training script; the path is relative to
# the notebook's working directory. Per the recorded output, the script logs to
# the MLflow experiment 'decision_tree_experiment' and prints train/test accuracy.
# NOTE(review): the saved execution count restarts at this cell, so it appears to
# have been run in a separate kernel session from the cells above — confirm the
# notebook still runs top to bottom on a fresh kernel.
%run '../../scripts/train_DecisionTree.py'

2024/02/08 00:07:00 INFO mlflow.tracking.fluent: Experiment with name 'decision_tree_experiment' does not exist. Creating a new experiment.


Train accuracy:  0.6564939075748217
Test accuracy:  0.6277104487389453


In [3]:
# Execute the standalone Logistic Regression training script; the path is
# relative to the notebook's working directory. Per the recorded output, the
# script logs to the MLflow experiment 'logistic_regression_experiment' and
# prints the same train/test accuracy as the interactive cells above.
%run '../../scripts/train_Logistic.py'

2024/02/08 00:10:23 INFO mlflow.tracking.fluent: Experiment with name 'logistic_regression_experiment' does not exist. Creating a new experiment.


Train accuracy:  0.6499943848615869
Test accuracy:  0.6251555846708156
