In [89]:
import warnings
warnings.simplefilter("ignore")

# Binary-class with gradient updates

# Multi-class

In [5]:
from ucimlrepo import fetch_ucirepo, list_available_datasets

heart_disease = fetch_ucirepo(id=45) 
  
# data (as pandas dataframes) 
X = heart_disease.data.features 
y = heart_disease.data.targets 
  
# variable information 
heart_disease.variables

Unnamed: 0,name,role,type,demographic,description,units,missing_values
0,age,Feature,Integer,Age,,years,no
1,sex,Feature,Categorical,Sex,,,no
2,cp,Feature,Categorical,,,,no
3,trestbps,Feature,Integer,,resting blood pressure (on admission to the ho...,mm Hg,no
4,chol,Feature,Integer,,serum cholestoral,mg/dl,no
5,fbs,Feature,Categorical,,fasting blood sugar > 120 mg/dl,,no
6,restecg,Feature,Categorical,,,,no
7,thalach,Feature,Integer,,maximum heart rate achieved,,no
8,exang,Feature,Categorical,,exercise induced angina,,no
9,oldpeak,Feature,Integer,,ST depression induced by exercise relative to ...,,no


In [None]:
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, roc_auc_score, log_loss, confusion_matrix

mice_imputer = IterativeImputer(random_state=123) # must have no NaNs
X = mice_imputer.fit_transform(X)

X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, stratify=y,
    test_size=0.25, random_state=123)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)

multi_class_model = LogisticRegression(
    penalty = "l2", # l2 supports multinominal
    tol = 0.0001,
    C = 0.01, # smaller values imply stronger regularization, reduce overfitting
    class_weight = "balanced",
    random_state = 123,
    solver = "lbfgs",
    max_iter = 200,
    # multi_class = "multinomial", # API changed
    verbose = 0
)

# multi_class_model = LogisticRegressionCV(
#     cv=5,
#     penalty = "l2",
#     tol = 1e-4,
#     # C = 1.0,
#     class_weight = None,
#     random_state = 123,
#     solver = "lbfgs",
#     max_iter = 200,
#     # multi_class = "multinomial", # API changed
#     verbose = 0
# )

multi_class_model.fit(X_train, y_train)
y_preds = multi_class_model.predict(X_valid)
y_preds_probs = multi_class_model.predict_proba(X_valid)

accuracy = accuracy_score(y_valid, y_preds)
precision = precision_score(y_valid, y_preds, average='weighted')
recall = recall_score(y_valid, y_preds, average='weighted')
f1 = f1_score(y_valid, y_preds, average='weighted')

roc_auc = roc_auc_score(y_valid, y_preds_probs, multi_class='ovr')

print(accuracy, precision, recall, f1, roc_auc)
multi_class_model.coef_ # (n_classes, n_features)

0.5921052631578947 0.561654135338346 0.5921052631578947 0.5594253219080089 0.7682059222370341


array([[-0.08843996, -0.10681667, -0.21039802, -0.02031334, -0.0129146 ,
        -0.01217517, -0.07925308,  0.13936476, -0.13266781, -0.14694061,
        -0.08734563, -0.21866621, -0.18266378],
       [-0.00627953,  0.03609585, -0.0142747 ,  0.00220479,  0.00758366,
        -0.04470998, -0.01094598,  0.01914847,  0.00836041, -0.10161291,
        -0.08410252, -0.0787852 , -0.03746149],
       [ 0.02179476,  0.03913942,  0.09711474, -0.04146647,  0.06054844,
         0.12798226, -0.05106097, -0.06621271,  0.05177281,  0.06740908,
         0.00811835,  0.04009986,  0.0677462 ],
       [-0.09880347, -0.0062584 ,  0.05792577,  0.00400038, -0.05066756,
         0.06595486,  0.00230296, -0.09269345,  0.06455014,  0.03969532,
         0.0484704 ,  0.06538833,  0.10801748],
       [ 0.17172819,  0.0378398 ,  0.06963221,  0.05557464, -0.00454994,
        -0.13705198,  0.13895706,  0.00039293,  0.00798446,  0.14144912,
         0.1148594 ,  0.19196322,  0.04436159]])

Given a new instance with a feature vector:
- Repeat the same dot product for all classes to get logits for each class (coef_i @ feature_vector for each logit $i$)
- the softmax function gives the probabilities for each class, and the model selects the class with the highest probability as the prediction.