# Import Libraries

In [1]:
import numpy as np 

from sklearn import datasets, linear_model, metrics
from sklearn.linear_model import LogisticRegression

#from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix, mean_squared_error, r2_score

from sklearn.model_selection import train_test_split  
from sklearn.preprocessing import StandardScaler

## Load Input File

In [2]:
def load_input_file():
    """Load the Iris dataset that ships with scikit-learn.

    Source: https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html

    Returns:
        tuple: ``(X, y)`` -- the feature matrix with all factors, and the
        vector of class labels.
    """
    # return_X_y=True hands back (data, target) directly instead of a Bunch.
    # To experiment with a single factor, slice the features afterwards,
    # e.g. features[:, np.newaxis, 2].
    features, labels = datasets.load_iris(return_X_y=True)
    return features, labels

## Describe Data

In [3]:
# Load the feature matrix (X) and the class labels (y).
X,y = load_input_file()

# Describe the dataset: array shapes first, then a few leading entries.
# The f-string "=" specifier prints the expression text followed by its value.
print(f"{X.shape=}")
print(f"{y.shape=}")
print(f"First two x examples:\n{X[:2]}")
print(f"First three y examples: \n{y[:3]}")

# Overall value range of the features and of the labels.
print(f"\n{X.min()=}, {X.max()=}")
print(f"{y.min()=}, {y.max()=}\n")

# List the distinct label values present in y.
print(f"Unique y values: {np.unique(y)}")

X.shape=(150, 4)
y.shape=(150,)
First two x examples:
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]]
First three y examples: 
[0 0 0]

X.min()=0.1, X.max()=7.9
y.min()=0, y.max()=2

Unique y values: [0 1 2]


## Classification Metrics: Accuracy / Precision / Recall / F1 / Confusion Matrix

In [4]:

def logistic_metrics(model, x_train, x_test, y_train, y_test):
    """Print and return classification metrics for a fitted model on the test set.

    Args:
        model: Fitted classification model exposing ``predict``.
            ``predict_proba`` is only called when AUC-ROC is computed.
        x_train: Training features. Not used here; kept so existing callers
            keep working.
        x_test: Test features passed to ``model.predict``.
        y_train: Training labels. Not used here; kept so existing callers
            keep working.
        y_test: True labels corresponding to ``x_test``.

    Returns:
        tuple: ``(accuracy, precision, recall, f1_score, auc_roc, confusion)``.
        ``precision``, ``recall`` and ``f1_score`` are weighted averages over
        the classes. ``auc_roc`` is ``None`` unless the problem is binary.
        ``confusion`` is the confusion matrix of true vs. predicted labels.
    """
    # Make predictions using the testing set
    y_pred = model.predict(x_test)

    # Accuracy
    accuracy = metrics.accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.4f}")

    # Precision, Recall, F1-Score (weighted across all classes if multiclass)
    precision, recall, f1_score, _ = metrics.precision_recall_fscore_support(
        y_test, y_pred, average='weighted'
    )
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1_score:.4f}")

    # AUC-ROC (only for binary classification).
    # Checking y_test alone is not enough: a model trained on three or more
    # classes can be scored on a split that happens to contain only two of
    # them, in which case column 1 of predict_proba is not the positive-class
    # probability and the AUC would be silently wrong. So the model itself
    # must be binary as well. Models that do not expose ``classes_`` fall
    # back to the y_test-only check.
    model_classes = getattr(model, "classes_", None)
    model_is_binary = model_classes is None or len(model_classes) == 2
    if model_is_binary and len(np.unique(y_test)) == 2:
        y_prob = model.predict_proba(x_test)[:, 1]  # Probabilities of the positive class
        auc_roc = metrics.roc_auc_score(y_test, y_prob)
        print(f"AUC-ROC: {auc_roc:.4f}")
    else:
        auc_roc = None

    # Confusion Matrix
    confusion = metrics.confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:")
    print(confusion)

    return accuracy, precision, recall, f1_score, auc_roc, confusion


## Main Function

In [5]:
def main(regularization='l2', test_size=0.4, random_state=2):
    """Train a multinomial logistic regression on Iris and report test metrics.

    Args:
        regularization: Penalty to apply: ``'l1'``, ``'l2'`` or ``None``
            (no regularization). Defaults to ``'l2'``.
        test_size: Fraction of the data held out for testing. Defaults to 0.4.
        random_state: Seed for the train/test shuffle. Defaults to 2.
    """
    # Import data and load into X and y variables
    X, y = load_input_file()

    # NOTE: X.shape[0] counts every example in the dataset (before the split),
    # not only the ones that end up in the training set.
    print("Number of Training Examples: ", X.shape[0])
    print("Number of Features: ", X.shape[1])

    # Split data set into training and testing sets:
    #   https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Normalize data
    #   https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
    # The scaler is fitted on the training split only and then applied to the
    # test split, so test-set statistics never leak into preprocessing.
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    # Convergence tolerance used when a penalty is applied
    reg_tolerance = 0.01

    # Define which model to use, including the regularization parameter
    #   https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
    if regularization is None:  # No regularization
        # LogisticRegression defaults to penalty='l2' with C=1.0, so the
        # penalty has to be switched off explicitly. penalty=None is accepted
        # since scikit-learn 1.2; older releases used the string 'none'.
        model = linear_model.LogisticRegression(
            solver='lbfgs', multi_class='multinomial', penalty=None
        )
    else:  # With regularization
        # lbfgs does not support the L1 penalty; saga supports L1 together
        # with the multinomial loss.
        solver = 'saga' if regularization == 'l1' else 'lbfgs'
        model = linear_model.LogisticRegression(
            solver=solver,
            C=1e5,
            multi_class='multinomial',
            penalty=regularization,
            tol=reg_tolerance,
        )

    # Train the model using the training set
    model.fit(x_train, y_train)

    # Report Classification Metrics (printed by logistic_metrics)
    logistic_metrics(model, x_train, x_test, y_train, y_test)


if __name__ == '__main__':
    main()

Number of Training Examples:  150
Number of Features:  4
Accuracy: 0.9500
Precision: 0.9511
Recall: 0.9500
F1-Score: 0.9501
Confusion Matrix:
[[23  0  0]
 [ 0 15  1]
 [ 0  2 19]]
