In [116]:
#Import statements
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
from pandas.plotting import scatter_matrix
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import precision_recall_fscore_support
%matplotlib inline
# Single seed passed as random_state to the data splits and models below for reproducibility.
RAND_STATE = 42

In [117]:
# Reading csv (path is relative to the notebook's working directory)
CSV_NAME = "parkinsons_disease_progression_500.csv"
# Column headers as noted by the original author. pd.read_csv consumes the header row
# itself, so no rows need to be skipped manually:
# Patient_ID Age GenderS Years_Since_Diagnosis UPDRS_Score Tremor_Severity Motor_Function	Speech_Difficulty Balance_Problems Medications Exercise_Level Disease_Progression

# Earlier NumPy-based loader, kept for reference (it skipped the header row explicitly):
#raw_data = np.loadtxt(CSV_NAME, dtype=str, delimiter=",", skiprows=1)
raw_data_df = pd.read_csv(CSV_NAME)
# String-typed array copy of every column; numeric columns are cast back to float in later cells.
raw_data = raw_data_df.to_numpy(dtype=str)
# DataFrame views used by the correlation analysis: everything except the label, and the label alone.
feature_df = raw_data_df.drop(columns=['Disease_Progression'])
target_df = raw_data_df['Disease_Progression']

In [118]:
# Separate the features from the label.
# Columns 1-10 are the features (column 0, the Patient ID, is excluded); column 11 is the label.
X, y = raw_data[:, 1:11], raw_data[:, 11]
# N = number of samples, d = number of features
N, d = X.shape

In [None]:
# Split data into train, validation, test sets (60% / 20% / 20%):
# first hold out 40% of the rows, then cut that hold-out in half.
# random_state set to RAND_STATE for reproducibility, will change later
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X, y, test_size=0.4, random_state=RAND_STATE
)
X_val, X_test, y_val, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.5, random_state=RAND_STATE
)

# Raise the print threshold so the whole matrix is shown instead of being summarized
np.set_printoptions(threshold=1000000)
print(X_train)
print(X_train.shape)

In [None]:
# Visualizing Unscaled Data
feature_names_orig = [
    'Age', 'Gender', 'Years_Since_Diagnosis', 'UPDRS_Score', 'Tremor_Severity',
    'Motor_Function', 'Speech_Difficulty', 'Balance_Problems', 'Medications', 'Exercise_Level',
]
feature_names_orig_numeric = [
    'Age', 'Years_Since_Diagnosis', 'UPDRS_Score', 'Tremor_Severity',
    'Motor_Function', 'Speech_Difficulty', 'Balance_Problems',
]

# Distributions for each feature
# Columns 1, 8 and 9 (Gender, Medications, Exercise_Level) are categorical, so they are
# removed before casting the remaining seven columns to float.
X_train_orig_numeric = np.delete(X_train, np.array([1, 8, 9]), 1).astype(float)
fig, axs = plt.subplots(7, 1, figsize=(10,10))
for i, ax in enumerate(axs):
    """
    # Density plots
    density = gaussian_kde(X_train_orig_numeric[: , i])
    density.covariance_factor = lambda : .25
    density._compute_covariance()
    xs = np.linspace(X_train_orig_numeric[: , i].min(axis=0), X_train_orig_numeric[: , i].max(axis=0), 200)
    axs[i].plot(xs, density(xs))
    """
    # Histograms
    ax.hist(X_train_orig_numeric[: , i])
    ax.set_xlabel(feature_names_orig_numeric[i])
fig.tight_layout()
plt.suptitle("Feature Distributions", fontsize=16, fontweight='bold')
plt.show(block=False)

# Plotting individual features against target variable
fig, axs = plt.subplots(7, 1, figsize=(10,10))
y_train_int = y_train.astype(int)
for i, ax in enumerate(axs):
    ax.scatter(X_train_orig_numeric[:,i], y_train_int, s=1)
    ax.set_xlabel(feature_names_orig_numeric[i])
fig.tight_layout()
plt.suptitle("Features vs. Target Variable", fontsize=16, fontweight='bold')
plt.show(block=False)

# Standard correlation coefficients for numeric variables against label
# (computed on the full data frame, not only on the training split)
numeric_feature_df = feature_df[feature_names_orig_numeric]
label_correlations = numeric_feature_df.corrwith(target_df, axis=0).sort_values(ascending=False)
print(f"Correlation coefficients against label: \n{label_correlations}")

# Plot numeric variables against each other for correlation visualization
#scatter_matrix(feature_df[feature_names_orig_numeric], figsize=(12,8))

# Correlation matrix for numeric variables
corr_df = numeric_feature_df.astype(float)
corr_matrix = corr_df.corr()
numeric_columns = corr_df.select_dtypes(['number']).columns
tick_positions = range(len(numeric_columns))
f = plt.figure()
plt.matshow(corr_matrix, fignum=f.number)
plt.xticks(tick_positions, numeric_columns, fontsize=8, rotation=-45)
plt.yticks(tick_positions, numeric_columns, fontsize=8)
cb = plt.colorbar()
cb.ax.tick_params(labelsize=14)
plt.title('Correlation Matrix')
plt.show(block=False)
print(f"Correlation Matrix: \n{corr_matrix}")

In [None]:
# Encode the categorical columns and standardize the continuous ones.
# The transformer is fitted on the training split only and then applied to val/test.
# NOTE: this cell overwrites X_train / X_val / X_test in place, so it must be run
# exactly once after the split cell (re-running it would re-encode encoded data).

# One-hot encoding for categorical features
# Check handle_unknown parameter later for handling Medication feature
ohe = OneHotEncoder(drop='first')
# Named `ordinal_encoder` (not `ord`) so the built-in ord() is not shadowed for the rest of the notebook
ordinal_encoder = OrdinalEncoder(categories=[["Low", "Moderate", "High"]])
ohe_cols = [1, 8]  # Gender, Medications
ord_cols = [9]     # Exercise_Level

# Not scaling one hot encoded or ordinal encoded columns
scaler = StandardScaler()
# scale_cols = [0, 2, 3, 4, 5, 6, 7]
scale_cols = [0, 2, 3]  # not scaling variables with values from 1 - 5 here
# Output column order: one-hot columns, ordinal column, scaled columns, then the untouched remainder
encoder = ColumnTransformer(
    [("ohe", ohe, ohe_cols), ("ord", ordinal_encoder, ord_cols), ("scl", scaler, scale_cols)],
    remainder='passthrough',
)
X_train = encoder.fit_transform(X_train).astype(float)
X_val = encoder.transform(X_val).astype(float)
X_test = encoder.transform(X_test).astype(float)
print(f"Shape of X_train: {X_train.shape}")

# Feature names in the column order produced by the transformer above
feature_names = ['Gender', 'Medications', 'Exercise_Level', 'Age', 'Years_Since_Diagnosis','UPDRS_Score', 'Tremor_Severity', 'Motor_Function', 'Speech_Difficulty', 'Balance_Problems']
feature_names_transformed = ['Gender', 'Medications 1', 'Medications 2', 'Medications 3', 'Exercise_Level', 'Age', 'Years_Since_Diagnosis','UPDRS_Score', 'Tremor_Severity', 'Motor_Function', 'Speech_Difficulty', 'Balance_Problems']
feature_names_numeric = ['Age', 'Years_Since_Diagnosis','UPDRS_Score', 'Tremor_Severity', 'Motor_Function', 'Speech_Difficulty', 'Balance_Problems']


"""
# Implementation standardizing all columns
encoder = ColumnTransformer([("ohe", ohe, ohe_cols), ("ord", ordinal_encoder, ord_cols)], remainder='passthrough')
X_train = encoder.fit_transform(X_train)
X_val = encoder.transform(X_val)
X_test = encoder.transform(X_test)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
"""

"""
# Trying to scale labels to [-1, 0, 1]
print(y_train)
y_train = (y_train.astype('int') - 2).astype(str)
y_val = (y_val.astype('int') - 2).astype(str)
y_test = (y_test.astype('int') - 2).astype(str)
print(y_train)
"""

In [None]:
"""
# Visualizing Encoded Scaled Data

# Density charts of each feature for their distribution
fig, axs = plt.subplots(3, 4, figsize=(10,10))
for i in range (0,3):
    for j in range (0,4):
        density = gaussian_kde(X_train[: , i*4+j])
        density.covariance_factor = lambda : .25
        density._compute_covariance()
        xs = np.linspace(-3, 6, 200)
        axs[i, j].plot(xs, density(xs))
        axs[i, j].set_xlabel(feature_names_transformed[i*4+j])
fig.tight_layout()
plt.show(block=False)
"""

In [None]:
"""
# Plotting individual features against target variable
fig, axs = plt.subplots(3, 4, figsize=(10,10))
for i in range (0,3):
    for j in range (0,4):
        axs[i, j].scatter(X_train[:,i*4+j], y_train_int, s=1)
        axs[i, j].set_xlabel(feature_names_transformed[i*4+j])
fig.tight_layout()
        
# Correlation matrix for numeric variables
scatter_matrix(feature_df[feature_names], figsize=(12,8))
plt.show(block=False)

# Standard correlation coefficients for numeric variables
feature_df[feature_names_numeric].corrwith(target_df, axis=0).sort_values(ascending=False)
"""

In [None]:
# PCA / clustering unsupervised analysis
# PCA to three components for visualizing clusters / feature transformation
pca_model = PCA(n_components=3, random_state=RAND_STATE)
pca_train = pca_model.fit_transform(X_train)
pca_val, pca_test = pca_model.transform(X_val), pca_model.transform(X_test)
#pca_data = pd.DataFrame(pca_train, columns=['PC1', 'PC2'])  # 2D dataframe for 2D PCA clusters
pca_data = pd.DataFrame(pca_train, columns=['PC1', 'PC2', 'PC3'])  # 3D dataframe for 3D PCA clusters

# Elbow method for finding optimal k: number of clusters
# One k-means fit per candidate k; inertia_ is the within cluster sum of squares (WCSS)
cluster_counts = range(1, 21)
wcss = [
    KMeans(n_clusters=k, random_state=RAND_STATE, n_init='auto').fit(X_train).inertia_
    for k in cluster_counts
]
fig = plt.figure()
plt.plot(cluster_counts, wcss, marker='o')
plt.xlabel('Num Clusters')
plt.ylabel('WCSS')
plt.title('K-means Elbow Graph')
plt.show()

# Optimal k = 4, running k-means clustering using optimal k
kmeans_model = KMeans(n_clusters=4, random_state=RAND_STATE, n_init='auto')
kmeans_model.fit(X_train)
pca_data['cluster'] = pd.Categorical(kmeans_model.labels_)

# Plotting clusters on PCA axes
fig = plt.figure()

# 3D plot for 3-dimensional PCA
ax = fig.add_subplot(111, projection='3d')
ax.view_init(azim=-30, elev=30)  # Set azimuth and elevation angles, default is (-60,30)
scatter = ax.scatter(
    pca_data['PC1'], pca_data['PC2'], pca_data['PC3'],
    c=pca_data['cluster'], cmap='Set3', alpha=0.7,
)

"""
# 2D plot for 2-dimensional PCA
ax = fig.add_subplot(111)
scatter = ax.scatter(pca_data['PC1'], pca_data['PC2'], c=pca_data['cluster'], cmap='Set3', alpha=0.7)
"""
legend1 = ax.legend(*scatter.legend_elements(), loc="upper left", title="")
ax.add_artist(legend1)
plt.title('Cluster Plot')
plt.show(block=False)

# Redoing PCA with Minka's MLE to guess dimension
# From here on pca_train / pca_val / pca_test hold the MLE-sized projection, not the 3-component one
pca_model = PCA(n_components='mle', random_state=RAND_STATE)
pca_train = pca_model.fit_transform(X_train)
pca_val, pca_test = pca_model.transform(X_val), pca_model.transform(X_test)
print(f"Number of PCA dimensions: {pca_model.n_components_}")

In [None]:
""" First Model ------- Logistic Regression """

In [None]:
def log_regression(X_train, X_test, y_train, y_test, lamb): # DEFINING logistic regression
    """Fit an L2-regularized logistic regression and score it with macro-averaged metrics.

    Parameters
    ----------
    X_train, y_train
        Features and labels the model is fitted on.
    X_test, y_test
        Features and labels the fitted model is evaluated on (the caller decides
        whether this is the validation or the test split).
    lamb
        Regularization strength. scikit-learn takes the inverse, so the model is
        built with ``C = 1 / lamb``; ``lamb`` must therefore be non-zero.

    Returns
    -------
    tuple
        ``(fscore, precision, recall, fscore_train)``. The first three are computed
        on ``X_test``/``y_test``, the last on the training data.
    """
    # Tried newton-cg solver with l2 penalty, saga solver with l2 and l1 penalty (slow)
    # The problem is multiclass since there are three possible labels.
    logreg = LogisticRegression(penalty='l2', solver='lbfgs', C=1/lamb, max_iter=10000, random_state=RAND_STATE)

    # Working on training set: this is where the best weights are found
    logreg.fit(X_train, y_train)
    y_train_hat = logreg.predict(X_train)
    # zero_division=0 yields the same scores as the default ("warn" also scores 0) but does
    # not emit a warning each time a class is never predicted (common for very large lambda).
    _, _, fscore_train, _ = precision_recall_fscore_support(
        y_train, y_train_hat, average='macro', zero_division=0
    )

    # Working on test set
    y_test_hat = logreg.predict(X_test)

    # Computing metrics (macro-averaging is used currently)
    prec, rec, fscore, _ = precision_recall_fscore_support(
        y_test, y_test_hat, average='macro', zero_division=0
    )
    metrics = (fscore, prec, rec, fscore_train)
    return metrics

In [None]:
# CALLING logistic regression that we defined above
# Grid search over polynomial feature degree and regularization strength. For every degree,
# the lambda with the best validation F score is recorded in all_lambdas_best / validation_fscores.
# NOTE: this cell (re)creates those two lists; the SVM and NN cells append to them, so the
# three grid-search cells must be run once each, in order.

poly_transform_degrees = [1,2,3,4]
lambda_values = [1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7]
#lambda_values = [1e-6,1e-5,1e-4,1e-3,1e-2,1e-1,1]
#lambda_values = np.logspace(-8,8,20)
all_lambdas_best = []
validation_fscores = []

# `degree` / `lam` are used as loop names so the feature-count global `d` is not overwritten
for degree in poly_transform_degrees:
    avg_fscores = []
    train_accs = []
    # Transforming our features (fit on the training split only)
    print("THIS IS A DEGREE %d POLYNOMIAL TRANSFORM" % degree)
    poly = PolynomialFeatures(degree)
    X_train_poly = poly.fit_transform(X_train)
    X_val_poly = poly.transform(X_val)
    #X_test_poly = poly.transform(X_test)

    for lam in lambda_values:
        print("LAMBDA = %f :" % lam)
        metrics = log_regression(X_train_poly, X_val_poly, y_train, y_val, lam)
        print("fscore: %f" % metrics[0])
        print("precision: %f" % metrics[1])
        print("recall: %f" % metrics[2])
        print("Training fscore: %f" % metrics[3])
        avg_fscores.append(metrics[0])  # need to take average first if not using macro/macro averaging
        train_accs.append(metrics[3])
        print("-----------------------------------------------------")

    print(f"Lambda values: {lambda_values}")
    print(f"Training fscores: {train_accs}")
    print(f"F scores: {avg_fscores}")
    # Plotting Regularization Lambda vs F score
    plt.ylim(0,1)
    plt.xscale("log",base=10)
    plt.plot(lambda_values, train_accs, '.-')  # plot lambda vs train fscore
    plt.plot(lambda_values,avg_fscores,'.-') # plot lambda vs val fscore
    plt.legend(['train', 'validation'], loc='upper left')
    plt.title(f'Logistic Regression Degree {degree}, Lambda vs F Score')
    plt.ylabel('F Score')
    plt.xlabel('Lambda values')
    plt.grid(True)
    plt.show(block=False)

    # Comparing fscores for best hyperparameters

    # Using lambda with highest val fscore, gets first lambda with maximum value since lower lambda usually has lower training error
    best_lambda_ind = max(range(len(avg_fscores)), key=avg_fscores.__getitem__)
    # Alternative criterion (disabled): first lambda with min difference between train and val fscores
    #avg_fscores_np = np.asarray(avg_fscores)
    #train_accs_np = np.asarray(train_accs)
    #best_lambda_ind = np.argmin(np.absolute(avg_fscores_np - train_accs_np))
    lambda_best = lambda_values[best_lambda_ind]
    fscore_best = avg_fscores[best_lambda_ind]
    print(f"Comparing Logistic Regression with Degree {degree} Transform: lambda = {lambda_best}, F score = {fscore_best}")
    all_lambdas_best.append(lambda_best)
    validation_fscores.append(fscore_best)
    print("============================================================")

In [None]:
def SVM(X_train, X_test, y_train, y_test, lamb, t): # DEFINING SVM
    """Fit a support vector classifier and score it with macro-averaged metrics.

    Parameters
    ----------
    X_train, y_train
        Features and labels the model is fitted on.
    X_test, y_test
        Features and labels the fitted model is evaluated on (the caller decides
        whether this is the validation or the test split).
    lamb
        Regularization strength; the model is built with ``C = 1 / lamb``, so
        ``lamb`` must be non-zero.
    t
        Name of the transformation, which selects the kernel: ``"RBF"``,
        ``"Poly1"``, ``"Poly2"``, ``"Poly3"`` or ``"PCA"``. ``"PCA"`` uses the same
        degree-1 polynomial kernel as ``"Poly1"``; the caller is expected to pass
        PCA-projected features in that case.

    Returns
    -------
    tuple
        ``(fscore, precision, recall, fscore_train)``. The first three are computed
        on ``X_test``/``y_test``, the last on the training data.

    Raises
    ------
    ValueError
        If ``t`` is not one of the names listed above (previously such a value
        silently fell back to the degree-1 polynomial kernel).
    """
    # The transformation name tells us which kernel to use
    kernel_options = {
        "RBF": {"kernel": "rbf"},
        "Poly1": {"kernel": "poly", "degree": 1},
        "Poly2": {"kernel": "poly", "degree": 2},
        "Poly3": {"kernel": "poly", "degree": 3},
        "PCA": {"kernel": "poly", "degree": 1},
    }
    if t not in kernel_options:
        raise ValueError(f"Unknown SVM transformation {t!r}; expected one of {sorted(kernel_options)}")
    SVM_model = SVC(C=1/lamb, random_state=RAND_STATE, **kernel_options[t])

    # Working on training set
    SVM_model.fit(X_train, y_train)
    y_train_hat = SVM_model.predict(X_train)
    # zero_division=0 yields the same scores as the default ("warn" also scores 0) but does
    # not emit a warning each time a class is never predicted.
    _, _, fscore_train, _ = precision_recall_fscore_support(
        y_train, y_train_hat, average='macro', zero_division=0
    )

    # Working on test set
    y_test_hat = SVM_model.predict(X_test)

    # Computing metrics (macro-averaging is used currently)
    prec, rec, fscore, _ = precision_recall_fscore_support(
        y_test, y_test_hat, average='macro', zero_division=0
    )
    metrics = (fscore, prec, rec, fscore_train)
    return metrics

In [None]:
# CALLING SVM that we defined above
# Grid search over kernel / input representation and regularization strength. For every
# transformation, the lambda with the best validation F score is appended to
# all_lambdas_best / validation_fscores (created by the logistic regression cell).

svm_transformations = ["Poly1","Poly2","Poly3","RBF","PCA"]
lambda_values = [1e-3,1e-2,1e-1,1,1e1,1e2]

# Banner printed at the start of each transformation's sweep
transformation_banners = {
    "Poly1": "THE TRANSFORMATION IS POLYNOMIAL OF DEGREE 1",
    "Poly2": "THE TRANSFORMATION IS POLYNOMIAL OF DEGREE 2",
    "Poly3": "THE TRANSFORMATION IS POLYNOMIAL OF DEGREE 3",
    "RBF": "THE TRANSFORMATION IS RBF KERNEL",
    "PCA": "THE TRANSFORMATION IS PCA",
}

for t in svm_transformations:
    avg_fscores = []
    train_accs = []
    print(transformation_banners[t])

    # The PCA run is fed the PCA projection; every other run is fed the encoded/scaled
    # features directly, with the transformation applied by the SVC kernel itself.
    if t == "PCA":
        svm_train_features, svm_val_features = pca_train, pca_val
    else:
        svm_train_features, svm_val_features = X_train, X_val

    for lam in lambda_values:
        print("LAMBDA = %f :" % lam)
        metrics = SVM(svm_train_features, svm_val_features, y_train, y_val, lam, t)
        print("fscore: %f" % metrics[0])
        print("precision: %f" % metrics[1])
        print("recall: %f" % metrics[2])
        print("Training fscore: %f" % metrics[3])
        avg_fscores.append(metrics[0])
        train_accs.append(metrics[3])
        print("-----------------------------------------------------")

    print(f"Lambda values: {lambda_values}")
    print(f"Training fscores: {train_accs}")
    print(f"F scores: {avg_fscores}")
    # Plotting Regularization Lambda vs F score
    plt.ylim(0,1)
    plt.xscale("log",base=10)
    plt.plot(lambda_values, train_accs, '.-')  # plot lambda vs train fscore
    plt.plot(lambda_values,avg_fscores,'.-') # plot lambda vs val fscore
    plt.legend(['train', 'validation'], loc='upper left')
    plt.title(f'SVM {t}, Lambda vs F Score')
    plt.ylabel('F Score')
    plt.xlabel('Lambda values')
    plt.grid(True)
    plt.show(block=False)

    # Comparing fscores for best hyperparameters

    # Using lambda with highest val fscore, gets first lambda with maximum value since lower lambda usually has lower training error
    best_lambda_ind = max(range(len(avg_fscores)), key=avg_fscores.__getitem__)
    # Alternative criterion (disabled): first lambda with min difference between train and val fscores
    #avg_fscores_np = np.asarray(avg_fscores)
    #train_accs_np = np.asarray(train_accs)
    #best_lambda_ind = np.argmin(np.absolute(avg_fscores_np - train_accs_np))
    lambda_best = lambda_values[best_lambda_ind]
    fscore_best = avg_fscores[best_lambda_ind]
    print(f"Comparing SVM with {t} Transform: lambda = {lambda_best}, F score = {fscore_best}")
    all_lambdas_best.append(lambda_best)
    validation_fscores.append(fscore_best)
    print("=====================================================================")

In [None]:
def NN(X_train, X_test, y_train, y_test, lamb, nn_structure): # DEFINING Neural Network
    """Fit a multi-layer perceptron classifier and score it with macro-averaged metrics.

    Parameters
    ----------
    X_train, y_train
        Features and labels the model is fitted on.
    X_test, y_test
        Features and labels the fitted model is evaluated on (the caller decides
        whether this is the validation or the test split).
    lamb
        Regularization strength, passed directly as the MLP's ``alpha``.
    nn_structure
        Hidden layer sizes, passed as ``hidden_layer_sizes`` (e.g. ``(10, 10)``).

    Returns
    -------
    tuple
        ``(fscore, precision, recall, fscore_train)``. The first three are computed
        on ``X_test``/``y_test``, the last on the training data.
    """
    # Tried logistic, tanh activations
    # Tried lbfgs solver
    # Tried higher initial learning rate
    # Tried PCA as input
    NN_model = MLPClassifier(hidden_layer_sizes = nn_structure, activation= 'relu', solver='adam', alpha=lamb, max_iter=10000, learning_rate='constant', learning_rate_init=0.001, random_state=RAND_STATE)

    # Working on training set
    NN_model.fit(X_train, y_train)
    y_train_hat = NN_model.predict(X_train)
    # zero_division=0 yields the same scores as the default ("warn" also scores 0) but does
    # not emit a warning each time a class is never predicted.
    _, _, fscore_train, _ = precision_recall_fscore_support(
        y_train, y_train_hat, average='macro', zero_division=0
    )

    # Working on test set
    y_test_hat = NN_model.predict(X_test)

    # Computing metrics (macro-averaged)
    prec, rec, fscore, _ = precision_recall_fscore_support(
        y_test, y_test_hat, average='macro', zero_division=0
    )
    metrics = (fscore, prec, rec, fscore_train)
    return metrics

In [None]:
# CALLING our Neural Network function defined above
# Sweep every hidden-layer architecture over the lambda grid and record, per architecture,
# the lambda giving the best validation F score.

# NOTE: these are HIDDEN layer sizes (excluding the input and output layers)
# e.g. the first architecture has one hidden layer with 10 nodes,
#      the second architecture has two hidden layers with 10 nodes each
nn_structure = [(10,), (10,10), (10,10,10), (50,)]

lambda_values = [1e-3,1e-2,1e-1,1,1e1,1e2]

for layers in nn_structure:
    avg_fscores, train_accs = [], []
    print("THIS INNER LAYER ARCHITECTURE IS ", layers)

    for lam in lambda_values:
        print("LAMBDA = %f :" % lam)

        metrics = NN(X_train, X_val, y_train, y_val, lam, layers)
        val_fscore, val_precision, val_recall, train_fscore = metrics
        print("fscore: %f" % val_fscore)
        print("precision: %f" % val_precision)
        print("recall: %f" % val_recall)
        print("training fscore: %f" % train_fscore)
        avg_fscores.append(val_fscore)
        train_accs.append(train_fscore)
        print("-----------------------------------------------------")

    print(f"Lambda values: {lambda_values}")
    print(f"Training fscores: {train_accs}")
    print(f"F scores: {avg_fscores}")

    # Plotting Regularization Lambda vs F score: train curve first, validation curve second
    plt.ylim(0,1)
    plt.xscale("log",base=10)
    for curve in (train_accs, avg_fscores):
        plt.plot(lambda_values, curve, '.-')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.title(f'Neural Network of Structure {layers}, Lambda vs F Score')
    plt.ylabel('F Score')
    plt.xlabel('Lambda values')
    plt.grid(True)
    plt.show(block=False)

    # Comparing fscores for best hyperparameters

    # Index of the first occurrence of the highest validation fscore, i.e. the smallest
    # lambda among ties (lower lambda usually has lower training error)
    best_lambda_ind = avg_fscores.index(max(avg_fscores))
    """
    # Gets first lambda with min difference between train and val fscores
    avg_fscores_np = np.asarray(avg_fscores)
    train_accs_np = np.asarray(train_accs)
    best_lambda_ind = np.argmin(np.absolute(avg_fscores_np - train_accs_np))
    """
    lambda_best = lambda_values[best_lambda_ind]
    fscore_best = avg_fscores[best_lambda_ind]
    print(f"Comparing Neural Network of Structure {layers}: lambda = {lambda_best}, F score = {fscore_best}")
    all_lambdas_best.append(lambda_best)
    validation_fscores.append(fscore_best)
    print("=============================================================")

In [None]:
"""
# NN with PCA transform
for n in nn_structure:
    avg_fscores = []
    train_accs = []
    print("Neural network models with PCA for features")
    print("THIS INNER LAYER ARCHITECTURE IS ", n)

    for l in lambda_values:

        print("LAMBDA = %f :" % l)

        metrics = NN(pca_train, pca_val, y_train, y_val, l, n)
        print("fscore: %f" % metrics[0])
        print("precision: %f" % metrics[1])
        print("recall: %f" % metrics[2])
        print("training fscore: %f" % metrics[3])
        avg_fscores.append(metrics[0])
        train_accs.append(metrics[3])
        print("-----------------------------------------------------")

    print(f"Lambda values: {lambda_values}")
    print(f"Training fscores: {train_accs}")
    print(f"F scores: {avg_fscores}")
    # Plotting Regularization Lambda vs Accuracy
    plt.ylim(0,1) 
    plt.xscale("log",base=10)
    plt.plot(lambda_values, train_accs, '.-')  # plot lambda vs train fscore
    plt.plot(lambda_values,avg_fscores,'.-') # plot lambda vs val fscore
    plt.legend(['train', 'validation'], loc='upper left')
    plt.title(f'Neural Network of Structure {n} with PCA, Lambda vs F Score')
    plt.ylabel('F Score')
    plt.xlabel('Lambda values')
    plt.grid(True)
    plt.show(block=False)

    # Comparing fscores for best hyperparameters

    # Using lambda with highest val fscore, gets first lambda with maximum value since lower lambda usually has lower training error
    best_lambda_ind = max(range(len(avg_fscores)), key=avg_fscores.__getitem__)
    
    # Gets first lambda with min difference between train and val fscores
    #avg_fscores_np = np.asarray(avg_fscores)
    #train_accs_np = np.asarray(train_accs)
    #best_lambda_ind = np.argmin(np.absolute(avg_fscores_np - train_accs_np))
    
    lambda_best = lambda_values[best_lambda_ind]
    fscore_best = avg_fscores[best_lambda_ind]
    print(f"Comparing Neural Network of Structure {n} with PCA: lambda = {lambda_best}, F score = {fscore_best}")
    all_lambdas_best.append(lambda_best)
    validation_fscores.append(fscore_best)
    print("=============================================================")
"""

In [None]:
# Layout of all_lambdas_best / validation_fscores (filled by the three grid-search cells, in order):
# First four are logreg poly1-4
# Next five are SVM poly1-3, RBF, PCA
# Next four are NN with structures (10,), (10,10), (10,10,10), (50,)
# (The NN-with-PCA sweep is currently disabled, so there are no further entries.)
print(f"Validation Set Lambdas: {all_lambdas_best}")
print(f"Validation Set F Scores: {validation_fscores}")

validation_fscores_logreg = validation_fscores[0:4]
lambdas_logreg = all_lambdas_best[0:4]
transforms_logreg = ['Poly1', 'Poly2', 'Poly3', 'Poly4']
validation_fscores_SVM = validation_fscores[4:9]
lambdas_SVM = all_lambdas_best[4:9]
transforms_SVM = ['Poly1', 'Poly2', 'Poly3', 'RBF', 'PCA']
# Scores, lambdas and labels must all cover the same four NN entries; a wider score slice
# would break the bar chart below and the index lookups in the test cell.
validation_fscores_NN = validation_fscores[9:13]
lambdas_NN = all_lambdas_best[9:13]
transforms_NN = ['(10,)', '(10,10)', '(10,10,10)', '(50,)']
# If the NN-with-PCA sweep is re-enabled, widen the three NN entries above together:
#validation_fscores_NN = validation_fscores[9:17]
#lambdas_NN = all_lambdas_best[9:17]
#transforms_NN = ['(10,)', '(10,10)', '(10,10,10)', '(50,)', 'PCA (10,)', 'PCA (10,10)', 'PCA (10,10,10)', 'PCA (50,)']

# Logreg bar chart for feature transformations
plt.bar(transforms_logreg, validation_fscores_logreg)
plt.title('Validation F Scores for Feature Transforms on Logistic Regression')
plt.ylabel('F Score')
plt.xlabel('Transform')
plt.show(block=False)

# SVM bar chart for feature transformations
plt.bar(transforms_SVM, validation_fscores_SVM)
plt.title('Validation F Scores for Feature Transforms on SVM')
plt.ylabel('F Score')
plt.xlabel('Transform')
plt.show(block=False)

# NN bar chart for the hidden-layer architectures
plt.bar(transforms_NN, validation_fscores_NN)
plt.title('Validation F Scores for Feature Transforms on Neural Network')
plt.ylabel('F Score')
plt.xlabel('Architecture')
plt.xticks(rotation=45, ha='right')
plt.show(block=False)

In [None]:
# Test data for best feature transform / regularization for each model.
# Each model family is refitted on the training split with the configuration that scored
# best on the validation split, then scored once on the test split.
test_fscores = []

# Logistic regression: chooses feature transform with highest validation fscore based on best lambda
best_logreg_ind = max(range(len(validation_fscores_logreg)), key=validation_fscores_logreg.__getitem__)
poly = PolynomialFeatures(poly_transform_degrees[best_logreg_ind])
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
test_fscores.append(log_regression(X_train_poly, X_test_poly, y_train, y_test, lambdas_logreg[best_logreg_ind])[0])

# SVM: the PCA variant was validated on the PCA projection, every other variant on X_train / X_val
best_SVM_ind = max(range(len(validation_fscores_SVM)), key=validation_fscores_SVM.__getitem__)
best_SVM_transform = svm_transformations[best_SVM_ind]
if best_SVM_transform == 'PCA':
    test_fscores.append(SVM(pca_train, pca_test, y_train, y_test, lambdas_SVM[best_SVM_ind], best_SVM_transform)[0])
else:
    test_fscores.append(SVM(X_train, X_test, y_train, y_test, lambdas_SVM[best_SVM_ind], best_SVM_transform)[0])

# Neural network: the grid search ran on the encoded/scaled features (X_train / X_val), so the
# selected configuration must be refitted and tested on that same representation. Using the
# PCA projection here would score a model that was never validated.
# (If the NN-with-PCA sweep is re-enabled, indices >= 4 would need pca_train / pca_test and
# nn_structure[best_NN_ind % 4] instead.)
best_NN_ind = max(range(len(validation_fscores_NN)), key=validation_fscores_NN.__getitem__)
test_fscores.append(NN(X_train, X_test, y_train, y_test, lambdas_NN[best_NN_ind], nn_structure[best_NN_ind])[0])

print(f'Best Logreg: Poly{poly_transform_degrees[best_logreg_ind]}, Lambda = {lambdas_logreg[best_logreg_ind]}')
print(f'Best SVM: {svm_transformations[best_SVM_ind]}, Lambda = {lambdas_SVM[best_SVM_ind]}')
print(f'Best NN: {nn_structure[best_NN_ind]}, Lambda = {lambdas_NN[best_NN_ind]}')
print(f'Test Set F Scores for Logistic Regression, SVM, Neural Network: {test_fscores}')