### Import Library

In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import sys
from csv import writer
import csv
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

### Read Dataset

In [None]:
def dataset_read(featurelist, path='heart_failure.csv'):
    """Load selected CSV columns and scale each one by its column maximum.

    Parameters
    ----------
    featurelist : list of str
        Column names to keep. The columns of the returned array follow the
        order of this list.
    path : str, optional
        CSV file to read. Defaults to 'heart_failure.csv'.

    Returns
    -------
    numpy.ndarray
        2-D float array with one column per entry of ``featurelist``. Every
        column is divided by its own maximum; a column whose maximum is 0 is
        returned unscaled instead of being divided by zero.
    """
    data = pd.read_csv(path)
    values = np.array(data[featurelist], dtype=float)

    #Normalization
    col_max = values.max(axis=0)
    # A zero maximum would turn the whole column into nan/inf; divide such
    # columns by 1 so they pass through unchanged.
    col_max = np.where(col_max == 0, 1.0, col_max)
    return values / col_max

### Dataset Split

In [None]:
# 85/15 train/test split

def dataset_split(input_data_normed, train_fraction=.85):
    """Split a 2-D array row-wise into a leading training part and a trailing test part.

    Parameters
    ----------
    input_data_normed : numpy.ndarray
        2-D array; rows are kept in their existing order (no shuffling).
    train_fraction : float, optional
        Fraction of rows that go to the training part. Defaults to 0.85.

    Returns
    -------
    tuple of numpy.ndarray
        ``(training, test)`` where ``training`` holds the first
        ``int(train_fraction * n_rows)`` rows and ``test`` holds the rest.
    """
    cut = int(train_fraction*len(input_data_normed))
    training, test = input_data_normed[:cut,:], input_data_normed[cut:,:]
    return training,test

In [None]:
def training_1(training,test, splitting_ratio):
    """Fit an MLP classifier on the leading rows of ``training`` and score it on ``test``.

    Column 0 of both arrays is used as the class label; all remaining columns
    are used as input features.

    Parameters
    ----------
    training : numpy.ndarray
        2-D array of labelled rows available for fitting.
    test : numpy.ndarray
        2-D array of labelled rows used for scoring.
    splitting_ratio : float
        Fraction of the rows of ``training`` (taken from the top, without
        shuffling) that are used to fit the model. The remaining rows are
        held out and are not used by this function.

    Returns
    -------
    float
        ``accuracy_score`` of the fitted classifier on ``test``.
    """
    n_fit = int(splitting_ratio*len(training))
    train = training[:n_fit,:]
    label_train = train[:, 0]
    train = np.delete(train, 0, 1)

    # Run the MLP classifier; random_state is fixed so repeated calls with the
    # same data give the same model.
    clf = MLPClassifier(batch_size=32,learning_rate_init=0.001,hidden_layer_sizes=32,random_state=42,activation= 'relu', max_iter=2000)
    clf.fit(train, label_train)

    label_test = test[:, 0]
    test = np.delete(test, 0, 1)

    y_pred=clf.predict(test)
    result = accuracy_score(label_test, y_pred)
    return result

### Main Function

In [None]:
## Main Function, run for only one feature set
# 'Event' is listed first because training_1 treats column 0 as the label.

cur_feature = ['Event', 'TIME', 'Gender', 'Smoking']

# Load + normalise the chosen columns, then split rows into train/test parts.
input_data_normed = dataset_read(cur_feature)
train,test = dataset_split(input_data_normed)

# Fit on the first 95% of the training rows; the returned test accuracy is
# the cell's displayed value.
training_1(train,test,0.95)

0.8888888888888888

### FeatureSet selection (dynamic)

In [None]:
# Collect every column name of the CSV except 'Event'; 'Event' is prepended
# separately to each candidate feature set in the search loop.
data = pd.read_csv('heart_failure.csv')
all_features = data.columns.tolist()
all_features.remove('Event')

In [None]:
def powerset(s):
    """Return every subset of the sequence ``s`` as a list of lists.

    Subsets are enumerated by bit mask: subset number ``mask`` contains
    ``s[pos]`` exactly when bit ``pos`` of ``mask`` is set. The first entry is
    therefore the empty list and the last one is a copy of ``s``; within each
    subset the elements keep their order from ``s``.
    """
    size = len(s)
    return [
        [item for pos, item in enumerate(s) if (mask >> pos) & 1]
        for mask in range(2 ** size)
    ]

# Every subset of the candidate features (the first entry is the empty subset).
all_feature_set = powerset(all_features)
# Displayed as the cell output: the number of subsets generated.
len(all_feature_set)

4096

In [None]:
def write_final_data(filename, List):
    """Append a single row to a CSV file.

    Parameters
    ----------
    filename : str
        Path of the CSV file; it is opened in append mode, so it is created
        if it does not exist and existing rows are kept.
    List : list
        The values of the row to write, one per CSV field.
    """
    # newline='' hands line-ending control to the csv module; without it an
    # extra blank line can follow every row on platforms that translate '\n'.
    # The with-block closes the file, so no explicit close() is needed.
    with open(filename, 'a', newline='') as f_object:
        writer_object = writer(f_object)
        writer_object.writerow(List)

In [None]:
%%time

# Exhaustive feature-subset search, repeated for three values of
# splitting_ratio (it starts at .85 and is increased by .05 after each pass).
# Each pass writes one row per feature subset to its own CSV file.
splitting_ratio = .85

for i in range(0, 3):
    #filename = "finalResult.csv"
    filename ='finalResultNN_' + str(i+1) + '.csv'
    
    # opening the file with w+ mode truncates the file
    # (write_final_data appends, so results of an earlier run must be cleared first)
    f = open(filename, "w+")
    f.close()


    cnt = 0
    for feature_set in all_feature_set:
        cnt = cnt + 1
        # Skip the first subset: powerset() puts the empty list first.
        if(cnt==1):
            continue
        # 'Event' goes first because training_1 reads the label from column 0.
        cur_feature_list = ['Event']+feature_set
        #print(cur_feature_list)
        # NOTE(review): dataset_read re-reads the CSV on every iteration.
        input_data_normed = dataset_read(cur_feature_list)
        train,test = dataset_split(input_data_normed)
        result = training_1(train,test, splitting_ratio)
        # One CSV row per subset: [feature list, test accuracy].
        write_final_data(filename, [cur_feature_list, result])
    
    splitting_ratio = splitting_ratio + .05

### Read from CSV file to find the **maximum accuracy** and **best feature**

In [None]:
def _feature_count(entry):
    """Return the number of features held by one feature-list entry."""
    import ast

    # Rows read back from CSV hold the feature list as text such as
    # "['Event', 'TIME']"; len() of that text counts characters, not features.
    if isinstance(entry, str):
        try:
            parsed = ast.literal_eval(entry)
        except (ValueError, SyntaxError):
            return len(entry)
        if isinstance(parsed, (list, tuple)):
            return len(parsed)
    return len(entry)


def best_feature(data, maxAcc):
    """Report the smallest feature set among the rows that reach ``maxAcc``.

    Parameters
    ----------
    data : sequence of rows
        Each row is indexed as ``row[0]`` (feature list, either a real list
        or its text form as read from CSV) and ``row[1]`` (accuracy).
    maxAcc
        Accuracy value to look for; rows are selected with ``row[1] == maxAcc``.

    Raises
    ------
    ValueError
        If no row has an accuracy equal to ``maxAcc``.

    The result is printed through ``print_function``. Among equally small
    feature sets the first one encountered is reported.
    """
    tied_feature_sets = []
    feature = None
    len_best_feature = None

    for row in data:
        if row[1] == maxAcc:
            tied_feature_sets.append(row[0])
            n_features = _feature_count(row[0])
            if len_best_feature is None or n_features < len_best_feature:
                len_best_feature = n_features
                feature = row[0]

    if len_best_feature is None:
        raise ValueError("no row has accuracy equal to maxAcc")

    print_function(feature, maxAcc, tied_feature_sets)

In [None]:
def print_function(feature, maxAcc, best_feature):
    """Print the chosen feature list and its accuracy, followed by three blank lines.

    ``best_feature`` (all feature lists reaching the accuracy) is accepted but
    not printed.
    """
    labelled_values = (
        ("Best Feature List", feature),
        ("Maximum Accuracy: ", maxAcc),
    )
    for caption, value in labelled_values:
        print(caption, value)

    # Trailing blank lines visually separate consecutive reports.
    for _ in range(3):
        print()

In [None]:
def best_feature_func(num_files=3, prefix='finalResultNN_'):
    """Report the best feature set for each result file.

    Reads ``<prefix>1.csv`` ... ``<prefix><num_files>.csv``; every row is
    expected to hold a feature list in field 0 and an accuracy in field 1.
    For each file the highest accuracy is determined and passed, together
    with the rows, to ``best_feature``.

    Parameters
    ----------
    num_files : int, optional
        Number of result files to process. Defaults to 3.
    prefix : str, optional
        File-name prefix placed before the running number. Defaults to
        'finalResultNN_'.
    """
    for file_no in range(1, num_files + 1):
        filename = prefix + str(file_no) + '.csv'
        with open(filename, newline='') as f:
            # Empty rows (e.g. blank lines in the file) carry no result.
            data = [row for row in csv.reader(f) if row]

        if not data:
            print(filename + " contains no results")
            continue

        # Fields are read back as text. Compare them as numbers, since plain
        # string comparison is lexicographic, but keep the original text so
        # the equality test inside best_feature still matches the rows.
        maxAcc = max((row[1] for row in data), key=float)
        best_feature(data, maxAcc)

In [None]:
# Print the best feature set and maximum accuracy for each of the result files.
best_feature_func()

Best Feature List ['Event', 'TIME', 'Smoking', 'Ejection.Fraction']
Maximum Accuracy:  0.9555555555555556



Best Feature List ['Event', 'TIME', 'Gender', 'Ejection.Fraction', 'Sodium']
Maximum Accuracy:  0.9555555555555556



Best Feature List ['Event', 'TIME', 'Smoking', 'BP', 'Ejection.Fraction']
Maximum Accuracy:  0.9555555555555556





In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, accuracy_score
import pandas as pd
import numpy as np
from google.colab import drive

# Colab-only: the CSV is read from the mounted Google Drive.
drive.mount('/content/drive')
data = pd.read_csv('/content/drive/My Drive/Colab Notebooks/heart_failure.csv')

#Normalization
data=np.array(data)
input_data_normed = data / data.max(axis=0)

# 85/15 train/test split on row order. Column 1 is used as the label here
# (all CSV columns are loaded) -- presumably the 'Event' column; TODO confirm.
test_start = int(.85*len(input_data_normed))
training, test = input_data_normed[:test_start,:], input_data_normed[test_start:,:]
label_training = training[:, 1]
label_test = test[:, 1]
test = np.delete(test, 1, 1)

# Shuffle the training rows in place with a fixed seed so the train/validation
# split is the same on every run.
np.random.RandomState(42).shuffle(training)

# 95/5 train/validation split of the *training* rows. The cut must be
# computed from len(training): using the length of the full data set would
# leave every training row in `train` and take `valid` from the test rows.
valid_start = int(.95*len(training))
train,valid = training[:valid_start,:], training[valid_start:,:]
label_train, label_valid = train[:, 1], valid[:, 1]
train = np.delete(train, 1, 1)
valid = np.delete(valid, 1, 1)

#Run MLP Classifier
clf = MLPClassifier(batch_size=32,learning_rate_init=0.001,hidden_layer_sizes=32,random_state=42,activation= 'relu', max_iter=2000).fit(train,label_train)
print("Validation Score: ", clf.score(valid, label_valid))

In [None]:
#for combinations of hyper-parameters
# Grid over hidden_layer_sizes x alpha; keeps the pair with the lowest
# expected loss returned by bias_variance_decomp.
# Relies on train, label_train, valid and label_valid (and MLPClassifier)
# already being defined by an earlier cell.
from mlxtend.evaluate import bias_variance_decomp


# These three lists stay empty in this cell (their appends are commented out below).
expected_loss_list = []
bias_list = []
variance_list = []
hidden_layer_sizesList_forGraph = []
alphaList_forGraph = []

# Large sentinel so the first evaluated combination always becomes the best so far.
minExpectedLoss = 1000000000000000
best_hidden_layer_sizes_Value = -1
best_alphaValue = -1
# itr = 1
for hidden_layer_sizes_Value in [100, 200, 300, 400]:
  for alphaValue in [0.00001, 0.0001, 0.001, 0.01, 0.1]:
    # print(itr)
    # itr = itr + 1
    clf=MLPClassifier(batch_size=32,learning_rate_init=0.001,hidden_layer_sizes=hidden_layer_sizes_Value, alpha = alphaValue, random_state=42,activation= 'relu', max_iter=2000)
    # NOTE(review): num_rounds = 400 for each of the 20 combinations -- presumably
    # one model fit per round, which would make this cell slow; confirm.
    expected_loss, bias, variance = bias_variance_decomp(
        clf, train, label_train, valid, label_valid, 
        loss='0-1_loss', num_rounds = 400,
        random_seed=123)
    # Strict '<' keeps the earliest combination when several tie.
    if expected_loss < minExpectedLoss:
      minExpectedLoss = expected_loss
      best_hidden_layer_sizes_Value = hidden_layer_sizes_Value
      best_alphaValue = alphaValue
    
    # expected_loss_list.append(expected_loss)
    # bias_list.append(bias)
    # variance_list.append(variance)

    # Record the grid point that was just evaluated.
    hidden_layer_sizesList_forGraph.append(hidden_layer_sizes_Value)
    alphaList_forGraph.append(alphaValue)

    # print(expected_loss, bias, variance)

print("Best hyperparameters:", best_hidden_layer_sizes_Value, best_alphaValue)

In [None]:
#for graph
# Sweeps hidden_layer_sizes at a fixed alpha and plots expected loss, squared
# bias and variance against the hidden layer size.
# Relies on train, label_train, valid and label_valid (and MLPClassifier)
# already being defined by an earlier cell.
from mlxtend.evaluate import bias_variance_decomp
from mpl_toolkits import mplot3d
import matplotlib.pyplot as plt

expected_loss_list = []
bias_list = []
variance_list = []
hidden_layer_sizesList_forGraph = []
alphaList_forGraph = []

# alpha is held fixed for this sweep. It is defined here so that the value
# recorded in alphaList_forGraph is the one actually given to the classifier,
# not a loop variable left behind by another cell.
graph_alpha = 0.0001

minExpectedLoss = 10000000000
best_hidden_layer_sizes_Value = -1
for hidden_layer_sizes_Value in [100, 200, 300, 400]:
    clf=MLPClassifier(batch_size=32,learning_rate_init=0.001,hidden_layer_sizes=hidden_layer_sizes_Value, alpha = graph_alpha, random_state=42,activation= 'relu', max_iter=2000)
    expected_loss, bias, variance = bias_variance_decomp(
        clf, train, label_train, valid, label_valid,
        loss='0-1_loss', num_rounds = 400,
        random_seed=123)
    # Strict '<' keeps the earliest size when several tie.
    if expected_loss < minExpectedLoss:
        minExpectedLoss = expected_loss
        best_hidden_layer_sizes_Value = hidden_layer_sizes_Value

    expected_loss_list.append(expected_loss)
    # NOTE(review): the returned bias is squared to match the 'Bias^2' legend;
    # confirm against the mlxtend documentation that it is not already on the
    # intended scale for the 0-1 loss.
    bias_list.append(bias*bias)
    variance_list.append(variance)

    hidden_layer_sizesList_forGraph.append(hidden_layer_sizes_Value)
    alphaList_forGraph.append(graph_alpha)

#draw graph
plt.rcParams.update({'font.size': 12})

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(hidden_layer_sizesList_forGraph, expected_loss_list, 'red', label='Expected loss')
ax.plot(hidden_layer_sizesList_forGraph, bias_list, 'green', label = 'Bias'r'$^2$')
ax.plot(hidden_layer_sizesList_forGraph, variance_list, 'blue', label = 'Variance')

ax.set_xlabel('hidden_layer_sizes')
ax.set_ylabel('Loss value')
ax.legend()
plt.show()