In [480]:
# Samuel Ghebreyesus
# 12-2-2022

import pandas as pd
import numpy as np

# function to normalize the elements in the heart dataset
def normalize(datafile):
        df = pd.read_csv(datafile)
        scaledAge = df['Age'].to_numpy() / 100.0
        gender = df['Sex'].to_numpy() == 'F'
        gender = gender.astype(int)
        cpt_ASY = df['ChestPainType'].to_numpy() == 'ASY'
        cpt_ASY = cpt_ASY.astype(int)
        cpt_NAP = df['ChestPainType'].to_numpy() == 'NAP'
        cpt_NAP = cpt_NAP.astype(int)
        cpt_ATA = df['ChestPainType'].to_numpy() == 'ATA'
        cpt_ATA = cpt_ATA.astype(int)
        cpt_TA = df['ChestPainType'].to_numpy() == 'TA'
        cpt_TA = cpt_TA.astype(int)
        restingBP = df['RestingBP'].to_numpy()
        restingBP = (restingBP - 50.0) / (200 - 50)
        cholesterol = df['Cholesterol'].to_numpy()
        cholesterol = cholesterol / 400.0
        fasting = df['FastingBS'].to_numpy().astype(int)
        recg_normal = df['RestingECG'].to_numpy() == 'Normal'
        recg_normal = recg_normal.astype(int)
        recg_st = df['RestingECG'].to_numpy() == 'ST'
        recg_st = recg_st.astype(int)
        recg_lvh = df['RestingECG'].to_numpy() == 'LVH'
        recg_lvh = recg_lvh.astype(int)
        maxHR = (df['MaxHR'].to_numpy() - 50) / 200.0
        exAngina = df['ExerciseAngina'].to_numpy() == 'Y'
        exAngina = exAngina.astype(int)
        oldpeak = (df['Oldpeak'].to_numpy() + 0.1) / 4.0
        stslope_flat = df['ST_Slope'].to_numpy() == 'Flat'
        stslope_flat = stslope_flat.astype(int)
        stslope_up = df['ST_Slope'].to_numpy() == 'Up'
        stslope_up = stslope_up.astype(int)
        stslope_down = df['ST_Slope'].to_numpy() == 'Down'
        stslope_down = stslope_down.astype(int)
        labels = df['HeartDisease'].to_numpy().astype(int)
        
        data = np.vstack((scaledAge, 
                              gender,
                              cpt_ASY, cpt_NAP, cpt_ATA, cpt_TA,
                              restingBP,
                              cholesterol,
                              fasting,
                              recg_normal, recg_st, recg_lvh,
                              maxHR,
                              exAngina,
                              oldpeak,
                              stslope_flat, stslope_up, stslope_down,
                              labels)).T
        data = data.astype(float)
        DF = pd.DataFrame(data)
        return DF

In [481]:
# call on normalize function, create X and Y train/test dataframes/arrays
df = normalize("C:/Users/samgh/Downloads/heart.csv")
train_df = normalize("C:/Users/samgh/Downloads/heart_train_718.csv")
test_df = normalize("C:/Users/samgh/Downloads/heart_test_200.csv")
X = df.iloc[:, :-1]
X2 = X.to_numpy()
Y = df.iloc[:, -1]
Y2 = Y.to_numpy()
X_train = train_df.iloc[:, :-1]
Y_train = train_df.iloc[:, -1]
X_train2 = X_train.to_numpy()
Y_train2 = Y_train.to_numpy()
X_test = test_df.iloc[:, :-1]
Y_test = test_df.iloc[:, -1]
X_test2 = X_test.to_numpy()
Y_test2 = Y_test.to_numpy()

In [482]:

import torch
from torch.utils.data import Dataset, DataLoader

# class represents the heart dataset 
class HeartData(Dataset):
    def __init__(self, X, Y):
        self.X = torch.from_numpy(X.astype(np.float32))
        self.y = torch.from_numpy(Y.astype(np.float32))
        self.len = self.X.shape[0]
       
    def __getitem__(self, index):
        return self.X[index], self.y[index]
   
    def __len__(self):
        return self.len
   
batch_size = 64

# put train and test datasets into dataloader object
train_data = HeartData(X_train2, Y_train2)
train_dataloader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)

test_data = HeartData(X_test2, Y_test2)
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)

for batch, (X, y) in enumerate(train_dataloader):
    print(f"Batch: {batch+1}")
    print(f"X shape: {X.shape}")
    print(f"y shape: {y.shape}")
    break

Batch: 1
X shape: torch.Size([64, 18])
y shape: torch.Size([64])


In [483]:
import torch
from torch import nn
from torch import optim

in_features = 18
out_features = 20
out_features2 = 12
out_dim = 2

# Neural Network #1: consists of two fully connected linear layers 
class HeartNet(nn.Module):
    def __init__(self, in_features, out_features, out_features2, out_dim):
        super(HeartNet, self).__init__()
        self.fc1 = nn.Linear(in_features, out_features)
        self.fc2 = nn.Linear(out_features, out_features2)
        self.output = nn.Linear(out_features2, out_dim)
       
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.output(x)
        return x
       
network = HeartNet(in_features, out_features, out_features2, out_dim)
print(network)

HeartNet(
  (fc1): Linear(in_features=18, out_features=20, bias=True)
  (fc2): Linear(in_features=20, out_features=12, bias=True)
  (output): Linear(in_features=12, out_features=2, bias=True)
)


In [484]:
# initializing hyperparameters
learning_rate = 0.1
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(network.parameters(), lr=learning_rate)

In [485]:
num_epochs = 100
loss_values = []

# function for training neural network, takes in the train dataloader and a network model, prints out for each epoch,
# the associated loss value
def train(train_dataloader, network):
    network.train()
    for epoch in range(num_epochs):
        for X, y in train_dataloader:
            optimizer.zero_grad()
            pred = network(X)
            loss = criterion(pred, y.type(torch.LongTensor))
            loss_values.append(loss.item())
            loss.backward()
            optimizer.step()
        print(f'Epoch: {epoch} Loss: {loss}')

In [486]:
# call train function
import torch.nn.functional as F
train(train_dataloader, network)

Epoch: 0 Loss: 0.64759361743927
Epoch: 1 Loss: 0.7372914552688599
Epoch: 2 Loss: 0.6012390851974487
Epoch: 3 Loss: 0.5883570313453674
Epoch: 4 Loss: 0.504421591758728
Epoch: 5 Loss: 0.4765053689479828
Epoch: 6 Loss: 0.3344568610191345
Epoch: 7 Loss: 0.4085538685321808
Epoch: 8 Loss: 0.16527321934700012
Epoch: 9 Loss: 0.25336506962776184
Epoch: 10 Loss: 0.36719927191734314
Epoch: 11 Loss: 0.46469977498054504
Epoch: 12 Loss: 0.2066451758146286
Epoch: 13 Loss: 0.49870166182518005
Epoch: 14 Loss: 0.4130649268627167
Epoch: 15 Loss: 0.5125716924667358
Epoch: 16 Loss: 0.44209668040275574
Epoch: 17 Loss: 0.4369516670703888
Epoch: 18 Loss: 0.6517018675804138
Epoch: 19 Loss: 0.2741278111934662
Epoch: 20 Loss: 0.27365604043006897
Epoch: 21 Loss: 0.3798691928386688
Epoch: 22 Loss: 0.43311867117881775
Epoch: 23 Loss: 0.17220497131347656
Epoch: 24 Loss: 0.2744288146495819
Epoch: 25 Loss: 0.3885229229927063
Epoch: 26 Loss: 0.648061215877533
Epoch: 27 Loss: 0.1787603795528412
Epoch: 28 Loss: 0.4444331

In [487]:
# test function, takes in the test dataloader and a network model, prints out for the test set the average loss 
# and accuracy 
def test(test_dataloader, network):
    loss_values = []
    test_loss = 0
    correct = 0
    with torch.no_grad():
        network.eval()
        for X, y in test_dataloader:
            y_pred = network(X)
            test_loss += criterion(y_pred, y.type(torch.LongTensor))
            pred = y_pred.data.max(1, keepdim=True)[1]
            correct += pred.eq(y.data.view_as(pred)).sum()
    test_loss /= len(test_dataloader.dataset)
    loss_values.append(test_loss)
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_dataloader.dataset),
        100. * correct / len(test_dataloader.dataset)))

In [488]:
# call test and print out results
test(test_dataloader, network)


Test set: Avg. loss: 0.0071, Accuracy: 168/200 (84%)



In [489]:
# Neural Network #2: consists of 2 convolution layers and one linear layer 
class HeartNet2(nn.Module):
    def __init__(self):
        super(HeartNet2, self).__init__()
        self.conv1 = nn.Conv1d(64, 10, kernel_size=5)
        self.conv2 = nn.Conv1d(10, 5, kernel_size=5)
        nn.Flatten()
        self.fc1 = nn.Linear(10, 2)
    
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.fc1(x)
        return x

In [490]:
# print out setup of second neural network
network2 = HeartNet2()
network2

HeartNet2(
  (conv1): Conv1d(64, 10, kernel_size=(5,), stride=(1,))
  (conv2): Conv1d(10, 5, kernel_size=(5,), stride=(1,))
  (fc1): Linear(in_features=10, out_features=2, bias=True)
)

In [491]:
# call train function and print out results 
train(train_dataloader, network)

Epoch: 0 Loss: 0.23471570014953613
Epoch: 1 Loss: 0.19875384867191315
Epoch: 2 Loss: 0.11649491637945175
Epoch: 3 Loss: 0.26884010434150696
Epoch: 4 Loss: 0.29933813214302063
Epoch: 5 Loss: 0.22075171768665314
Epoch: 6 Loss: 0.14443445205688477
Epoch: 7 Loss: 0.2569844424724579
Epoch: 8 Loss: 0.3796629309654236
Epoch: 9 Loss: 0.3612794578075409
Epoch: 10 Loss: 0.43122047185897827
Epoch: 11 Loss: 0.18672263622283936
Epoch: 12 Loss: 0.14409467577934265
Epoch: 13 Loss: 0.14019931852817535
Epoch: 14 Loss: 0.530377209186554
Epoch: 15 Loss: 0.4085719883441925
Epoch: 16 Loss: 0.2580457031726837
Epoch: 17 Loss: 0.14605438709259033
Epoch: 18 Loss: 0.20976577699184418
Epoch: 19 Loss: 0.25388258695602417
Epoch: 20 Loss: 0.6081656813621521
Epoch: 21 Loss: 0.3749033510684967
Epoch: 22 Loss: 0.13420896232128143
Epoch: 23 Loss: 0.2194700390100479
Epoch: 24 Loss: 0.126507967710495
Epoch: 25 Loss: 0.05837346240878105
Epoch: 26 Loss: 0.0848638191819191
Epoch: 27 Loss: 0.3538387715816498
Epoch: 28 Loss: 

In [492]:
# call test function and pick out results 
test(test_dataloader, network)


Test set: Avg. loss: 0.0070, Accuracy: 162/200 (81%)



In [538]:
# Extension: tried another architecture where I added a dropout layer with dropout rate of 0.25 after the first convolution
# layer
class HeartNet3(nn.Module):
    def __init__(self):
        super(HeartNet3, self).__init__()
        self.conv1 = nn.Conv1d(64, 10, kernel_size=5)
        self.dropout1 = nn.Dropout1d(0.25)
        self.conv2 = nn.Conv1d(10, 5, kernel_size=5)
        nn.Flatten()
        self.fc1 = nn.Linear(10, 2)
    
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.dropout1(x)
        x = F.relu(self.conv2(x))
        x = self.fc1(x)
        return x

In [543]:
# print out contents of new architecture
network3 = HeartNet3()
network3

HeartNet3(
  (conv1): Conv1d(64, 10, kernel_size=(5,), stride=(1,))
  (dropout1): Dropout1d(p=0.25, inplace=False)
  (conv2): Conv1d(10, 5, kernel_size=(5,), stride=(1,))
  (fc1): Linear(in_features=10, out_features=2, bias=True)
)

In [541]:
# call train, print out loss results for each of 100 epochs
train(train_dataloader, network)

Epoch: 0 Loss: 0.7578279376029968
Epoch: 1 Loss: 0.8208339810371399
Epoch: 2 Loss: 0.8529514670372009
Epoch: 3 Loss: 0.7466496825218201
Epoch: 4 Loss: 0.6920698285102844
Epoch: 5 Loss: 0.6596370339393616
Epoch: 6 Loss: 0.7509660720825195
Epoch: 7 Loss: 0.7198501825332642
Epoch: 8 Loss: 0.7869028449058533
Epoch: 9 Loss: 0.7553368210792542
Epoch: 10 Loss: 0.7140529751777649
Epoch: 11 Loss: 0.7238460183143616
Epoch: 12 Loss: 0.625843346118927
Epoch: 13 Loss: 0.7237834334373474
Epoch: 14 Loss: 0.754167377948761
Epoch: 15 Loss: 0.6825264096260071
Epoch: 16 Loss: 0.756429135799408
Epoch: 17 Loss: 0.6198042631149292
Epoch: 18 Loss: 0.6661861538887024
Epoch: 19 Loss: 0.6597274541854858
Epoch: 20 Loss: 0.6903911232948303
Epoch: 21 Loss: 0.7586681246757507
Epoch: 22 Loss: 0.6649648547172546
Epoch: 23 Loss: 0.7184661626815796
Epoch: 24 Loss: 0.7565951347351074
Epoch: 25 Loss: 0.6587368845939636
Epoch: 26 Loss: 0.6254586577415466
Epoch: 27 Loss: 0.7210241556167603
Epoch: 28 Loss: 0.789739727973938

In [542]:
# get avg loss/accuracy for test set
test(test_dataloader, network)


Test set: Avg. loss: 0.0137, Accuracy: 105/200 (52%)



In [544]:
# Extension: tried another architecture where I added a max pool layer of size 2 to each convolution layer
class HeartNet4(nn.Module):
    def __init__(self):
        super(HeartNet4, self).__init__()
        self.conv1 = nn.Conv1d(64, 10, kernel_size=5)
        self.dropout1 = nn.Dropout1d(0.25)
        self.conv2 = nn.Conv1d(10, 5, kernel_size=5)
        nn.Flatten()
        self.fc1 = nn.Linear(10, 2)
    
    def forward(self, x):
        x = F.relu(F.max_pool1d(self.conv1(x), 2))
        x = self.dropout1(x)
        x = F.relu(F.max_pool1d(self.conv2(x), 2))
        x = self.fc1(x)
        return x

In [546]:
# print out contents of new architecture
network4 = HeartNet4()
network4

HeartNet4(
  (conv1): Conv1d(64, 10, kernel_size=(5,), stride=(1,))
  (dropout1): Dropout1d(p=0.25, inplace=False)
  (conv2): Conv1d(10, 5, kernel_size=(5,), stride=(1,))
  (fc1): Linear(in_features=10, out_features=2, bias=True)
)

In [548]:
# print out loss for each epoch
train(train_dataloader, network)

Epoch: 0 Loss: 0.7458941340446472
Epoch: 1 Loss: 0.5929374694824219
Epoch: 2 Loss: 0.6944848299026489
Epoch: 3 Loss: 0.7281133532524109
Epoch: 4 Loss: 0.7760716676712036
Epoch: 5 Loss: 0.6994405388832092
Epoch: 6 Loss: 0.6221628189086914
Epoch: 7 Loss: 0.8135789036750793
Epoch: 8 Loss: 0.5620203614234924
Epoch: 9 Loss: 0.6910406351089478
Epoch: 10 Loss: 0.6273007988929749
Epoch: 11 Loss: 0.7237446904182434
Epoch: 12 Loss: 0.6870070099830627
Epoch: 13 Loss: 0.7899597883224487
Epoch: 14 Loss: 0.6214264035224915
Epoch: 15 Loss: 0.7987815141677856
Epoch: 16 Loss: 0.6535539031028748
Epoch: 17 Loss: 0.6878836750984192
Epoch: 18 Loss: 0.689551830291748
Epoch: 19 Loss: 0.6644907593727112
Epoch: 20 Loss: 0.5924659967422485
Epoch: 21 Loss: 0.6006001234054565
Epoch: 22 Loss: 0.5976199507713318
Epoch: 23 Loss: 0.6970958709716797
Epoch: 24 Loss: 0.7213200926780701
Epoch: 25 Loss: 0.6544215083122253
Epoch: 26 Loss: 0.693144679069519
Epoch: 27 Loss: 0.7184223532676697
Epoch: 28 Loss: 0.69083547592163

In [549]:
# print out avg loss/accuracy for dataset
test(test_dataloader, network)


Test set: Avg. loss: 0.0145, Accuracy: 105/200 (52%)

