In [12]:
import numpy as np
import pandas as pd
import torch 
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from torch import nn

In [3]:
# Load the raw heart-disease table.
# The path is a raw string: in a normal literal the backslashes of this Windows
# path form invalid escape sequences (\O, \M, \h), which newer Python versions
# warn about. The resulting path is unchanged.
# NOTE(review): absolute local path; adjust when running on another machine.
data = pd.read_csv(r'D:\OneDrive\MLprac\heart_disease_prediction.csv')
data.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,No,16.6,Yes,No,No,3.0,30.0,No,Female,55-59,White,Yes,Yes,Very good,5.0,Yes,No,Yes
1,No,20.34,No,No,Yes,0.0,0.0,No,Female,80 or older,White,No,Yes,Very good,7.0,No,No,No
2,No,26.58,Yes,No,No,20.0,30.0,No,Male,65-69,White,Yes,Yes,Fair,8.0,Yes,No,No
3,No,24.21,No,No,No,0.0,0.0,No,Female,75-79,White,No,No,Good,6.0,No,No,Yes
4,No,23.71,No,No,No,28.0,0.0,Yes,Female,40-44,White,No,Yes,Very good,8.0,No,No,No


In [8]:
# Categorical columns that get expanded into one indicator column per category.
catcols = [
    'Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex',
    'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth',
    'Asthma', 'KidneyDisease', 'SkinCancer',
]

# The indicator dtype is pinned to uint8 so the columns are numeric 0/1 on every
# pandas version; recent pandas defaults to bool, and a frame mixing bool and
# float columns converts to an object array that torch cannot consume.
# NOTE: this rebinds `data`, so the cell cannot be re-run without reloading the
# raw table first (the columns listed in `catcols` no longer exist afterwards).
data = pd.get_dummies(data, columns=catcols, dtype=np.uint8)

# Encode the target as 0/1. LabelBinarizer returns a column vector for a
# two-class target, so it is flattened before being stored as a single column.
data['HeartDisease'] = LabelBinarizer().fit_transform(data['HeartDisease']).ravel()

In [11]:
# Stratified 80/20 split of features and target.
# train_test_split returns, for each array passed in, its train part followed by
# its test part, i.e. (X_train, X_test, y_train, y_test). The names on the left
# must follow that order, otherwise labels and features get swapped.
x_train, x_test, y_train, y_test = train_test_split(
    data.drop('HeartDisease', axis=1),
    data['HeartDisease'],
    test_size=0.2,
    shuffle=True,
    stratify=data['HeartDisease'],
    random_state=777,
)

In [9]:
# Preview the first five rows of the current `data` frame.
data.head(n=5)

Unnamed: 0,HeartDisease,BMI,PhysicalHealth,MentalHealth,SleepTime,Smoking_No,Smoking_Yes,AlcoholDrinking_No,AlcoholDrinking_Yes,Stroke_No,...,GenHealth_Fair,GenHealth_Good,GenHealth_Poor,GenHealth_Very good,Asthma_No,Asthma_Yes,KidneyDisease_No,KidneyDisease_Yes,SkinCancer_No,SkinCancer_Yes
0,0,16.6,3.0,30.0,5.0,0,1,1,0,1,...,0,0,0,1,0,1,1,0,0,1
1,0,20.34,0.0,0.0,7.0,1,0,1,0,0,...,0,0,0,1,1,0,1,0,1,0
2,0,26.58,20.0,30.0,8.0,0,1,1,0,1,...,1,0,0,0,0,1,1,0,1,0
3,0,24.21,0.0,0.0,6.0,1,0,1,0,1,...,0,1,0,0,1,0,1,0,0,1
4,0,23.71,28.0,0.0,8.0,1,0,1,0,1,...,0,0,0,1,1,0,1,0,1,0


In [13]:
#building network
class HeartDiseaseNet(nn.Module):
    """Fully connected binary classifier that outputs one probability per row.

    Architecture: Linear -> ReLU -> Linear -> ReLU -> Linear -> Sigmoid.

    Args:
        in_features: Number of input features per sample. Defaults to 50, the
            value that used to be hard-coded.
        hidden_size: Width of both hidden layers. Defaults to 300, the value
            that used to be hard-coded.
    """

    def __init__(self, in_features=50, hidden_size=300):
        super().__init__()
        # Layers are created in the same order and stored under the same
        # attribute as before, so parameter names (model.0.*, model.2.*,
        # model.4.*) and the default initialisation sequence are unchanged.
        self.model = nn.Sequential(
            nn.Linear(in_features, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1),
            nn.Sigmoid(),  # squashes the single output into (0, 1)
        )

    def forward(self, x):
        """Run the network on `x` (last dimension `in_features`).

        Returns:
            Tensor with the same leading dimensions as `x` and a last
            dimension of 1, holding sigmoid outputs.
        """
        return self.model(x)

In [15]:
# Training setup: hyperparameters, loss, model and optimizer.
epochs = 5
batch_size = 1024
learning_rate = 0.05

# Number of mini-batches per epoch, rounded up so a final short batch is kept.
# Done in integer arithmetic: true division (`/`) yields a float when the row
# count is an exact multiple of batch_size, and range() rejects floats.
batch_count = (x_train.shape[0] + batch_size - 1) // batch_size

# Use the GPU when one is available, otherwise fall back to the CPU instead of
# failing on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Seed torch so the weight initialisation is repeatable across kernel restarts
# (same value as the random_state used for the train/test split).
torch.manual_seed(777)

loss_fn = nn.BCELoss()  # operates on probabilities; the network ends in a Sigmoid
model = HeartDiseaseNet().to(device=device)
optim = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [17]:
# Training loop: plain mini-batch SGD over the training split, in row order.
# Batches are moved to whatever device the model's parameters live on, so this
# cell does not hard-code a device.
model_device = next(model.parameters()).device
n_samples = x_train.shape[0]

model.train()
for epoch in range(epochs):  # epoch loop
    running_loss = 0.0
    # Stepping through row offsets covers every row once per epoch, including
    # a final batch shorter than batch_size.
    for start in range(0, n_samples, batch_size):  # batch loop
        stop = start + batch_size
        # to_numpy(dtype=float32) converts int/bool indicator columns together
        # with the float columns into one numeric array torch can wrap.
        x_batch = torch.from_numpy(
            x_train.iloc[start:stop].to_numpy(dtype=np.float32)
        ).to(model_device)
        y_batch = torch.from_numpy(
            y_train.iloc[start:stop].to_numpy(dtype=np.float32).reshape(-1, 1)
        ).to(model_device)

        # Call the module itself rather than .forward() so registered hooks run.
        probs = model(x_batch)
        loss = loss_fn(probs, y_batch)

        optim.zero_grad()  # clear gradients left from the previous step
        loss.backward()    # compute gradients of the loss w.r.t. the weights
        optim.step()       # apply the SGD update

        # Weight by batch length so the short last batch is not over-counted.
        running_loss += loss.item() * x_batch.shape[0]

    print(f'epoch {epoch + 1}/{epochs} - mean training loss: {running_loss / max(n_samples, 1):.4f}')
