In [2]:
import torch
import torch.nn as nn
from sklearn import datasets
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import pandas as pd

In [3]:
#Input data
# Load the CSV, shuffle the rows once, and cut the result into contiguous
# train / validation / test slices.
SPLIT_SEED = 42      # fixed seed: the same rows land in the same split on every run
TRAIN_END = 46000    # rows [0, TRAIN_END) are used for training
VALID_END = 51000    # rows [TRAIN_END, VALID_END) for validation, the rest for testing

dataset = pd.read_csv('Dataset/HeartDisease.csv')
dataset = np.array(dataset)
# An unseeded shuffle would produce a different split after every kernel
# restart, so a checkpoint saved in one session could be evaluated on rows it
# was trained on in another. A seeded generator keeps the split stable.
np.random.default_rng(SPLIT_SEED).shuffle(dataset)  # in place, along axis 0 (rows)
dataset_train = dataset[:TRAIN_END, :]
dataset_valid = dataset[TRAIN_END:VALID_END, :]
dataset_test = dataset[VALID_END:, :]

In [4]:
#Customize dataset
class HeartDataset(Dataset):
  """Map-style dataset over a 2-D table whose first column is the target.

  Column 0 of ``indata`` is stored as ``y``; every remaining column is stored
  as the feature matrix ``x``. ``n_samples`` and ``n_feature`` expose the row
  and column counts of ``x``.
  """

  def __init__(self, indata):
    # np.array() accepts anything array-like and always works on a copy.
    table = np.array(indata)
    self.y = table[:, 0]
    self.x = table[:, 1:]
    self.n_samples, self.n_feature = self.x.shape[:2]

  def __len__(self):
    """Number of rows (samples) held by the dataset."""
    return self.n_samples

  def __getitem__(self, index):
    """Return the ``(features, target)`` pair stored at ``index``."""
    features = self.x[index]
    target = self.y[index]
    return features, target

In [5]:
#Data loading
# Training and validation splits are wrapped in HeartDataset and served as
# shuffled mini-batches of 32 rows.
data_train = HeartDataset(dataset_train)
dataloader = DataLoader(data_train, batch_size=32, shuffle=True)
data_valid = HeartDataset(dataset_valid)
validloader = DataLoader(data_valid, batch_size=32, shuffle=True)

# The test split is kept as two plain float64 tensors: every column after the
# first as features, column 0 as the target.
X_test = torch.tensor(dataset_test[:, 1:], dtype=torch.float64)
Y_test = torch.tensor(dataset_test[:, 0], dtype=torch.float64)

In [6]:
#Build Logistic Regression model
class LogisticRegression(nn.Module):
  """Single linear layer followed by a sigmoid.

  Args:
    input_size: number of input features per sample.
    hidden_size: accepted for signature compatibility; not used by the model.
    output_size: number of outputs per sample.
  """

  def __init__(self, input_size, hidden_size, output_size):
    super().__init__()
    self.linear1 = nn.Linear(input_size, output_size)
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    """Cast ``x`` to float32, apply the linear layer, and squash with sigmoid."""
    logits = self.linear1(x.float())
    return self.sigmoid(logits)

In [10]:
#Train and validate model
# Build the model, then either restore a previously saved checkpoint or train
# from scratch, keeping the weights with the lowest summed validation loss.
n_feature = data_train.n_feature
n_samples = data_train.n_samples
model = LogisticRegression(n_feature, 6, 1)
loss = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
min_valid_loss = np.inf
epochs = 100

# Training writes its best checkpoint to 'saved_model.pt', whereas loading
# used to look only in 'Saved Model/'. Both locations are tried so that a
# checkpoint produced by a previous run of this cell is actually reused.
checkpoint_candidates = ('Saved Model/saved_model.pt', 'saved_model.pt')
best_checkpoint = 'saved_model.pt'

model_loaded = False
for checkpoint in checkpoint_candidates:
    try:
        model.load_state_dict(torch.load(checkpoint))
    except Exception as err:
        # Best effort: a missing or unreadable checkpoint falls back to
        # training, but the reason is reported instead of silently dropped
        # (and KeyboardInterrupt / SystemExit are no longer swallowed).
        print(f'Could not load {checkpoint!r}: {err}')
    else:
        model_loaded = True
        break

if not model_loaded:
    for epoch in range(epochs):
        # ---- training pass ----
        model.train()
        training_loss = 0.0
        for features, label in dataloader:
            optimizer.zero_grad()
            y_pred = model(features)
            # BCELoss needs float targets shaped like the (batch, 1) predictions.
            label = label.reshape(-1, 1).float()
            batch_loss = loss(y_pred, label)
            batch_loss.backward()
            optimizer.step()
            training_loss += batch_loss.item()

        # ---- validation pass (no gradients needed) ----
        model.eval()
        validating_loss = 0.0
        with torch.no_grad():
            for features, label in validloader:
                y_pred = model(features)
                label = label.reshape(-1, 1).float()
                validating_loss += loss(y_pred, label).item()

        print(f'Epoch {epoch+1} \t\t Training Loss: {training_loss / len(dataloader)} \t\t Validation Loss: {validating_loss / len(validloader)}')
        if min_valid_loss > validating_loss:
            print(f'Validation Loss Decreased({min_valid_loss:.6f}--->{validating_loss:.6f}) \t Saving The Model')
            min_valid_loss = validating_loss
            # Saving State Dict
            torch.save(model.state_dict(), best_checkpoint)

    # After the last epoch `model` holds the final weights, which are not
    # necessarily the best ones; restore the best checkpoint if one was saved.
    if np.isfinite(min_valid_loss):
        model.load_state_dict(torch.load(best_checkpoint))

# Leave the model in inference mode for the cells that follow.
model.eval()



In [11]:
#Model Testing
# Score the test features, flatten to 1-D, and round each probability to a
# hard 0/1 prediction.
predictions = model(X_test).detach().numpy().reshape(-1).round()
# NOTE: despite its name, Y_train holds the *test* labels (taken from Y_test).
Y_train = Y_test.detach().numpy()
# Element-wise hit/miss mask; accuracy is the share of hits, truncated to a
# whole percent by int().
result = predictions == Y_train
accuracy_percent = int(np.sum(result) / len(result) * 100)
print(f"Model Accuracy {accuracy_percent}%")

Model Accuracy 92%
