In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler

# default size applied to the matplotlib figures created below
plt.rcParams.update({'figure.figsize': (15, 7.0)})

In [None]:
# location of the heart-attack dataset CSV
heart_data = '../input/heart-attack-analysis-prediction-dataset/heart.csv'

# read the CSV into a DataFrame
heart_df = pd.read_csv(filepath_or_buffer=heart_data)

# preview the first five rows
heart_df.head(n=5)

In [None]:
# summary of heart_df as produced by DataFrame.describe()
heart_df.describe()

In [None]:
# inspect the dtype pandas assigned to each column
heart_df.dtypes

In [None]:
# drop duplicate rows, if any; drop_duplicates() returns a new frame instead of
# modifying heart_df, so the result has to be assigned back to take effect
heart_df = heart_df.drop_duplicates()

# check missing values (count of NaN entries per column)
heart_df.isna().sum()

In [None]:
# number of rows in each class of the output column
sns.countplot(data=heart_df, x='output').set_title("output Column Distribution")

In [None]:
# number of rows in each class of the sex column
sns.countplot(data=heart_df, x='sex').set_title("Sex Column Distribution")

In [None]:
# box plot of cholesterol level (chol column) for each output class
sns.boxplot(data=heart_df, x="output", y="chol")

In [None]:
# box plot of the thalachh column for each output class
sns.boxplot(data=heart_df, x="output", y="thalachh")

In [None]:
# box plot of the oldpeak column for each output class
sns.boxplot(data=heart_df, x="output", y="oldpeak")

In [None]:
# box plot of the age column for each output class
sns.boxplot(data=heart_df, x="output", y="age")

In [None]:
# number of rows for each age value, titled like the other count plots
ax = sns.countplot(x='age', data=heart_df)
ax.set_title("Age Column Distribution")

In [None]:
# pairwise correlation between the columns of heart_df
corr = heart_df.corr()

# boolean mask that is True on the upper triangle (diagonal included),
# so only the lower triangle of the matrix is drawn
mask = np.triu(np.ones(corr.shape, dtype=bool))

# figure and axes the heatmap is drawn on
f, ax = plt.subplots(figsize=(11, 9))

# diverging palette returned as a matplotlib colormap
cmap = sns.diverging_palette(230, 20, as_cmap=True)

# masked heatmap with square cells; the colour scale is centred on 0 and
# its upper limit is set to 0.3 through vmax
sns.heatmap(
    corr,
    mask=mask,
    cmap=cmap,
    vmax=.3,
    center=0,
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
    ax=ax,
).set_title("Columns Correlation")

In [None]:
# separate the target column from the feature columns
y = heart_df.output.to_numpy()
X = heart_df.drop('output', axis=1).to_numpy()

# split first, keeping the output class distribution consistent in both parts;
# a fixed random_state makes the split reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# fit the scaler on the training rows only, so that test-set statistics do not
# leak into preprocessing, then apply that same transform to the test rows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# convert data to pytorch tensors
def df_to_tensor(df):
    """Turn a NumPy array into a float32 torch tensor.

    Despite the parameter name, ``df`` is handed straight to
    ``torch.from_numpy``, so it has to be a NumPy array.
    """
    as_tensor = torch.from_numpy(df)
    return as_tensor.to(torch.float32)

# run each split through df_to_tensor, in order: train X / y, then test X / y
X_traint, y_traint, X_testt, y_testt = map(
    df_to_tensor, (X_train, y_train, X_test, y_test)
)

In [None]:
# pair each feature tensor with its label tensor
train_ds = TensorDataset(X_traint, y_traint)
test_ds = TensorDataset(X_testt, y_testt)

# mini-batch loaders: shuffled for training, fixed order for testing
batch_size = 5
train_dl = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)
test_dl = DataLoader(dataset=test_ds, batch_size=batch_size, shuffle=False)

In [None]:
# model architecture
class BinaryNetwork(nn.Module):
    """Fully connected network: input -> 64 -> 32 -> 16 -> output units.

    A ReLU follows each of the three hidden layers and a sigmoid is applied
    to the output layer.
    """

    def __init__(self, input_size, output_size):
        """Create the three hidden layers and the output layer."""
        super().__init__()
        # hidden layers narrow step by step from 64 to 16 units
        self.l1 = nn.Linear(input_size, 64)
        self.l2 = nn.Linear(64, 32)
        self.l3 = nn.Linear(32, 16)
        self.out = nn.Linear(16, output_size)

    def forward(self, x):
        """Return the sigmoid of the output layer for the input ``x``."""
        hidden = x
        for layer in (self.l1, self.l2, self.l3):
            hidden = F.relu(layer(hidden))
        # sigmoid scales the values to between 0 and 1
        return torch.sigmoid(self.out(hidden))

In [None]:
# seed torch's global RNG so that weight initialisation (and the shuffling done
# by the training DataLoader) is repeatable from run to run
torch.manual_seed(42)

# take the number of features from the training tensor instead of hard-coding 13
input_size = X_traint.shape[1]
output_size = 1  # a single sigmoid unit for the binary target
model = BinaryNetwork(input_size, output_size)
loss_fn = nn.BCELoss() # Binary Cross Entropy
optim = torch.optim.Adam(model.parameters(), lr=1e-3)
model

In [None]:
epochs = 100
losses = []  # mean per-sample training loss of every epoch
n_train = max(len(train_dl.dataset), 1)  # guard against division by zero
model.train()  # make sure the model is in training mode before fitting
for i in range(epochs):
    epoch_loss = 0.0
    for feat, target in train_dl:
        optim.zero_grad()
        out = model(feat)
        # add a trailing dimension to the target so it lines up with the model output
        loss = loss_fn(out, target.unsqueeze(1))
        # loss is a batch mean; weight it by the batch size so a smaller
        # final batch is not over-represented in the epoch average
        epoch_loss += loss.item() * feat.size(0)
        loss.backward()
        optim.step()
    epoch_loss /= n_train
    losses.append(epoch_loss)
    # print loss every 10 epochs; report the epoch average, not the last batch's loss
    if i % 10 == 0:
        print(f"Epoch: {i}/{epochs}, Loss = {epoch_loss:.5f}")

In [None]:
# recorded training loss against the epoch index
graph = sns.lineplot(x=list(range(epochs)), y=losses)
graph.set(title="Loss change during training", xlabel='epochs', ylabel='loss')
plt.show()

In [None]:
# evaluate the model
y_pred_list = []
model.eval()
with torch.no_grad():
    # loop names differ from X / y so the notebook-level feature matrix and
    # label vector are not overwritten by the last test batch
    for feat, _labels in test_dl:
        y_test_pred = model(feat)
        # round the sigmoid output to a hard 0/1 prediction
        y_pred_tag = torch.round(y_test_pred)
        # drop only the output dimension, (batch, 1) -> (batch,), so that a
        # final batch holding one sample stays 1-D instead of becoming a scalar
        y_pred_list.append(y_pred_tag.squeeze(1))

In [None]:
# confusion matrix of true vs. predicted labels; hstack joins the per-batch
# prediction tensors into one tensor
cfm = confusion_matrix(y_true=y_test, y_pred=torch.hstack(y_pred_list))
# annotate every cell with its share of all test samples
sns.heatmap(cfm / cfm.sum(), annot=True, fmt='.2%')

In [None]:
# text report of the classification metrics on the test split
print(classification_report(y_true=y_test, y_pred=torch.hstack(y_pred_list)))