# Import libraries to use

In [None]:
import numpy as np
import math
import torch
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Import data to use

In [None]:
# Load the heart-disease CSV with pandas so we can check whether it needs
# cleaning or feature engineering.
df = pd.read_csv(r"../input/heart-disease-uci/heart.csv")
# Bare expression at the end of the cell: the notebook renders the DataFrame.
df

In [None]:
#Let's see if we have any nan's
def explore_dataframe(df):
    """Print a quick data-quality report for ``df``.

    The report is written to stdout in four sections separated by dashed
    rules: the ``DataFrame.info()`` summary, the NaN count per column, the
    NaN percentage per column, and the dtype of every column.

    Args:
        df: The pandas DataFrame to inspect.
    """
    separator = "-" * 25
    print(separator)
    # info() writes its report itself and returns None, so wrapping it in
    # print() would append a stray "None" line to the output.
    df.info()  # overall info of columns
    print(separator)
    print(df.isna().sum())  # quantity of NaNs
    print(separator)
    print(df.isna().mean() * 100)  # percentage of NaNs
    print(separator)
    print(df.dtypes)  # variable types
    print(separator)
    

In [None]:
explore_dataframe(df)
# Cool, our dataset is complete, so we don't need to clean it.

# EDA with plots

In [None]:
# Column names of the frame: every column but the last is a feature,
# and the last column is the prediction target.
columns = list(df)
column_features, target = columns[:-1], columns[-1]
print(column_features)
print(target)


In [None]:
# Plot the distribution of the target column.
# NOTE(review): distplot is deprecated in newer seaborn releases — confirm
# the installed version still provides it.
sns_plot = sns.distplot(df[target])

In [None]:
#Exploring the range and distribution of numerical Variables
def univariate_plot(df, columns, figsize=(15, 30)):
    """Draw a box plot and a distribution plot for each requested column.

    The figure has one row per column: the box plot goes in the left axes
    and the ``sns.distplot`` distribution in the right axes.

    Args:
        df: DataFrame holding the data to plot.
        columns: Iterable of column names in ``df`` to plot.
        figsize: Size of the whole figure, forwarded to ``plt.subplots``.
    """
    columns = list(columns)
    if not columns:
        # Nothing to draw; asking for a grid with zero rows would fail.
        return
    # squeeze=False keeps the axes grid two-dimensional even when only one
    # column is plotted, so the ax[row, col] indexing below always works.
    fig, ax = plt.subplots(len(columns), 2, figsize=figsize, squeeze=False)
    for idx, column in enumerate(columns):
        sns.boxplot(x=df[column], ax=ax[idx, 0])
        sns.distplot(df[column], ax=ax[idx, 1])
    plt.tight_layout()
        

In [None]:
# Box and distribution plots for every column (`columns` includes the target).
univariate_plot(df, columns)

In [None]:
def bivariate_plot(df, columns, target, figsize=(15, 40)):
    """Scatter every column in ``columns`` against the ``target`` column.

    The subplots are stacked vertically, one per column, each titled with
    the column name; the target name is written once along the left edge
    of the figure. The figure is shown before returning.

    Args:
        df: DataFrame holding the data to plot.
        columns: Iterable of column names used for the x axes.
        target: Name of the column used for every y axis.
        figsize: Size of the whole figure, forwarded to ``plt.subplots``.
    """
    columns = list(columns)
    if not columns:
        # Nothing to draw; asking for a grid with zero rows would fail.
        return
    # squeeze=False always yields a 2-D (rows, 1) array of axes; without it
    # a single column returns a lone Axes object that cannot be indexed.
    f, axarr = plt.subplots(len(columns), figsize=figsize, squeeze=False)
    target_values = df[target].values
    for axis, column in zip(axarr[:, 0], columns):
        axis.scatter(df[column].values, target_values)
        axis.set_title(column)
    f.text(-0.01, 0.5, target, va='center', rotation='vertical', fontsize=12)
    plt.tight_layout()
    plt.show()

In [None]:
# Scatter each feature column against the target column.
bivariate_plot(df,column_features,target)

In [None]:
def heatmap(df):
    """Show an annotated heatmap of the pairwise correlations in ``df``.

    Args:
        df: DataFrame whose ``corr()`` matrix is plotted.
    """
    plt.figure(figsize=(10, 6))
    sns.heatmap(df.corr(), cmap=plt.cm.Reds, annot=True)
    # The title is split over two lines; the original literal had lost the
    # backslash of the line break and rendered as "betweennthe".
    plt.title('Heatmap displaying the relationship between\nthe features of the data',
              fontsize=13)
    plt.show()

In [None]:
heatmap(df)
# With this we have reached a few insights about the data:
# - The features that correlate most positively with our target are cp (chest pain),
#   thalach (maximum heart rate achieved) and slope (the slope of the peak exercise ST segment).
# - The features that correlate most negatively with our target are exang (exercise induced angina),
#   oldpeak (ST depression induced by exercise relative to rest), ca (number of major vessels (0-3)
#   colored by fluoroscopy) and thal (defects).


In [None]:
# These columns are not in the 0-1 range, which might worsen our model.
# Normalization function
def normalize_columns(df, columns):
    """Min-max scale the given columns of ``df`` to the [0, 1] range, in place.

    Each column is rescaled as ``(x - min) / (max - min)``. A constant
    column (``max == min``) is set to 0.0 instead of being divided by zero,
    which would fill it with NaN.

    Args:
        df: DataFrame to modify in place.
        columns: Iterable of column names in ``df`` to rescale.

    Returns:
        None. The frame is changed in place.
    """
    for column in columns:
        low = df[column].min()
        span = df[column].max() - low
        if span == 0:
            # Constant column: every value equals the minimum.
            df[column] = 0.0
        else:
            df[column] = (df[column] - low) / span

In [None]:
# Min-max normalize every column in place (`columns` also contains the target).
# NOTE(review): the scaling statistics come from the full dataset, before the
# train/val/test split further down — confirm this leakage is acceptable.
normalize_columns(df,columns)

# Create Dataloaders

In [None]:
# Shuffle the rows of the dataframe, since they are ordered by target: the
# first half is all 1's and the second half is all 0's.
# NOTE(review): no random_state is passed, so the split is presumably not
# reproducible between runs — confirm whether a fixed seed is wanted.
df = df.sample(frac=1).reset_index(drop=True)

In [None]:
# Get the train/validation/test split sizes for our dataloaders:
# a bit over 80% for training, the remainder halved between validation and test.
# Clamp to len(df) so a tiny frame cannot produce a negative remainder.
train_size = min(int(0.8 * len(df)) + 1, len(df))
val_size = math.ceil((len(df) - train_size) / 2)
val_len = train_size + val_size  # row position where the validation slice ends
# The test split is whatever is left after validation; when the remainder is
# odd it is one row shorter than the validation split, so it must not simply
# copy val_size.
test_size = len(df) - val_len
# Report the three sizes (not the val_len end position).
print(train_size, val_size, test_size)

In [None]:
# Carve the shuffled frame into three consecutive row ranges:
# training rows first, then validation rows, then test rows.
df_train, df_val, df_test = (
    df.iloc[:train_size],
    df.iloc[train_size:val_len],
    df.iloc[val_len:],
)

In [None]:
# Sanity-check how many rows ended up in each split, then set the
# mini-batch size shared by all data loaders.
print(*map(len, (df_train, df_val, df_test)))
batch_size = 32

In [None]:
# Create our data loaders. For each split the 'target' column becomes the
# label tensor and all remaining columns become the float32 feature tensor.
train_target = torch.tensor(df_train['target'].values.astype(np.float32))
train = torch.tensor(df_train.drop('target', axis=1).values.astype(np.float32))
train_tensor = TensorDataset(train, train_target)
train_loader = DataLoader(dataset=train_tensor, batch_size=batch_size, shuffle=True)

val_target = torch.tensor(df_val['target'].values.astype(np.float32))
val = torch.tensor(df_val.drop('target', axis=1).values.astype(np.float32))
val_tensor = TensorDataset(val, val_target)
# The validation loader must wrap val_tensor; wrapping train_tensor would
# report "validation" accuracy measured on the training rows. Evaluation
# does not depend on sample order, so no shuffling is needed.
val_loader = DataLoader(dataset=val_tensor, batch_size=batch_size, shuffle=False)

test_target = torch.tensor(df_test['target'].values.astype(np.float32))
test = torch.tensor(df_test.drop('target', axis=1).values.astype(np.float32))
test_tensor = TensorDataset(test, test_target)
test_loader = DataLoader(dataset=test_tensor, batch_size=batch_size, shuffle=False)

# Create model

In [None]:
# An MLP (multi-layer perceptron) should work just fine for our solution
import torch.nn.functional as F
class Net(torch.nn.Module):
    """Single-hidden-layer perceptron: Linear -> ReLU -> Dropout -> Linear.

    The forward pass returns the raw output of the last linear layer; no
    softmax is applied.

    Args:
        n_feature: Number of input features per sample.
        n_hidden: Number of units in the hidden layer.
        n_output: Number of output values per sample.
        dropout_p: Probability of zeroing a hidden activation while
            training. Defaults to 0.40.
    """

    def __init__(self, n_feature, n_hidden, n_output, dropout_p=0.40):
        super().__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)  # hidden layer
        self.out = torch.nn.Linear(n_hidden, n_output)  # output layer

        self.dropout = torch.nn.Dropout(dropout_p)  # dropout layer

    def forward(self, x):
        """Map a batch of feature rows to a batch of ``n_output`` raw scores."""
        x = F.relu(self.hidden(x))  # activation function for hidden layer
        x = self.dropout(x)
        return self.out(x)



In [None]:
def weights_init_normal(m):
    """
    Applies initial weights to certain layers in a model.

    Modules whose class name contains 'Linear' get their weights drawn from
    a normal distribution with mean = 0, std dev = 0.02, and their bias (when
    the layer has one) filled with zeros. Every other module is left
    untouched, so the function can be passed to ``torch.nn.Module.apply``.

    :param m: A module (one layer of a network).
    """
    classname = m.__class__.__name__
    if 'Linear' not in classname:
        return

    weight = getattr(m, 'weight', None)
    if weight is not None:
        torch.nn.init.normal_(weight, mean=0.0, std=0.02)

    # Layers built with bias=False expose bias as None; skip instead of crashing.
    bias = getattr(m, 'bias', None)
    if bias is not None:
        torch.nn.init.zeros_(bias)
    

In [None]:
# Create our neural net: 13 input features, 128 hidden units, 2 outputs.
# NOTE(review): weights_init_normal is defined above but is not applied here,
# so the layers keep their default initialisation — confirm this is intended.
net = Net(n_feature=13, n_hidden=128, n_output=2)     # define the network
print(net)  # net architecture


In [None]:
# Optimizer and loss function
learning_rate = 0.1
# SGD with Nesterov momentum over all of the network's parameters.
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, nesterov=True)
criterion = torch.nn.CrossEntropyLoss()  # Categorical loss

In [None]:
# Learning-rate scheduler; since we are using SGD it helps us with convergence.
# Configured with step_size=25 and gamma=0.1 on the optimizer created above.
from torch.optim.lr_scheduler import StepLR
scheduler = StepLR(optimizer, step_size=25, gamma=0.1)

In [None]:
def _accuracy_percent(model, loader):
    """Return the accuracy of ``model`` over ``loader``, in percent.

    The model is switched to evaluation mode for the pass and its previous
    train/eval mode is restored before returning, so calling this in the
    middle of training does not leave dropout disabled. No gradients are
    tracked. Returns 0.0 when the loader yields no samples.
    """
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_features, batch_labels in loader:
            # Predicted class = index of the largest output score
            predicted = model(batch_features).argmax(dim=1)
            total += batch_labels.size(0)
            correct += (predicted == batch_labels.long()).sum().item()
    model.train(was_training)
    return 100.0 * correct / total if total else 0.0


# Train our model
iteration = 0  # optimizer steps taken so far (avoids shadowing builtin iter)
num_epochs = 100
eval_every = 1  # run validation after every `eval_every` optimizer steps
for epoch in range(num_epochs):
    net.train()
    # Print the learning rate(s) the optimizer uses during this epoch
    print('Epoch:', epoch, 'LR:', [group['lr'] for group in optimizer.param_groups])
    for features, labels in train_loader:
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass to get output/logits
        outputs = net(features)

        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels.long())

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()

        iteration += 1

        if iteration % eval_every == 0:
            # Accuracy on the validation loader; the helper puts the net
            # back in training mode afterwards.
            accuracy = _accuracy_percent(net, val_loader)

            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))

    # Decay the learning rate once per epoch, after the optimizer updates
    scheduler.step()

In [None]:
# Test our model
test_loss = 0.
correct = 0
total = 0

# set the module to evaluation mode
net.eval()

# Evaluation only: no gradients need to be tracked
with torch.no_grad():
    # Iterate through test dataset
    for features, labels in test_loader:
        targets = labels.long()

        # Forward pass only to get output
        outputs = net(features)

        # calculate the loss (mean over the batch)
        loss = criterion(outputs, targets)

        # Accumulate the summed loss so the final average is per sample,
        # even when the last batch is smaller than the others
        batch_count = targets.size(0)
        test_loss += loss.item() * batch_count

        # Get predictions from the maximum value
        predicted = outputs.argmax(dim=1)

        # Total number of labels
        total += batch_count

        # Total correct predictions
        correct += (predicted == targets).sum().item()

# Report once, after every batch has been seen; guard against an empty test set
test_loss = test_loss / total if total else 0.
accuracy = 100 * correct / total if total else 0.

print('Test Loss: {:.6f}\n'.format(test_loss))

print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
    accuracy, correct, total))