<a href="https://colab.research.google.com/github/sajan-sarker/PyTorch_Practice_Codes/blob/main/PyTorch_Practice_NN_Heart_Disease_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PyTorch Practice Code

In [None]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import torch
import torch.nn as nn
import torch.optim as optim
from torchinfo import summary
from torch.utils.data import DataLoader, TensorDataset

### Importing Dataset

In [None]:
import kagglehub

# Fetch the latest published version of the dataset; `path` is then used as
# the directory that holds the downloaded files.
path = kagglehub.dataset_download("oktayrdeki/heart-disease")

# Load the CSV from that directory and show (rows, columns) as the cell output.
csv_file = f"{path}/heart_disease.csv"
df = pd.read_csv(csv_file)
df.shape

(10000, 21)

In [None]:
# Per-column overview (non-null counts and dtypes) to spot missing values and categorical columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Age                   9971 non-null   float64
 1   Gender                9981 non-null   object 
 2   Blood Pressure        9981 non-null   float64
 3   Cholesterol Level     9970 non-null   float64
 4   Exercise Habits       9975 non-null   object 
 5   Smoking               9975 non-null   object 
 6   Family Heart Disease  9979 non-null   object 
 7   Diabetes              9970 non-null   object 
 8   BMI                   9978 non-null   float64
 9   High Blood Pressure   9974 non-null   object 
 10  Low HDL Cholesterol   9975 non-null   object 
 11  High LDL Cholesterol  9974 non-null   object 
 12  Alcohol Consumption   7414 non-null   object 
 13  Stress Level          9978 non-null   object 
 14  Sleep Hours           9975 non-null   float64
 15  Sugar Consumption   

In [None]:
# Count the missing entries in every column.
missing_per_column = df.isna().sum()
print(missing_per_column)

Age                       29
Gender                    19
Blood Pressure            19
Cholesterol Level         30
Exercise Habits           25
Smoking                   25
Family Heart Disease      21
Diabetes                  30
BMI                       22
High Blood Pressure       26
Low HDL Cholesterol       25
High LDL Cholesterol      26
Alcohol Consumption     2586
Stress Level              22
Sleep Hours               25
Sugar Consumption         30
Triglyceride Level        26
Fasting Blood Sugar       22
CRP Level                 26
Homocysteine Level        20
Heart Disease Status       0
dtype: int64


### Preprocessing

In [None]:
# Impute the numeric (float64) features with their column means.
# NOTE(review): the means are computed on the full dataset, i.e. before the
# train/validation/test split done later in the notebook, so held-out rows
# influence the imputed values -- consider imputing after the split.
num_cols = df.select_dtypes(include='float64')
df[num_cols.columns] = num_cols.fillna(num_cols.mean())

# 'Alcohol Consumption' has 2586 missing values out of 10000 rows, presumably
# why it is dropped instead of imputed. errors='ignore' keeps this cell
# re-runnable once the column has already been removed.
df = df.drop(columns='Alcohol Consumption', errors='ignore')

In [None]:
# Impute the categorical (object) features with their most frequent value.
obj_cols = df.select_dtypes(include='object')
for col in obj_cols.columns:
  most_freq = df[col].mode()
  # mode() is empty when a column holds only missing values; leave it as is.
  if not most_freq.empty:
    # Assign the result back instead of calling fillna(inplace=True) on the
    # df[col] selection: that chained in-place form emits a FutureWarning and
    # no longer modifies df under pandas 3.0.
    df[col] = df[col].fillna(most_freq.iloc[0])

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(most_freq[0], inplace=True)


In [None]:
# Separate the target column from the feature columns.
y = df['Heart Disease Status']
X = df.drop(columns='Heart Disease Status')

label_encoder_features = LabelEncoder()
label_encoder_target = LabelEncoder()

cat_cols = X.select_dtypes(include='object').columns
# Encode categorical features with one encoder per column. Re-fitting a single
# shared encoder keeps only the last column's classes, so the other columns
# could not be decoded or encoded consistently for new data.
# `label_encoder_features` still ends up bound to the last fitted encoder.
feature_encoders = {}
for col in cat_cols:
  label_encoder_features = LabelEncoder()
  X[col] = label_encoder_features.fit_transform(X[col])
  feature_encoders[col] = label_encoder_features

# Encode the target labels as integers.
y = label_encoder_target.fit_transform(y)

In [None]:
# Hold out 30% of the rows, then halve that hold-out into validation and test sets.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Learn the scaling statistics from the training split only, then apply them
# to all three splits.
scaler = StandardScaler().fit(X_train)
X_train, X_val, X_test = (scaler.transform(split) for split in (X_train, X_val, X_test))

### Neural Network Model Building

In [None]:
class NN(nn.Module):
  """Small feed-forward binary classifier: input_size -> 5 -> 3 -> 1.

  Both hidden layers are followed by ReLU; the single output unit goes through
  a sigmoid, so the forward pass yields values in (0, 1).
  """

  def __init__(self, input_size):
    """Build the layer stack for inputs with `input_size` features."""
    super().__init__()
    layers = [
        nn.Linear(input_size, 5),  # first hidden layer
        nn.ReLU(),
        nn.Linear(5, 3),           # second hidden layer
        nn.ReLU(),
        nn.Linear(3, 1),           # output layer
        nn.Sigmoid(),
    ]
    self.network = nn.Sequential(*layers)

  def forward(self, features):
    """Run `features` through the network and return the sigmoid outputs."""
    return self.network(features)

In [None]:
def model_train(X_train, y_train, X_val, y_val, input_size, batch_size, learning_rate, num_epochs):
  """Train an `NN` binary classifier and print validation metrics every epoch.

  Args:
    X_train, X_val: feature matrices (e.g. NumPy arrays); converted to float32 tensors.
    y_train, y_val: label arrays, expected to be 1-D with values in [0, 1]
      (required by BCELoss); a trailing dimension is added so they match the
      model's (N, 1) output.
    input_size: number of input features passed to `NN`.
    batch_size: mini-batch size for the shuffled training DataLoader.
    learning_rate: learning rate for the Adam optimizer.
    num_epochs: number of passes over the training data.

  Returns:
    Tuple `(model, train_losses, val_losses)`: the trained model (left in
    training mode) and per-epoch lists holding the mean training batch loss
    and the validation loss.
  """
  # convert data into float32 tensors; labels become column vectors
  X_train = torch.as_tensor(X_train, dtype=torch.float32)
  y_train = torch.as_tensor(y_train, dtype=torch.float32).unsqueeze(1)
  X_val = torch.as_tensor(X_val, dtype=torch.float32)
  y_val = torch.as_tensor(y_val, dtype=torch.float32).unsqueeze(1)

  # create dataloader for training set
  train_dataset = TensorDataset(X_train, y_train)
  train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

  # initialize model, loss function and optimizer
  # ('cuda' is the valid device type; the misspelt 'cude' made torch.device raise on GPU machines)
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  model = NN(input_size).to(device)
  criterion = nn.BCELoss()
  optimizer = optim.Adam(model.parameters(), lr=learning_rate)

  # the validation set is evaluated as a single batch, so move it to the device once
  X_val = X_val.to(device)
  y_val = y_val.to(device)

  train_losses = []
  val_losses = []

  # training loop
  model.train()
  for epoch in range(num_epochs):
    running_train_loss = 0.0
    for batch_X, batch_y in train_loader:
      batch_X = batch_X.to(device)
      batch_y = batch_y.to(device)
      optimizer.zero_grad()
      outputs = model(batch_X)
      train_loss = criterion(outputs, batch_y)
      train_loss.backward()
      optimizer.step()
      running_train_loss += train_loss.item()

    # calculate average training loss (mean over batches)
    avg_train_loss = running_train_loss/len(train_loader)
    train_losses.append(avg_train_loss)

    # validation phase: no gradient tracking, model in eval mode
    model.eval()
    with torch.no_grad():
      val_outputs = model(X_val)
      val_loss = criterion(val_outputs, y_val).item()
      val_losses.append(val_loss)
      # threshold the sigmoid outputs at 0.5 to get hard class predictions
      val_predictions = (val_outputs >= 0.5).float()
      val_accuracy = (val_predictions.eq(y_val).sum() / float(y_val.shape[0])).item()
    # back to training mode for the next epoch
    model.train()
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")

  return model, train_losses, val_losses

In [None]:
# Instantiate an untrained network sized to the feature count and show its
# layer-by-layer parameter summary.
n_features = X_train.shape[1]
model = NN(n_features)
summary(model, input_size=(n_features,))

Layer (type:depth-idx)                   Output Shape              Param #
NN                                       [1]                       --
├─Sequential: 1-1                        [1]                       --
│    └─Linear: 2-1                       [5]                       100
│    └─ReLU: 2-2                         [5]                       --
│    └─Linear: 2-3                       [3]                       18
│    └─ReLU: 2-4                         [3]                       --
│    └─Linear: 2-5                       [1]                       4
│    └─Sigmoid: 2-6                      [1]                       --
Total params: 122
Trainable params: 122
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

In [None]:
%%time
model, train_loss, val_loss = model_train(X_train, y_train, X_val, y_val, X_train.shape[1], 32, 0.01, 50)

Epoch [1/50], Train Loss: 0.5138, Val Loss: 0.4891, Val Accuracy: 0.8080
Epoch [2/50], Train Loss: 0.5028, Val Loss: 0.4892, Val Accuracy: 0.8080
Epoch [3/50], Train Loss: 0.5027, Val Loss: 0.4892, Val Accuracy: 0.8080
Epoch [4/50], Train Loss: 0.5027, Val Loss: 0.4891, Val Accuracy: 0.8080
Epoch [5/50], Train Loss: 0.5030, Val Loss: 0.4892, Val Accuracy: 0.8080
Epoch [6/50], Train Loss: 0.5030, Val Loss: 0.4893, Val Accuracy: 0.8080
Epoch [7/50], Train Loss: 0.5029, Val Loss: 0.4893, Val Accuracy: 0.8080
Epoch [8/50], Train Loss: 0.5026, Val Loss: 0.4892, Val Accuracy: 0.8080
Epoch [9/50], Train Loss: 0.5029, Val Loss: 0.4892, Val Accuracy: 0.8080
Epoch [10/50], Train Loss: 0.5030, Val Loss: 0.4892, Val Accuracy: 0.8080
Epoch [11/50], Train Loss: 0.5028, Val Loss: 0.4891, Val Accuracy: 0.8080
Epoch [12/50], Train Loss: 0.5029, Val Loss: 0.4894, Val Accuracy: 0.8080
Epoch [13/50], Train Loss: 0.5027, Val Loss: 0.4903, Val Accuracy: 0.8080
Epoch [14/50], Train Loss: 0.5025, Val Loss: 0.

In [None]:
# Collect the parameters once, then report each tensor's shape in the order
# the model yields them.
model_params = list(model.parameters())

for i, param in enumerate(model_params):
    print(f"Parameter {i}: shape = {param.shape}")

Parameter 0: shape = torch.Size([5, 19])
Parameter 1: shape = torch.Size([5])
Parameter 2: shape = torch.Size([3, 5])
Parameter 3: shape = torch.Size([3])
Parameter 4: shape = torch.Size([1, 3])
Parameter 5: shape = torch.Size([1])


In [None]:
# Pull the learned weights and biases out of the trained `model`.
# Slots 0, 2 and 4 of `model.network` are indexed here as the Linear layers.
first_linear = model.network[0]   # num_features -> 5
second_linear = model.network[2]  # 5 -> 3
output_linear = model.network[4]  # 3 -> 1 (output layer)

weight1, bias1 = first_linear.weight.data, first_linear.bias.data      # (5, num_features), (5,)
weight2, bias2 = second_linear.weight.data, second_linear.bias.data    # (3, 5), (3,)
weight3, bias3 = output_linear.weight.data, output_linear.bias.data    # (1, 3), (1,)

# Print every tensor under its label to verify the extraction.
for label, tensor in (
    ("First Layer Weights:\n", weight1),
    ("First Layer Biases:\n", bias1),
    ("Second Layer Weights:\n", weight2),
    ("Second Layer Biases:\n", bias2),
    ("Third Layer Weights:\n", weight3),
    ("Third Layer Biases:\n", bias3),
):
    print(label, tensor)

First Layer Weights:
 tensor([[ 2.8269e-02,  1.2095e-01,  2.2711e-01,  1.7547e-01,  2.7030e-02,
          3.5280e-02, -1.7148e-01,  2.4663e-01, -2.2709e-01, -2.1423e-01,
          9.3512e-02,  2.0054e-01,  1.0654e-01, -1.4674e-02, -1.0468e-01,
          2.0094e-02,  1.9525e-02,  3.3871e-02, -2.5295e-01],
        [-7.1505e-02, -3.0973e-01, -1.2926e-01, -2.9150e-01,  1.5016e-01,
          1.2252e-02,  1.3324e-01,  2.7900e-01, -1.7190e-01, -1.8996e-02,
         -2.2946e-01,  3.0023e-01, -2.8385e-01,  2.3134e-01, -5.9619e-02,
          4.8256e-02, -8.9575e-02, -1.5250e-01,  5.8856e-02],
        [-2.1486e-01,  3.1651e-02,  4.2000e-02,  5.2087e-02,  2.5094e-01,
          2.0579e-02, -6.7470e-02,  1.7323e-01,  9.2217e-02, -4.9068e-02,
          1.5926e-01,  1.2028e-01, -7.0278e-02,  1.6187e-01,  1.1167e-01,
          2.7400e-01, -1.0934e-01, -1.0872e-01,  2.7178e-01],
        [-8.5326e-02, -1.4658e-01,  2.2368e-01, -3.5930e-03,  3.0699e-02,
          1.5045e-01,  6.8869e-02,  2.8594e-01,  1.9