In [14]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

In [15]:

# Load and preprocess Heart Disease dataset
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
data = pd.read_csv(url,na_values="?")
print(data)


     PassengerId  Survived  Pclass  \
0              1         0       3   
1              2         1       1   
2              3         1       3   
3              4         1       1   
4              5         0       3   
..           ...       ...     ...   
886          887         0       2   
887          888         1       1   
888          889         0       3   
889          890         1       1   
890          891         0       3   

                                                  Name     Sex   Age  SibSp  \
0                              Braund, Mr. Owen Harris    male  22.0      1   
1    Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                               Heikkinen, Miss. Laina  female  26.0      0   
3         Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                             Allen, Mr. William Henry    male  35.0      0   
..                                                 ...     ...   ... 

In [16]:
data.isna().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [17]:
data['Age'].fillna(int(data['Age'].mean()), inplace=True)
data['Embarked'].fillna('S',inplace=True)
data

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Age'].fillna(int(data['Age'].mean()), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Embarked'].fillna('S',inplace=True)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,29.0,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [18]:
data.drop(columns = ["Cabin", "Ticket", "Name"], inplace=True)
data

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,1,0,3,male,22.0,1,0,7.2500,S
1,2,1,1,female,38.0,1,0,71.2833,C
2,3,1,3,female,26.0,0,0,7.9250,S
3,4,1,1,female,35.0,1,0,53.1000,S
4,5,0,3,male,35.0,0,0,8.0500,S
...,...,...,...,...,...,...,...,...,...
886,887,0,2,male,27.0,0,0,13.0000,S
887,888,1,1,female,19.0,0,0,30.0000,S
888,889,0,3,female,29.0,1,2,23.4500,S
889,890,1,1,male,26.0,0,0,30.0000,C


In [19]:
data.isna().sum()

PassengerId    0
Survived       0
Pclass         0
Sex            0
Age            0
SibSp          0
Parch          0
Fare           0
Embarked       0
dtype: int64

In [20]:
data_encoded = pd.get_dummies(data, columns=["Sex", "Embarked"])
data_encoded

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,1,0,3,22.0,1,0,7.2500,False,True,False,False,True
1,2,1,1,38.0,1,0,71.2833,True,False,True,False,False
2,3,1,3,26.0,0,0,7.9250,True,False,False,False,True
3,4,1,1,35.0,1,0,53.1000,True,False,False,False,True
4,5,0,3,35.0,0,0,8.0500,False,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,27.0,0,0,13.0000,False,True,False,False,True
887,888,1,1,19.0,0,0,30.0000,True,False,False,False,True
888,889,0,3,29.0,1,2,23.4500,True,False,False,False,True
889,890,1,1,26.0,0,0,30.0000,False,True,True,False,False


In [21]:
correlation_matrix = data_encoded.corr().abs()
correlation_matrix["Survived"]

PassengerId    0.005007
Survived       1.000000
Pclass         0.338481
Age            0.067814
SibSp          0.035322
Parch          0.081629
Fare           0.257307
Sex_female     0.543351
Sex_male       0.543351
Embarked_C     0.168240
Embarked_Q     0.003650
Embarked_S     0.149683
Name: Survived, dtype: float64

In [22]:
X = data_encoded.drop(columns=['Survived']).values
y = data_encoded['Survived'].values

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [23]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# Create DataLoader for training
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)



In [46]:
for batch_images, batch_labels in train_loader:
    print(batch_images.size(), batch_labels)

torch.Size([8, 11]) tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.]])
torch.Size([8, 11]) tensor([[1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.]])
torch.Size([8, 11]) tensor([[0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.]])
torch.Size([8, 11]) tensor([[1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.]])
torch.Size([8, 11]) tensor([[0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.]])
torch.Size([8, 11]) tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.]])
torch.Size([8, 11]) tensor([[0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.]])
torch.Size([8, 11]) tensor([[0.],
        [1.],
        [0.],
       

In [36]:
train_dataset.tensors # contains X_train_tensor and y_train_tensor

(tensor([[-0.4432, -1.5661,  1.2264,  ..., -0.4820, -0.3076,  0.6158],
         [ 1.1197, -0.3694, -0.5047,  ..., -0.4820, -0.3076,  0.6158],
         [-0.2449,  0.8274,  0.1877,  ..., -0.4820, -0.3076,  0.6158],
         ...,
         [ 1.6135,  0.8274,  0.8801,  ..., -0.4820, -0.3076,  0.6158],
         [-0.0389, -1.5661, -1.1972,  ..., -0.4820, -0.3076,  0.6158],
         [-1.3335, -1.5661, -0.6586,  ..., -0.4820, -0.3076,  0.6158]]),
 tensor([[0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.],
         [1.],
         [1.],
         [0.],
         [0.],
         [0.],
         [1.],
         [1.],
         [1.],
         [1.],
         [0.],
         [1.],
         [0.],
         [0.],
         [0.],
         [1.],
         [1.],
         [1.],
         [1.],
         [0.],
         [1.],
         [0.],
         [0.],
         [0.],
         [0.],
         [1.],
         [0.],
         [0.],
         [0.],
   

In [24]:
X_train_tensor

tensor([[-0.4432, -1.5661,  1.2264,  ..., -0.4820, -0.3076,  0.6158],
        [ 1.1197, -0.3694, -0.5047,  ..., -0.4820, -0.3076,  0.6158],
        [-0.2449,  0.8274,  0.1877,  ..., -0.4820, -0.3076,  0.6158],
        ...,
        [ 1.6135,  0.8274,  0.8801,  ..., -0.4820, -0.3076,  0.6158],
        [-0.0389, -1.5661, -1.1972,  ..., -0.4820, -0.3076,  0.6158],
        [-1.3335, -1.5661, -0.6586,  ..., -0.4820, -0.3076,  0.6158]])

In [56]:
# Define MLP model in PyTorch
class MLPModel(nn.Module):
    def __init__(self, input_dim):
        super(MLPModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.Tanh(),
            nn.Linear(16, 1),
            nn.Sigmoid()

        )

    def forward(self, x):
        return self.model(x)
    


In [57]:
# Initialize model, loss function, and optimizer
input_dim = X.shape[1]
model = MLPModel(input_dim)
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss
optimizer = optim.AdamW(model.parameters(), lr=0.001)

# Train the model
epochs = 10
for epoch in range(epochs):
    model.train()
    epoch_loss = 0.0
    correct = 0
    total = 0

    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        predictions = (outputs > 0.5).float()
        correct += (predictions == batch_y).sum().item()
        total += batch_y.size(0)

    epoch_accuracy = correct / total
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss / len(train_loader):.4f}, Accuracy: {epoch_accuracy:.4f}")


Epoch 1/10, Loss: 0.5848, Accuracy: 0.7022
Epoch 2/10, Loss: 0.4590, Accuracy: 0.8034
Epoch 3/10, Loss: 0.4388, Accuracy: 0.8090
Epoch 4/10, Loss: 0.4278, Accuracy: 0.8188
Epoch 5/10, Loss: 0.4150, Accuracy: 0.8174
Epoch 6/10, Loss: 0.4069, Accuracy: 0.8244
Epoch 7/10, Loss: 0.4023, Accuracy: 0.8258
Epoch 8/10, Loss: 0.3947, Accuracy: 0.8301
Epoch 9/10, Loss: 0.3881, Accuracy: 0.8469
Epoch 10/10, Loss: 0.3793, Accuracy: 0.8469


In [54]:
# from torchviz import make_dot
# dot = make_dot(model(X_train_tensor), params=dict(model.named_parameters()))
# dot.format = 'png'  
# dot.render('model_graph') 

In [58]:
# Evaluate the model
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    test_loss = criterion(test_outputs, y_test_tensor).item()
    test_predictions = (test_outputs > 0.5).float()
    #print("test_predictions: ", test_predictions)
    #print("y_test_tensor: ", y_test_tensor)
    test_accuracy = (test_predictions == y_test_tensor).float().mean().item()

print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Test Loss: 0.4294, Test Accuracy: 0.8212
