In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from utils import train

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Load Titanic dataset (assuming it's saved locally as 'titanic.csv')
df = pd.read_csv('Titanic-Dataset.csv')

# Preprocess Data (fill missing values, encode categorical variables)
df['Age'].fillna(df['Age'].median(), inplace=True)
df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)

# Convert 'Sex' to binary (male = 0, female = 1)
df['Sex'] = LabelEncoder().fit_transform(df['Sex'])

# Convert 'Embarked' to numeric
df['Embarked'] = LabelEncoder().fit_transform(df['Embarked'])

# Drop columns not needed for prediction
df = df.drop(['Name', 'Ticket', 'Cabin'], axis=1)

# Separate features and target
X = df.drop('Survived', axis=1).values
y = df['Survived'].values

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)


In [3]:
# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [4]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Create DataLoader for batching
train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

In [5]:
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return self.softmax(x)

# Initialize the model
input_size = X_train.shape[1]
hidden_size = 1  # You can experiment with this value
output_size = 2    # Binary classification (survived or not)

model = MLP(input_size, hidden_size, output_size).to(device)

In [6]:
# CrossEntropyLoss is typically used for classification problems
criterion = nn.CrossEntropyLoss()

# Optimizer (Stochastic Gradient Descent with momentum)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)


In [7]:
train_metrics_3, val_metrics_3, test_metrics_3 = train(model, train_loader, None, test_loader, 20, optimizer, criterion, False)


Epoch: 1 Total_Time: 0.2191 Average_Time_per_batch: 0.0095 Train_Accuracy: 0.6264 Train_Loss: 0.6746 
Epoch: 2 Total_Time: 0.0312 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.6250 Train_Loss: 0.6662 
Epoch: 3 Total_Time: 0.0320 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.6250 Train_Loss: 0.6624 
Epoch: 4 Total_Time: 0.0327 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.6236 Train_Loss: 0.6611 
Epoch: 5 Total_Time: 0.0341 Average_Time_per_batch: 0.0015 Train_Accuracy: 0.6236 Train_Loss: 0.6605 
Epoch: 6 Total_Time: 0.0364 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.6236 Train_Loss: 0.6599 
Epoch: 7 Total_Time: 0.0366 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.6236 Train_Loss: 0.6592 
Epoch: 8 Total_Time: 0.0383 Average_Time_per_batch: 0.0017 Train_Accuracy: 0.6236 Train_Loss: 0.6585 
Epoch: 9 Total_Time: 0.0377 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.6236 Train_Loss: 0.6574 
Epoch: 10 Total_Time: 0.0393 Average_Time_per_batch: 0.0017 Train_Accuracy: 0.623

In [8]:
import torch
from torch import nn

hidden_dims = [hidden_size, hidden_size, output_size]
total = sum(hidden_dims)

blocks = len(hidden_dims)
features = input_size
neural_blocks = []
for dim in hidden_dims:
    std_dev = torch.sqrt(torch.tensor(1 / features)).to(device)
    neural_blocks.append(torch.randn(dim, features).to(device) * std_dev)
    features += dim

feature_blocks = []
features_start = 0
for i in range(len(neural_blocks)):
    features_end = neural_blocks[i].shape[1]
    block = neural_blocks[i][:, features_start:]
    for j in range(i + 1, len(neural_blocks)):
        block = torch.cat((block, neural_blocks[j][:, features_start:features_end]), dim=0)
    feature_blocks.append(nn.Parameter(block))
    features_start = features_end

biases = biases = nn.Parameter(torch.empty(total).uniform_(0.0, 1.0)).to(device)

In [9]:
from dpn_2.dpn import DPN as DPN_2
    
class DPN_Softmax(nn.Module):
    def __init__(self, input_size, hidden_dims, output_size):
        super(DPN_Softmax, self).__init__()
        self.fc1 =  DPN_2(input_size, sum(hidden_dims), output_size, True)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        return self.softmax(x)
    
model = DPN_Softmax(input_size, hidden_dims, output_size).to(device)
model.fc1.weights.extend(feature_blocks)
model.fc1.biases = biases

In [10]:
# CrossEntropyLoss is typically used for classification problems
criterion = nn.CrossEntropyLoss()

# Optimizer (Stochastic Gradient Descent with momentum)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [11]:
train_metrics_3, val_metrics_3, test_metrics_3 = train(model, train_loader, None, test_loader, 20, optimizer, criterion, False)


Epoch: 1 Total_Time: 0.1629 Average_Time_per_batch: 0.0071 Train_Accuracy: 0.6826 Train_Loss: 0.6830 
Epoch: 2 Total_Time: 0.0685 Average_Time_per_batch: 0.0030 Train_Accuracy: 0.6868 Train_Loss: 0.6819 
Epoch: 3 Total_Time: 0.0713 Average_Time_per_batch: 0.0031 Train_Accuracy: 0.6966 Train_Loss: 0.6804 
Epoch: 4 Total_Time: 0.0737 Average_Time_per_batch: 0.0032 Train_Accuracy: 0.6994 Train_Loss: 0.6791 
Epoch: 5 Total_Time: 0.0747 Average_Time_per_batch: 0.0032 Train_Accuracy: 0.7008 Train_Loss: 0.6778 
Epoch: 6 Total_Time: 0.0754 Average_Time_per_batch: 0.0033 Train_Accuracy: 0.7037 Train_Loss: 0.6765 
Epoch: 7 Total_Time: 0.0747 Average_Time_per_batch: 0.0032 Train_Accuracy: 0.7051 Train_Loss: 0.6752 
Epoch: 8 Total_Time: 0.0815 Average_Time_per_batch: 0.0035 Train_Accuracy: 0.7051 Train_Loss: 0.6739 
Epoch: 9 Total_Time: 0.0749 Average_Time_per_batch: 0.0033 Train_Accuracy: 0.7065 Train_Loss: 0.6726 
Epoch: 10 Total_Time: 0.0749 Average_Time_per_batch: 0.0033 Train_Accuracy: 0.709

In [12]:
from dpn_2.dpn import DPN as DPN_2
    
class DPN_Softmax(nn.Module):
    def __init__(self, input_size, hidden_dims, output_size):
        super(DPN_Softmax, self).__init__()
        self.fc1 =  DPN_2(input_size, sum(hidden_dims), output_size, False)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.fc1(x)
        return self.softmax(x)
    
model = DPN_Softmax(input_size, hidden_dims, output_size).to(device)
model.fc1.compile()

In [13]:
# CrossEntropyLoss is typically used for classification problems
criterion = nn.CrossEntropyLoss()

# Optimizer (Stochastic Gradient Descent with momentum)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [14]:
train_metrics_3, val_metrics_3, test_metrics_3 = train(model, train_loader, None, test_loader, 20, optimizer, criterion, False)


Epoch: 1 Total_Time: 0.0816 Average_Time_per_batch: 0.0035 Train_Accuracy: 0.5716 Train_Loss: 0.6948 
Epoch: 2 Total_Time: 0.0816 Average_Time_per_batch: 0.0035 Train_Accuracy: 0.5730 Train_Loss: 0.6921 
Epoch: 3 Total_Time: 0.0814 Average_Time_per_batch: 0.0035 Train_Accuracy: 0.5702 Train_Loss: 0.6888 
Epoch: 4 Total_Time: 0.0834 Average_Time_per_batch: 0.0036 Train_Accuracy: 0.5730 Train_Loss: 0.6854 
Epoch: 5 Total_Time: 0.0814 Average_Time_per_batch: 0.0035 Train_Accuracy: 0.5815 Train_Loss: 0.6821 
Epoch: 6 Total_Time: 0.0812 Average_Time_per_batch: 0.0035 Train_Accuracy: 0.5885 Train_Loss: 0.6787 
Epoch: 7 Total_Time: 0.0805 Average_Time_per_batch: 0.0035 Train_Accuracy: 0.5955 Train_Loss: 0.6757 
Epoch: 8 Total_Time: 0.0806 Average_Time_per_batch: 0.0035 Train_Accuracy: 0.6025 Train_Loss: 0.6728 
Epoch: 9 Total_Time: 0.0805 Average_Time_per_batch: 0.0035 Train_Accuracy: 0.6053 Train_Loss: 0.6699 
Epoch: 10 Total_Time: 0.0807 Average_Time_per_batch: 0.0035 Train_Accuracy: 0.611

In [15]:
from dpn_3.dpn import DPN as DPN_2
class DPN_Softmax(nn.Module):
    def __init__(self, input_size, hidden_dims, output_size):
        super(DPN_Softmax, self).__init__()
        self.fc1 =  DPN_2(input_size, sum(hidden_dims), output_size, True)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        return self.softmax(x)
    
model = DPN_Softmax(input_size, hidden_dims, output_size).to(device)

In [16]:
# CrossEntropyLoss is typically used for classification problems
criterion = nn.CrossEntropyLoss()

# Optimizer (Stochastic Gradient Descent with momentum)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [17]:
train_metrics_3, val_metrics_3, test_metrics_3 = train(model, train_loader, None, test_loader, 20, optimizer, criterion, False)


Epoch: 1 Total_Time: 0.0427 Average_Time_per_batch: 0.0019 Train_Accuracy: 0.4986 Train_Loss: 0.6897 
Epoch: 2 Total_Time: 0.0395 Average_Time_per_batch: 0.0017 Train_Accuracy: 0.6587 Train_Loss: 0.6614 
Epoch: 3 Total_Time: 0.0377 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.6924 Train_Loss: 0.6360 
Epoch: 4 Total_Time: 0.0362 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.7205 Train_Loss: 0.6127 
Epoch: 5 Total_Time: 0.0361 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.7570 Train_Loss: 0.5942 
Epoch: 6 Total_Time: 0.0366 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.7851 Train_Loss: 0.5780 
Epoch: 7 Total_Time: 0.0350 Average_Time_per_batch: 0.0015 Train_Accuracy: 0.8020 Train_Loss: 0.5651 
Epoch: 8 Total_Time: 0.0349 Average_Time_per_batch: 0.0015 Train_Accuracy: 0.8062 Train_Loss: 0.5547 
Epoch: 9 Total_Time: 0.0365 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.8048 Train_Loss: 0.5463 
Epoch: 10 Total_Time: 0.0353 Average_Time_per_batch: 0.0015 Train_Accuracy: 0.811