In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
from dpn_3.dpn import DPN
from utils import train

In [2]:
class MLP_MNIST(nn.Module):
    """Small fully connected classifier: Linear -> ReLU -> Linear -> ReLU -> Linear.

    The forward pass returns the raw output of the last linear layer; no
    softmax is applied inside the module.

    Args:
        in_features: Length of each flattened input vector. Defaults to 784,
            the flattened image size used throughout this notebook.
        hidden_features: Width of both hidden layers. Defaults to 12.
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, in_features: int = 784, hidden_features: int = 12, num_classes: int = 10):
        super().__init__()
        # Keep the layers inside a Sequential called `model` so parameter
        # names (model.0.weight, ...) are the same as before.
        self.model = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map a batch of shape (N, in_features) to logits of shape (N, num_classes)."""
        return self.model(x)


In [3]:
# Hyperparameters and run configuration
seed = 0
batch_size = 64
learning_rate = 0.001
epochs = 20
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed PyTorch's global RNG so weight initialisation and DataLoader shuffling
# start from the same state on every Restart & Run All.
torch.manual_seed(seed)

In [4]:
# Fetch MNIST once and keep the fully preprocessed tensors on `device`.
num_classes = 10

train_dataset = datasets.MNIST(root='./data', train=True, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, download=True)


def _flatten_and_encode(dataset):
    """Return (inputs, one-hot targets) for `dataset`, both moved to `device`.

    Pixel data is cast to float, divided by 255 and reshaped to rows of 784
    values; targets are one-hot encoded over `num_classes` and cast to float.
    """
    inputs = dataset.data.float().div(255).view(-1, 784).to(device)
    targets = F.one_hot(dataset.targets.to(device), num_classes=num_classes).float()
    return inputs, targets


train_data, train_labels = _flatten_and_encode(train_dataset)
test_data, test_labels = _flatten_and_encode(test_dataset)

# Wrap the preloaded tensors in DataLoaders; only the training loader shuffles.
train_loader = DataLoader(TensorDataset(train_data, train_labels), batch_size=batch_size, shuffle=True)
test_loader = DataLoader(TensorDataset(test_data, test_labels), batch_size=batch_size, shuffle=False)

In [5]:
# Baseline model: the MLP moved to `device` and compiled with TorchScript,
# optimised with Adam against a cross-entropy loss.
criterion = nn.CrossEntropyLoss()
model_1 = torch.jit.script(MLP_MNIST().to(device))
optimizer = optim.Adam(model_1.parameters(), lr=learning_rate)

In [6]:
# Train model_1 and keep the three metric collections returned by `train`.
train_metrics_1, val_metrics_1, test_metrics_1 = train(
    model_1,
    train_loader,
    None,  # presumably the validation-loader slot (none is used) — confirm against utils.train
    test_loader,
    epochs,
    optimizer,
    criterion,
)


Epoch: 1 Total_Time: 2.2055 Average_Time_per_batch: 0.0024 Train_Accuracy: 0.7984 Train_Loss: 0.6833 
Epoch: 2 Total_Time: 1.8261 Average_Time_per_batch: 0.0019 Train_Accuracy: 0.9023 Train_Loss: 0.3387 
Epoch: 3 Total_Time: 1.6340 Average_Time_per_batch: 0.0017 Train_Accuracy: 0.9193 Train_Loss: 0.2831 
Epoch: 4 Total_Time: 1.5308 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.9292 Train_Loss: 0.2480 
Epoch: 5 Total_Time: 1.5278 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.9366 Train_Loss: 0.2253 
Epoch: 6 Total_Time: 1.5342 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.9404 Train_Loss: 0.2109 
Epoch: 7 Total_Time: 1.5039 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.9440 Train_Loss: 0.1995 
Epoch: 8 Total_Time: 1.5449 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.9449 Train_Loss: 0.1929 
Epoch: 9 Total_Time: 1.5161 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.9465 Train_Loss: 0.1873 
Epoch: 10 Total_Time: 1.4865 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.948

In [7]:
# Second model: DPN from the project's dpn_3 package, traced to TorchScript.
# Arguments are presumably (input size, hidden units, output size) — confirm
# against dpn_3.dpn.
# Use the shared `device` instead of a hard-coded .cuda() so this cell also
# runs on CPU-only machines and the model sits on the same device as the data.
model_2 = DPN(784, 34, 10).to(device)
# NOTE(review): tracing records the ops executed for this example input; this
# assumes DPN's forward has no batch-size-dependent Python control flow — confirm.
model_2 = torch.jit.trace(model_2, torch.randn(batch_size, 784).to(device))
# Rebinds `optimizer` (previously built for model_1) to model_2's parameters.
optimizer = optim.Adam(model_2.parameters(), lr=learning_rate)

In [8]:
# Train model_2 with the same loaders, epoch count and loss as model_1.
train_metrics_2, val_metrics_2, test_metrics_2 = train(
    model_2,
    train_loader,
    None,  # presumably the validation-loader slot (none is used) — confirm against utils.train
    test_loader,
    epochs,
    optimizer,
    criterion,
)


Epoch: 1 Total_Time: 1.4606 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.8852 Train_Loss: 0.4422 
Epoch: 2 Total_Time: 1.2975 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.9274 Train_Loss: 0.2569 
Epoch: 3 Total_Time: 1.3095 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.9414 Train_Loss: 0.2070 
Epoch: 4 Total_Time: 1.3153 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.9514 Train_Loss: 0.1721 
Epoch: 5 Total_Time: 1.3121 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.9570 Train_Loss: 0.1473 
Epoch: 6 Total_Time: 1.3061 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.9619 Train_Loss: 0.1302 
Epoch: 7 Total_Time: 1.3058 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.9656 Train_Loss: 0.1168 
Epoch: 8 Total_Time: 1.2883 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.9681 Train_Loss: 0.1066 
Epoch: 9 Total_Time: 1.2843 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.9705 Train_Loss: 0.0989 
Epoch: 10 Total_Time: 1.3085 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.972