In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
import numpy as np


In [2]:

# Define the LeNet model in PyTorch
class LeNet(nn.Module):
    """Small LeNet-style CNN that uses a polynomial activation, x**2 + x.

    Pipeline: 3x3 conv -> BatchNorm -> polynomial -> 2x2 average pool, twice,
    followed by a mean over the remaining spatial positions and a single
    linear layer that produces the class logits.

    Args:
        in_channels: Number of channels of the input images. Defaults to 1.
        num_classes: Number of logits produced per sample. Defaults to 10.

    With the default arguments the layers, their registration order and the
    resulting ``state_dict`` keys are the same as before these parameters
    existed, so existing checkpoints still load.
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 10):
        super().__init__()
        # Convolutions carry no bias: the BatchNorm that follows each one has
        # its own learnable shift.
        self.conv1 = nn.Conv2d(in_channels, 4, kernel_size=3, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(4)
        self.conv2 = nn.Conv2d(4, 16, kernel_size=3, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(16)
        # One pooling module is shared by both stages (it has no parameters).
        self.avgpool = nn.AvgPool2d(kernel_size=2, stride=2)
        # The classifier sees one value per conv2 channel after the spatial mean.
        self.fc1 = nn.Linear(16, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a batch of images.

        Args:
            x: Tensor of shape (batch, in_channels, height, width).
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = x ** 2 + x  # Polynomial activation
        x = self.avgpool(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = x ** 2 + x  # Polynomial activation
        x = self.avgpool(x)
        x = x.mean(dim=[2, 3])  # Global average pooling over height and width
        x = self.fc1(x)
        return x


In [3]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import time
import torchvision
 
# Prefer the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Turn each image into a tensor, then subtract 0.5 (a std of 1.0 leaves the
# scale unchanged).
mnist_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (1.0,)),
    ]
)

# Training split, downloaded into ./dataset/ when requested by download=True.
train_dataset = torchvision.datasets.MNIST(
    root='./dataset/',
    train=True,
    download=True,
    transform=mnist_transform,
)
# Test split, read from the same root directory.
test_dataset = torchvision.datasets.MNIST(
    root='./dataset/',
    train=False,
    transform=mnist_transform,
)

In [4]:

# Seed PyTorch's global RNG so the random weight initialisation below is
# repeatable when the notebook is restarted and run top to bottom.
torch.manual_seed(0)

# Initialize the LeNet model
model = LeNet()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()  # takes the raw logits returned by the model
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [5]:
# Shuffled mini-batches of 128 samples drawn from the training split.
train_loader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
)

In [6]:

# Training loop
# Makes `num_epochs` passes over `train_loader`, taking one optimizer step per
# mini-batch, and then writes the resulting weights to disk.
num_epochs = 1
for epoch in range(num_epochs):
    model.train()  # training mode, so the BatchNorm layers use batch statistics
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()  # drop gradients accumulated by the previous step
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
    # Logged once the epoch is finished, on every 5th epoch. `batch_idx` and
    # `loss` are the values left over from the final mini-batch, so this is
    # the last batch's loss rather than an epoch average.
    if epoch % 5 == 0:
        print(f'Epoch {epoch}, Batch {batch_idx}, Loss: {loss.item()}')

# Save the model
# Only the state_dict (parameters and buffers) is stored, not the module object.
torch.save(model.state_dict(), 'best_practice.pth')


Epoch 0, Batch 468, Loss: 0.7638010382652283


In [17]:
import torchsummary

In [None]:
# Bare expression: the cell output is the module's repr (its registered layers).
model

In [7]:
# Take a fixed sample straight from the training dataset. Reading `data[0]` /
# `target[0]` instead would depend on variables left behind by the training
# loop, and on whichever batch the shuffle happened to produce last.
sample_data, _sample_label = train_dataset[0]
# The dataset yields the label as a plain int; wrap it in a 0-dim tensor so
# tensor methods such as `.flatten()` used by the JSON export still work.
sample_target = torch.tensor(_sample_label)

In [8]:
# Shape of the single exported sample.
sample_data.shape

torch.Size([1, 28, 28])

In [38]:
# Move the parameters and buffers to the CPU; the call returns the module
# itself, so its repr becomes the cell output.
model.cpu()

LeNet(
  (conv1): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1), bias=False)
  (bn1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(4, 16, kernel_size=(3, 3), stride=(1, 1), bias=False)
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (fc1): Linear(in_features=16, out_features=10, bias=True)
)

In [40]:
# Check which device the sample tensor lives on (the JSON export cell calls
# `.numpy()` on it).
sample_data.device

device(type='cpu')

In [9]:
# Rebind `model` to a freshly constructed LeNet.
# NOTE(review): this instance does not carry the weights trained earlier;
# those were written to 'best_practice.pth'. Confirm an untrained network is
# what the cells that follow are meant to use.
model = LeNet()
# Test split, built with the same transform as the training data.
test_dataset = torchvision.datasets.MNIST(
    root='./dataset/',
    train=False,
    transform=mnist_transform,
)

In [11]:
import torchsummary

# Print a per-layer table of output shapes and parameter counts. The input
# shape is taken from the first test image, and the pass runs on the CPU.
torchsummary.summary(
    model,
    test_dataset[0][0].shape,
    device='cpu',
)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 4, 26, 26]              36
       BatchNorm2d-2            [-1, 4, 26, 26]               8
         AvgPool2d-3            [-1, 4, 13, 13]               0
            Conv2d-4           [-1, 16, 11, 11]             576
       BatchNorm2d-5           [-1, 16, 11, 11]              32
         AvgPool2d-6             [-1, 16, 5, 5]               0
            Linear-7                   [-1, 10]             170
Total params: 822
Trainable params: 822
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.08
Params size (MB): 0.00
Estimated Total Size (MB): 0.09
----------------------------------------------------------------


In [None]:
import json

# Write the sample to disk as {"X": flattened image values, "y": flattened label}.
with open("best_practice_torch.json", "w") as f:
    f.write(
        json.dumps(
            {
                'X': sample_data.numpy().reshape(-1).tolist(),
                'y': sample_target.reshape(-1).tolist(),
            }
        )
    )