In [4]:
from sklearn.datasets import fetch_openml
import torch

# Fetch the OpenML 'mnist_784' dataset as plain NumPy arrays (no DataFrame).
mnist = fetch_openml('mnist_784', as_frame=False)
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Features as float32 on `device`.
# NOTE(review): presumably one flattened image per row with raw pixel values —
# confirm shape/scaling before feeding the conv net.
X = torch.from_numpy(mnist.data).float().to(device)
# Labels are parsed to integers and forced to int64: NumPy's default `int`
# width is platform-dependent (32-bit on some setups), while
# nn.CrossEntropyLoss requires int64 class indices.
Y = torch.from_numpy(mnist.target.astype(int)).long().to(device)

In [341]:
import torch.nn.functional as F
import torch.nn as nn

class MulticlassCCN(nn.Module):
    """Small convolutional classifier for 28x28 single-channel images.

    Layout: (3x3 conv -> ReLU -> 2x2 max-pool) applied twice, followed by two
    fully connected layers. The output is 10 raw scores per sample; no
    softmax is applied inside the network.
    """

    def __init__(self, c1=32, c2=64, fc_hidden=128):
        """
        c1: number of channels after first conv layer
        c2: number of channels after second conv layer
        fc_hidden: number of hidden units in the fully connected layer
        """
        super().__init__()
        # padding=1 with a 3x3 kernel keeps the spatial size unchanged.
        self.conv1 = nn.Conv2d(1, c1, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(c1, c2, kernel_size=3, padding=1)
        # Two 2x2 poolings shrink 28x28 down to 7x7, hence c2 * 7 * 7 inputs.
        self.fc1 = nn.Linear(c2 * 7 * 7, fc_hidden)
        self.fc2 = nn.Linear(fc_hidden, 10)

    def forward(self, X):
        """Compute class scores for a batch of images.

        X: float tensor shaped (N, 1, 28, 28). For convenience, flattened
           rows (N, 784) and channel-less images (N, 28, 28) are also
           accepted and reshaped to (N, 1, 28, 28) first.

        Returns a tensor of shape (N, 10) with unnormalised scores.
        Raises RuntimeError if a 2-D input does not have 784 values per row.
        """
        if X.dim() == 2:
            # Flattened images: restore the channel and spatial axes.
            X = X.reshape(X.size(0), 1, 28, 28)
        elif X.dim() == 3:
            # Batch of single-channel images without a channel axis.
            X = X.unsqueeze(1)

        X = F.relu(self.conv1(X))       # (N, c1, 28, 28)
        X = F.max_pool2d(X, 2, 2)       # (N, c1, 14, 14)
        X = F.relu(self.conv2(X))       # (N, c2, 14, 14)
        X = F.max_pool2d(X, 2, 2)       # (N, c2, 7, 7)
        # flatten(1) also works on non-contiguous activations, where
        # view(N, -1) would raise.
        X = X.flatten(1)                # (N, c2 * 7 * 7)
        X = F.relu(self.fc1(X))         # (N, fc_hidden)
        X = self.fc2(X)                 # (N, 10)
        return X


In [None]:
# Keep the model on the same device as the data tensors; otherwise the forward
# pass fails with a device-mismatch error whenever CUDA is available.
# The move happens before the optimizer is built, so Adam is handed the
# parameters that are actually trained.
model = MulticlassCCN().to(device)
# CrossEntropyLoss takes the network's raw scores together with integer class labels.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)