<a href="https://colab.research.google.com/github/stanley1208/AI-Learning/blob/main/TinyCNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms



In [15]:
# choose the compute device: CUDA when PyTorch can see a GPU, CPU otherwise
device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("device:",device)
# fix PyTorch's global RNG seed so weight init / shuffling are repeatable
torch.manual_seed(0)

device: cuda


<torch._C.Generator at 0x7828ba11a0d0>

In [16]:
# preprocessing pipeline applied to every image:
#   1. ToTensor  -> float tensor scaled to [0,1]
#   2. Normalize -> standardize with the usual MNIST mean/std
transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,),std=(0.3081,)),
])

In [17]:
# MNIST train / test splits, stored under root "data" (downloaded if needed)
# and preprocessed with `transform`
train_ds=datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=transform,
)
test_ds=datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=transform,
)

In [18]:
# dataloaders
# Pin host memory only when batches will actually be copied to a CUDA device:
# on a CPU-only runtime the flag has no benefit and makes PyTorch emit a warning.
# Training batches are shuffled; evaluation batches keep dataset order.
train_loader=DataLoader(train_ds, batch_size=128, shuffle=True,num_workers=2,pin_memory=(device.type=="cuda"))
test_loader=DataLoader(test_ds, batch_size=256, shuffle=False,num_workers=2,pin_memory=(device.type=="cuda"))


In [19]:
# sanity peek: grab a single training batch and show the image / label tensor sizes
# (the iterator is left unnamed so it is discarded as soon as the batch is taken)
x0,y0=next(iter(train_loader))
print(x0.size(),y0.size())

torch.Size([128, 1, 28, 28]) torch.Size([128])


In [20]:
class TinyCNN(nn.Module):
  """Small two-stage convolutional classifier.

  `features` runs two conv(3x3, padding=1) -> ReLU -> 2x2 max-pool stages
  (channels 1 -> 8 -> 16). For 1x28x28 inputs the spatial size goes
  28 -> 14 -> 7, which is what the first linear layer (16*7*7 inputs) expects.
  `classifier` flattens that map and maps it through a 64-unit hidden layer
  to 10 logits (classes 0..9).
  """

  def __init__(self):
    super().__init__()

    # stage 1: 1 -> 8 channels, size kept by padding, then halved by pooling
    # stage 2: 8 -> 16 channels, size kept by padding, then halved by pooling
    conv_layers=[
        nn.Conv2d(1,8,kernel_size=3,padding=1),
        nn.ReLU(),
        nn.MaxPool2d(2),
        nn.Conv2d(8,16,kernel_size=3,padding=1),
        nn.ReLU(),
        nn.MaxPool2d(2),
    ]
    self.features=nn.Sequential(*conv_layers)

    flat_dim=16*7*7   # 784 features after flattening a 16x7x7 map
    head_layers=[
        nn.Flatten(),
        nn.Linear(flat_dim,64),
        nn.ReLU(),
        nn.Linear(64,10),   # one logit per class
    ]
    self.classifier=nn.Sequential(*head_layers)

  def forward(self,x):
    """Run `x` through the conv stack and the dense head; returns raw logits."""
    return self.classifier(self.features(x))

# build the network, move its parameters to the selected device, then show the layer layout
model=TinyCNN()
model=model.to(device)
print(model)

TinyCNN(
  (features): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=64, bias=True)
    (2): ReLU()
    (3): Linear(in_features=64, out_features=10, bias=True)
  )
)


In [21]:
# Adam over all model parameters with learning rate 1e-3
optimizer=optim.Adam(params=model.parameters(),lr=0.001)
# cross-entropy loss computed from raw logits and integer class labels
criterion=nn.CrossEntropyLoss()


In [23]:
def train_epoch(model,loader,criterion,optimizer,device):
  """Train `model` for one pass over `loader` and return epoch metrics.

  Args:
    model: network to optimize; it is switched to training mode.
    loader: iterable yielding `(inputs, targets)` batches.
    criterion: loss callable invoked as `criterion(logits, targets)`. The
      epoch loss assumes it returns the batch-mean loss (the default
      reduction of `nn.CrossEntropyLoss`).
    optimizer: optimizer that updates `model`'s parameters.
    device: device each batch is moved to before the forward pass.

  Returns:
    `(mean_loss, accuracy)`: the sample-weighted average loss and the
    fraction of samples whose argmax prediction equals the target.

  Raises:
    ValueError: if `loader` yields no samples.
  """
  model.train()
  # running sums must exist before the loop accumulates into them
  total_loss,correct,total=0.0,0,0
  for xb,yb in loader:
    xb,yb=xb.to(device),yb.to(device)
    logits=model(xb)  # forward
    loss=criterion(logits,yb)   # compute loss

    optimizer.zero_grad()   # clear old grads
    loss.backward()   # compute gradients
    optimizer.step()  # update weights

    batch_size=xb.size(0)
    # weight the batch-mean loss by batch size so a smaller last batch is not over-counted
    total_loss+=loss.item()*batch_size
    preds=logits.argmax(dim=1)
    correct+=(preds==yb).sum().item()
    total+=batch_size
  if total==0:
    raise ValueError("loader yielded no samples")
  return total_loss/total,correct/total
