In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.transforms import ToTensor
import torchvision.datasets as datasets
from torch.utils.data import Dataset, DataLoader

class CNNClassifier(nn.Module):
    """Small convolutional network followed by a two-layer classifier head.

    The feature extractor stacks three ``Conv2d(3x3) -> ReLU -> MaxPool2d(2)``
    stages. For 28x28 inputs the spatial size shrinks
    28 -> 26 -> 13 -> 11 -> 5 -> 3 -> 1, so each sample ends up as a
    64x1x1 map, i.e. exactly the 64 features the head expects. Other input
    sizes that do not reduce to 1x1 will not match the head's input width.

    Args:
        in_channels: Number of channels of the input images.
        num_classes: Number of logits produced per sample.
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 10):
        super().__init__()
        # Attribute names and layer order are kept stable so that state-dict
        # keys (``net.0.weight`` ...) stay compatible with existing checkpoints.
        self.net = nn.Sequential(
            nn.Conv2d(in_channels, 64, 3),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2),
            nn.Conv2d(64, 128, 3),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2),
            nn.Conv2d(128, 64, 3),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2),
        )
        self.classification_head = nn.Sequential(
            nn.Linear(64, 20, bias=True),
            nn.ReLU(),
            nn.Linear(20, num_classes, bias=True),
        )

    def forward(self, x):
        """Return raw (unnormalized) class logits for a batch of images.

        Args:
            x: Batch of images laid out as ``(batch, in_channels, H, W)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        features = self.net(x)
        # Keep the batch dimension and collapse channels/height/width into
        # one feature vector per sample.
        return self.classification_head(features.flatten(start_dim=1))

def generategaussian(tindx):
    """Draw one random 1x28x28 tensor from a normal distribution.

    Args:
        tindx: Indexable pair whose element 0 is the mean and element 1 is
            the standard deviation of the distribution.

    Returns:
        A tensor of shape ``(1, 28, 28)`` sampled with ``torch.normal``.
    """
    mean = tindx[0]
    std = tindx[1]
    sample_shape = (1, 28, 28)
    return torch.normal(mean=mean, std=std, size=sample_shape)

class MyDataset(Dataset):
    """Synthetic two-class dataset of Gaussian-noise images.

    Each sample gets a random label in {0, 1}; its image is then drawn by
    ``generategaussian`` using the (mean, std) pair registered for that
    label. All labels are drawn first, then all images, so the random
    number stream is consumed in that order.

    Args:
        n: Number of samples to generate.
    """

    def __init__(self, n):
        # (mean, std) of the noise distribution for each class label.
        gaussian_params = {0: (0.5, 2), 1: (1, 2.5)}

        labels = []
        for _ in range(n):
            # randint's upper bound is exclusive, so labels are 0 or 1.
            labels.append(torch.randint(0, 2, (1,)).item())
        self.y = labels

        self.x = [generategaussian(gaussian_params[label]) for label in labels]

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        image = self.x[idx]
        label = self.y[idx]
        return image, label

    def __len__(self):
        """Return the number of generated samples."""
        return len(self.x)

# --- Setup -----------------------------------------------------------------
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNNClassifier().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)
batch_size = 4

train_dataset = MyDataset(n=1000)
test_dataset = MyDataset(n=200)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Counted once; the same value is reported again after training.
total_params = sum(p.numel() for p in model.parameters())
print(f"Total params = {total_params}")

# --- Training --------------------------------------------------------------
model.train()
for epoch in range(20):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Report the mean loss over every window of 100 mini-batches.
        if i % 100 == 99:
            print(f"[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

# ``loss`` here is the loss of the very last mini-batch, not an epoch average.
print(f"Finished Training. Final loss = {loss.item()}, Total params = {total_params}")

# --- Evaluation ------------------------------------------------------------
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for tinputs, tlabels in test_loader:
        tinputs, tlabels = tinputs.to(device), tlabels.to(device)
        toutputs = model(tinputs)

        # The predicted class is the index of the largest logit.
        _, predicted = torch.max(toutputs, 1)
        total += tlabels.size(0)
        # .item() keeps ``correct`` a plain int, so the accuracy below prints
        # as a number rather than as ``tensor(...)``.
        correct += (predicted == tlabels).sum().item()

print(f"Accuracy on test set: {100 * correct / total}%")


Total params = 149798
[1,   100] loss: 0.989
[1,   200] loss: 0.747
[2,   100] loss: 0.671
[2,   200] loss: 0.617
[3,   100] loss: 0.453
[3,   200] loss: 0.363
[4,   100] loss: 0.335
[4,   200] loss: 0.239
[5,   100] loss: 0.240
[5,   200] loss: 0.180
[6,   100] loss: 0.172
[6,   200] loss: 0.177
[7,   100] loss: 0.167
[7,   200] loss: 0.100
[8,   100] loss: 0.149
[8,   200] loss: 0.104
[9,   100] loss: 0.083
[9,   200] loss: 0.076
[10,   100] loss: 0.044
[10,   200] loss: 0.064
[11,   100] loss: 0.024
[11,   200] loss: 0.047
[12,   100] loss: 0.026
[12,   200] loss: 0.033
[13,   100] loss: 0.007
[13,   200] loss: 0.008
[14,   100] loss: 0.006
[14,   200] loss: 0.012
[15,   100] loss: 0.004
[15,   200] loss: 0.003
[16,   100] loss: 0.003
[16,   200] loss: 0.003
[17,   100] loss: 0.002
[17,   200] loss: 0.001
[18,   100] loss: 0.001
[18,   200] loss: 0.001
[19,   100] loss: 0.001
[19,   200] loss: 0.001
[20,   100] loss: 0.001
[20,   200] loss: 0.001
Finished Training. Final loss = 6.36