In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import torchvision
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import random
import torch.nn.functional as F

# Seed every RNG this notebook imports so that Restart Kernel -> Run All starts
# from the same random state (with no explicit generator, DataLoader shuffling
# draws from torch's global RNG).
# NOTE: some GPU kernels are nondeterministic, so CUDA runs may still differ
# slightly from one another.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

DATA_ROOT = './data'   # MNIST is downloaded here if it is not already present
BATCH_SIZE = 64        # used for both the training and the test loader

# The only preprocessing step is the PIL-image -> tensor conversion.
transform = transforms.ToTensor()
train_dataset = datasets.MNIST(root=DATA_ROOT, train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root=DATA_ROOT, train=False, download=True, transform=transform)

# Only the training batches are shuffled; the test loader keeps dataset order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [6]:
class Network(nn.Module):
    """Two-branch convolutional classifier producing 10 raw class scores.

    The single-channel input is passed through a 3x3 and a 5x5 convolution in
    parallel. Both branches are padded so the spatial size is preserved, which
    lets their 32-channel outputs be concatenated into 64 channels. A 2x2
    max-pool halves the spatial size, the result is flattened, and a
    three-layer fully connected head (with dropout after the first two layers)
    produces the logits.

    The first linear layer expects 64*14*14 features, i.e. a 28x28 input image.
    """

    def __init__(self):
        super().__init__()

        # Parallel feature extractors: same input, different receptive fields.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(1, 32, kernel_size=5, padding=2)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)
        # Fully connected head: 64*14*14 -> 256 -> 64 -> 10.
        self.l1 = nn.Linear(64 * 14 * 14, 256)
        self.l2 = nn.Linear(256, 64)
        self.l3 = nn.Linear(64, 10)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Map a batch of images to unnormalised class scores, one row per sample."""
        # Run both convolution branches on the same input and stack the channels.
        branches = [self.relu(conv(x)) for conv in (self.conv1, self.conv2)]
        pooled = self.pool(torch.cat(branches, dim=1))

        # Flatten everything except the batch dimension.
        hidden = pooled.view(pooled.size(0), -1)

        # Two hidden layers, each followed by ReLU and dropout.
        for layer in (self.l1, self.l2):
            hidden = self.dropout(self.relu(layer(hidden)))

        # No activation on the output: the caller receives raw logits.
        return self.l3(hidden)

In [15]:
# Train the CNN with Adam and stop early once the error rate has failed to
# reach a new minimum for `patience` consecutive epochs. A checkpoint is written
# every time a new minimum is reached.
#
# NOTE(review): the error rate that drives both early stopping and checkpoint
# selection is measured on `test_loader`, so the test set is also acting as the
# validation set and the reported numbers are optimistically biased. Consider
# holding out part of the training data for this purpose.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to its device before the optimizer is built from its parameters.
model = Network().to(device)
modelname = 'CNN'
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

loss_rate = []            # per-epoch error rate (1 - accuracy), in epoch order
loss_min = float('inf')   # lowest error rate seen so far
steps = 0                 # consecutive epochs without a new minimum
patience = 5              # stop once `steps` reaches this value
i = 0                     # epoch counter; the first epoch is reported as 1

while steps < patience:
    i += 1

    # ---- one pass over the training data ----
    model.train()  # enable dropout
    for data, targets in train_loader:
        data, targets = data.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(data)
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()

    # ---- measure the error rate ----
    model.eval()  # disable dropout
    correct, total = 0, 0

    with torch.no_grad():  # no autograd graph is built, so no detach is needed
        for data, targets in test_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            predicted = outputs.argmax(dim=1)  # index of the largest logit per row
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    # Despite the name, this is the classification error rate, not a loss value.
    lossrate = 1 - correct / total
    print(f'Epoch: {i}, Lossrate: {lossrate}')
    loss_rate.append(lossrate)

    if lossrate < loss_min:
        # Strict improvement only: a tie with the current best counts as a miss.
        loss_min = lossrate
        steps = 0

        # Saves the whole module object (not just its state_dict), one file per
        # improving epoch.
        torch.save(model, f'{modelname}_epoch{i}.pt')
    else:
        steps += 1

Epoch: 1, Lossrate: 0.023900000000000032
Epoch: 2, Lossrate: 0.016199999999999992
Epoch: 3, Lossrate: 0.012599999999999945
Epoch: 4, Lossrate: 0.012199999999999989
Epoch: 5, Lossrate: 0.011399999999999966
Epoch: 6, Lossrate: 0.0121
Epoch: 7, Lossrate: 0.010499999999999954
Epoch: 8, Lossrate: 0.010800000000000032
Epoch: 9, Lossrate: 0.009800000000000031
Epoch: 10, Lossrate: 0.009600000000000053
Epoch: 11, Lossrate: 0.009600000000000053
Epoch: 12, Lossrate: 0.011199999999999988
Epoch: 13, Lossrate: 0.009499999999999953
Epoch: 14, Lossrate: 0.010499999999999954
Epoch: 15, Lossrate: 0.009800000000000031
Epoch: 16, Lossrate: 0.009099999999999997
Epoch: 17, Lossrate: 0.009099999999999997
Epoch: 18, Lossrate: 0.010099999999999998
Epoch: 19, Lossrate: 0.009700000000000042
Epoch: 20, Lossrate: 0.009099999999999997
Epoch: 21, Lossrate: 0.009600000000000053
