In [3]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch import optim
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Load and preprocess the MNIST dataset
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
X = np.array(X).astype('float32')
y = np.array(y).astype('int64')
X /= 255.0


X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = torch.from_numpy(X_train.reshape(-1, 1, 28, 28))
y_train = torch.from_numpy(y_train)
train_data = list(zip(X_train, y_train))
train_dataloader = DataLoader(train_data, batch_size=100, shuffle=True)

X_val = torch.from_numpy(X_val.reshape(-1, 1, 28, 28))
y_val = torch.from_numpy(y_val)
val_data = list(zip(X_val, y_val))
val_dataloader = DataLoader(val_data, batch_size=100)

# Define the network structure
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.fc1 = nn.Linear(26*26*32, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = x.view(-1, 26*26*32) # flatten
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

model = Net()

# Choose an optimizer and loss function
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
criterion = nn.CrossEntropyLoss()

def train(epoch, log_interval=200):
    model.train()
    
    for batch_idx, (data, target) in enumerate(train_dataloader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % log_interval == 0:
            print(f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_dataloader.dataset)} ({100. * batch_idx / len(train_dataloader):.0f}%)]\tLoss: {loss.item():.6f}")

# Define the testing function
def test():
    model.eval()
    test_loss = 0
    correct = 0
    
    with torch.no_grad():
        for data, target in val_dataloader:
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(val_dataloader.dataset)

    print(f"\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(val_dataloader.dataset)} ({100. * correct / len(val_dataloader.dataset):.0f}%)\n")

# Execute the training/testing process
for epoch in range(1, 11):
    train(epoch)
    test()


  warn(



Test set: Average loss: 0.0030, Accuracy: 12777/14000 (91%)


Test set: Average loss: 0.0023, Accuracy: 13068/14000 (93%)


Test set: Average loss: 0.0019, Accuracy: 13297/14000 (95%)


Test set: Average loss: 0.0016, Accuracy: 13369/14000 (95%)


Test set: Average loss: 0.0014, Accuracy: 13440/14000 (96%)


Test set: Average loss: 0.0013, Accuracy: 13500/14000 (96%)


Test set: Average loss: 0.0012, Accuracy: 13534/14000 (97%)


Test set: Average loss: 0.0011, Accuracy: 13560/14000 (97%)


Test set: Average loss: 0.0010, Accuracy: 13576/14000 (97%)


Test set: Average loss: 0.0010, Accuracy: 13599/14000 (97%)

