In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Pick the compute device: the first CUDA GPU when one is visible, else the CPU
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Preprocessing shared by both splits: convert each image to a tensor, then
# standardise it with the MNIST mean / std constants
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# MNIST splits stored under ./data_gitignore; only the training split asks
# torchvision to download the files
train_dataset = torchvision.datasets.MNIST(
    './data_gitignore', train=True, transform=transform, download=True
)
test_dataset = torchvision.datasets.MNIST(
    './data_gitignore', train=False, transform=transform
)

# Batch iterators: reshuffled batches of 64 for training, fixed-order batches
# of 1000 for evaluation
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Define the neural network
class SimpleNN(nn.Module):
    """Fully connected classifier with a single hidden layer.

    The input is flattened to 28 * 28 = 784 features per sample, projected to
    392 hidden units with a ReLU, and mapped to 10 output scores. The scores
    are returned as-is (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        # Sub-modules are registered in the order they are applied in forward()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=28 * 28, out_features=392)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=392, out_features=10)

    def forward(self, x):
        """Return the 10 class scores for each sample in the batch ``x``."""
        hidden = self.relu(self.fc1(self.flatten(x)))
        return self.fc2(hidden)

# Instantiate the network and place its parameters on the selected device
model = SimpleNN().to(device)

# Cross-entropy on the raw class scores, optimised with Adam (lr = 1e-3)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Each epoch: one optimisation pass over train_loader, then an accuracy
# measurement over test_loader
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    # The batch index was never used, so iterate over the loader directly
    for images, labels in train_loader:
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() extracts a plain Python number for the running total
        running_loss += loss.item()

    # Mean per-batch training loss for this epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

    # Measure accuracy on the test split with gradient tracking disabled
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Predicted class = index of the largest score. argmax replaces the
            # legacy `torch.max(outputs.data, 1)`: no `.data` access, and no
            # assignment to `_`, which IPython uses for the last cell output.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        print(f'Accuracy on test set: {100 * correct / total:.2f}%')

# Persist only the learned parameters (the state_dict), not the module object
torch.save(model.state_dict(), 'mnist_model.pth')
print("Model saved to mnist_model.pth")

Using device: cpu
Epoch [1/10], Loss: 0.2132
Accuracy on test set: 96.21%
Epoch [2/10], Loss: 0.0882
Accuracy on test set: 97.24%
Epoch [3/10], Loss: 0.0595
Accuracy on test set: 97.69%
Epoch [4/10], Loss: 0.0432
Accuracy on test set: 98.09%
Epoch [5/10], Loss: 0.0331
Accuracy on test set: 97.88%
Epoch [6/10], Loss: 0.0268
Accuracy on test set: 98.16%
Epoch [7/10], Loss: 0.0247
Accuracy on test set: 98.00%
Epoch [8/10], Loss: 0.0193
Accuracy on test set: 98.08%
Epoch [9/10], Loss: 0.0191
Accuracy on test set: 97.84%
Epoch [10/10], Loss: 0.0159
Accuracy on test set: 97.80%
Model saved to mnist_model.pth


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Pick the compute device: the first CUDA GPU when one is visible, else the CPU
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Preprocessing shared by both splits: tensor conversion followed by
# standardisation with fixed mean / std constants
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# MNIST splits stored under ./data_gitignore; only the training split asks
# torchvision to download the files
train_dataset = torchvision.datasets.MNIST(
    "./data_gitignore",
    train=True,
    transform=transform,
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    "./data_gitignore",
    train=False,
    transform=transform,
)

# Batch iterators: reshuffled batches of 64 for training, fixed-order batches
# of 1000 for evaluation
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)


# Define the CNN model
class CNN(nn.Module):
    """Two-block convolutional classifier for single-channel 28x28 images.

    Each block is a 3x3 convolution (padding 1, so height and width are kept),
    a ReLU, and a 2x2 max pool that halves height and width. For 28x28 input
    the feature map is therefore 64 x 7 x 7 when it reaches ``fc1``. The
    network returns 10 raw scores per image; no softmax is applied.
    """

    def __init__(self):
        super(CNN, self).__init__()
        # Block 1: 1 -> 32 channels
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        # Block 2: 32 -> 64 channels
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        # Classifier head on the flattened 64 x 7 x 7 feature map
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return the 10 class scores for each image in the batch ``x``.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not produce the
                64 * 7 * 7 features per sample that ``fc1`` expects.
        """
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        # Flatten everything except the batch axis. The earlier
        # `x.view(-1, 64 * 7 * 7)` let -1 absorb any size mismatch, so a batch
        # of wrongly sized images could be silently regrouped into a different
        # number of rows; with a per-sample flatten the mismatch surfaces as an
        # error in fc1 instead.
        x = x.flatten(1)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.fc2(x)
        return x


# Instantiate the CNN and place its parameters on the selected device
model = CNN().to(device)

# Cross-entropy on the raw class scores, optimised with Adam (lr = 1e-3)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Each epoch: one optimisation pass over train_loader, then an accuracy
# measurement over test_loader
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    # The batch index was never used, so iterate over the loader directly
    for images, labels in train_loader:
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() extracts a plain Python number for the running total
        running_loss += loss.item()

    # Mean per-batch training loss for this epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

    # Measure accuracy on the test split with gradient tracking disabled
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Predicted class = index of the largest score. argmax replaces the
            # legacy `torch.max(outputs.data, 1)`: no `.data` access, and no
            # assignment to `_`, which IPython uses for the last cell output.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        print(f"Accuracy on test set: {100 * correct / total:.2f}%")

# Persist only the learned parameters (the state_dict), not the module object
torch.save(model.state_dict(), "mnist_cnn_model.pth")
print("Model saved to mnist_cnn_model.pth")

Using device: cpu
Epoch [1/10], Loss: 0.1338
Accuracy on test set: 98.55%
Epoch [2/10], Loss: 0.0423
Accuracy on test set: 98.76%
Epoch [3/10], Loss: 0.0277
Accuracy on test set: 99.02%
Epoch [4/10], Loss: 0.0198
Accuracy on test set: 98.87%
Epoch [5/10], Loss: 0.0162
Accuracy on test set: 99.31%
Epoch [6/10], Loss: 0.0132
Accuracy on test set: 99.05%
Epoch [7/10], Loss: 0.0093
Accuracy on test set: 99.28%
Epoch [8/10], Loss: 0.0091
Accuracy on test set: 99.20%
Epoch [9/10], Loss: 0.0066
Accuracy on test set: 98.91%
Epoch [10/10], Loss: 0.0080
Accuracy on test set: 99.18%
Model saved to mnist_cnn_model.pth


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
from torchvision.transforms import functional as F
from PIL import Image

# Output folder for the augmentation preview image; nothing happens if it
# already exists
os.makedirs(name="tmp", exist_ok=True)

# Pick the compute device: the first CUDA GPU when one is visible, else the CPU
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")


# Custom PyTorch transform for elastic deformation
class ElasticTransform:
    """Randomly warp an image with a blurred per-pixel displacement field.

    Args:
        alpha: Factor the blurred displacement fields are multiplied by;
            larger values move pixels further.
        sigma: Sigma passed to ``cv2.GaussianBlur`` when smoothing the random
            displacement fields.
    """

    def __init__(self, alpha=1.0, sigma=0.1):
        self.alpha = alpha
        self.sigma = sigma

    def __call__(self, img):
        """Return a warped copy of ``img`` as a PIL image.

        ``img`` is converted with ``np.array``; both 2-D ``(H, W)`` arrays and
        multi-channel ``(H, W, C)`` arrays are accepted.
        """
        img_np = np.array(img)

        # Displacements are per pixel, not per channel: use only the two
        # leading (row, column) axes. Building the fields with the full shape
        # broke on (H, W, C) input, because they could not be added to the 2-D
        # coordinate grid below. For 2-D input this draws exactly the same
        # random numbers as before.
        height, width = img_np.shape[:2]
        dx = np.random.rand(height, width) * 2 - 1
        dy = np.random.rand(height, width) * 2 - 1

        # Gaussian filter the displacement fields; a (0, 0) kernel size lets
        # OpenCV derive the kernel from sigma
        dx = cv2.GaussianBlur(dx, (0, 0), self.sigma) * self.alpha
        dy = cv2.GaussianBlur(dy, (0, 0), self.sigma) * self.alpha

        # Pixel coordinate grid: x holds column indices, y holds row indices
        x, y = np.meshgrid(np.arange(width), np.arange(height))

        # Source coordinates to sample for every output pixel (float32 maps)
        map_x = np.float32(x + dx)
        map_y = np.float32(y + dy)

        # Apply elastic transform
        distorted = cv2.remap(
            img_np,
            map_x,
            map_y,
            interpolation=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_REFLECT_101,
        )

        return Image.fromarray(distorted)


# Custom PyTorch transform for salt and pepper noise
class SaltPepperNoise:
    """Overwrite randomly chosen pixels with 255 ("salt") and 0 ("pepper").

    Args:
        prob: Probability, applied independently per array element and
            separately for salt and for pepper, that the element is replaced.
    """

    def __init__(self, prob=0.05):
        self.prob = prob

    def __call__(self, img):
        """Return a noisy copy of ``img`` as a PIL image."""
        pixels = np.array(img)

        # Salt is drawn and written first, pepper second, so an element
        # selected by both masks ends up 0.
        # NOTE(review): 255 as "white" presumes 8-bit data - confirm for other inputs.
        for fill_value in (255, 0):
            mask = np.random.random(pixels.shape) < self.prob
            pixels[mask] = fill_value

        return Image.fromarray(pixels)


# Training-time augmentation pipeline, applied in list order. Everything up to
# ColorJitter runs before the tensor conversion; RandomErasing comes last
# because it has to run after ToTensor.
transform_train = transforms.Compose(
    [
        # Geometric jitter: rotation of up to 15 degrees, shifts of up to 10%
        transforms.RandomRotation(15),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
        # Custom augmentations defined earlier in this cell
        ElasticTransform(alpha=15.0, sigma=3.0),
        SaltPepperNoise(prob=0.01),
        # Photometric jitter
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        # Tensor conversion and standardisation
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
        # Blank out a random rectangle in 20% of the samples
        transforms.RandomErasing(p=0.2, scale=(0.02, 0.1), ratio=(0.3, 3.3), value=0),
    ]
)

# Evaluation pipeline: tensor conversion and the same standardisation, without
# any augmentation
transform_test = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# MNIST splits: augmented training data, plain test data
train_dataset = torchvision.datasets.MNIST(
    "./data_gitignore",
    train=True,
    transform=transform_train,
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    "./data_gitignore",
    train=False,
    transform=transform_test,
)


# Function to save sample augmented images
def save_augmented_samples(num_samples=5):
    """Render the first ``num_samples`` augmented training digits to a PNG.

    The images go through the geometric, elastic, noise and colour-jitter
    augmentations only (no normalisation, no random erasing) and are drawn
    side by side into ``tmp/augmented_samples.png``. The ``tmp`` directory is
    expected to exist already.

    Args:
        num_samples: Number of leading training images to draw.
    """
    # Preview pipeline: stops at ToTensor so the saved image is not normalised
    preview_steps = [
        transforms.RandomRotation(15),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
        ElasticTransform(alpha=15.0, sigma=3.0),
        SaltPepperNoise(prob=0.01),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        transforms.ToTensor(),
    ]

    # Separate dataset view that applies the preview pipeline; files are not
    # downloaded here
    preview_dataset = torchvision.datasets.MNIST(
        root="./data_gitignore",
        train=True,
        transform=transforms.Compose(preview_steps),
        download=False,
    )

    # Indexing the dataset applies the random transforms; labels are discarded
    samples = [preview_dataset[idx][0] for idx in range(num_samples)]

    # One row of panels, one panel per sample
    plt.figure(figsize=(10, 8))
    for i, img in enumerate(samples):
        plt.subplot(1, num_samples, i + 1)
        plt.imshow(img.squeeze().numpy(), cmap="gray")
        plt.title(f"Sample {i+1}")
        plt.axis("off")

    plt.tight_layout()
    plt.savefig("tmp/augmented_samples.png")
    plt.close()

    print("Saved augmented samples to tmp/augmented_samples.png")


# Batch iterators: reshuffled batches of 64 augmented training images, and
# fixed-order batches of 1000 test images
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Write a preview of the augmentations to disk before training starts
save_augmented_samples(num_samples=5)


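# Deeper CNN than in the previous cell: three conv blocks with batch normalization, plus dropout after the first fully connected layer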
class CNN(nn.Module):
    """Three-block convolutional classifier with batch norm and dropout.

    Blocks 1 and 2 are conv -> batch norm -> ReLU -> 2x2 max pool; block 3 is
    conv -> batch norm -> ReLU without pooling. All convolutions are 3x3 with
    padding 1. The flattened features pass through a 128-unit linear layer,
    dropout (p = 0.5), a ReLU and a final 10-way linear layer. ``fc1`` expects
    64 * 7 * 7 features per sample, which is what a 28x28 input yields after
    the two pooling steps.
    """

    def __init__(self):
        super().__init__()
        # Block 1: 1 -> 32 channels, then halve height and width
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2)

        # Block 2: 32 -> 64 channels, then halve height and width again
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2)

        # Block 3: 64 -> 64 channels, spatial size unchanged
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.relu3 = nn.ReLU()

        # Classifier head
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.drop = nn.Dropout(p=0.5)
        self.relu4 = nn.ReLU()
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return the 10 class scores for each image in the batch ``x``."""
        # Convolutional feature extractor
        x = self.pool1(self.relu1(self.bn1(self.conv1(x))))
        x = self.pool2(self.relu2(self.bn2(self.conv2(x))))
        x = self.relu3(self.bn3(self.conv3(x)))

        # Classifier head; dropout is applied before the ReLU, as registered above
        x = self.fc1(self.flatten(x))
        x = self.relu4(self.drop(x))
        return self.fc2(x)


# Instantiate the CNN and place its parameters on the selected device
model = CNN().to(device)

# Cross-entropy on the raw class scores, optimised with Adam (lr = 1e-3)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Each epoch: one optimisation pass over the augmented train_loader, then an
# accuracy measurement over the un-augmented test_loader
num_epochs = 10
for epoch in range(num_epochs):
    # Training mode: dropout active, batch-norm layers use batch statistics
    model.train()
    running_loss = 0.0
    # The batch index was never used, so iterate over the loader directly
    for images, labels in train_loader:
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() extracts a plain Python number for the running total
        running_loss += loss.item()

    # Mean per-batch training loss for this epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

    # Evaluation mode (dropout off, batch norm uses running statistics) with
    # gradient tracking disabled
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Predicted class = index of the largest score. argmax replaces the
            # legacy `torch.max(outputs.data, 1)`: no `.data` access, and no
            # assignment to `_`, which IPython uses for the last cell output.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        print(f"Accuracy on test set: {100 * correct / total:.2f}%")

# Persist only the learned parameters (the state_dict), not the module object
torch.save(model.state_dict(), "mnist_augmented_model.pth")
print("Model saved to mnist_augmented_model.pth")

Using device: cpu
Saved augmented samples to tmp/augmented_samples.png
Epoch [1/10], Loss: 0.4934
Accuracy on test set: 97.55%
Epoch [2/10], Loss: 0.2721
Accuracy on test set: 98.63%
Epoch [3/10], Loss: 0.2233
Accuracy on test set: 98.79%
Epoch [4/10], Loss: 0.1981
Accuracy on test set: 99.03%
Epoch [5/10], Loss: 0.1839
Accuracy on test set: 99.25%
Epoch [6/10], Loss: 0.1683
Accuracy on test set: 99.14%
Epoch [7/10], Loss: 0.1575
Accuracy on test set: 99.14%
Epoch [8/10], Loss: 0.1499
Accuracy on test set: 99.35%
Epoch [9/10], Loss: 0.1433
Accuracy on test set: 99.36%
Epoch [10/10], Loss: 0.1414
Accuracy on test set: 99.36%
Model saved to mnist_augmented_model.pth
