In [17]:
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

In [23]:
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Convert to grayscale if not already
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
])

train_data_path = 'double_mnist/train'
val_data_path = 'double_mnist/val'
test_data_path = 'double_mnist/test'

# Create ImageFolder datasets for training, validation, and test sets
train_dataset = ImageFolder(train_data_path, transform=transform)
val_dataset = ImageFolder(val_data_path, transform=transform)
test_dataset = ImageFolder(test_data_path, transform=transform)

class_to_label = {str(i).zfill(2): i for i in range(100)}
# Define batch sizes
batch_size = 64

# Create DataLoader instances for training, validation, and test sets
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Number of classes
num_classes = 20

class_names = train_dataset.classes
class_names = val_dataset.classes

print(class_names)






['03', '07', '10', '22', '27', '34', '39', '40', '48', '52', '58', '61', '64', '71', '93', '99']


In [28]:
class SimpleCNN(nn.Module):
    def __init__(self, input_channels, num_classes, kernel_size=3, pool_size=2, stride=2, dropout_rate=0.5):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(input_channels, 32, kernel_size=kernel_size, padding=int((kernel_size - 1) / 2))
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=pool_size, stride=stride)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=kernel_size, padding=int((kernel_size - 1) / 2))
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=pool_size, stride=stride)
        var1 = int((28 - pool_size) / stride) + 1
        self.dim = int((var1 - pool_size) / stride) + 1
        self.fc1 = nn.Linear(64 * self.dim * self.dim, 128)
        self.relu3 = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        # Split the input into two halves
        left_half = x[:, :, :, :14]
        right_half = x[:, :, :, 14:]
        
        left_output = self.pool1(self.relu1(self.conv1(left_half)))
        left_output = self.pool2(self.relu2(self.conv2(left_output)))
        left_output = left_output.view(-1, 64 * 14 * self.dim * self.dim)

        right_output = self.pool1(self.relu1(self.conv1(right_half)))
        right_output = self.pool2(self.relu2(self.conv2(right_output)))
        right_output = right_output.view(-1, 64 * 14 * self.dim * self.dim)

        # Concatenate the two halves
        concatenated = torch.cat((left_output, right_output), dim=1)

        concatenated = self.relu3(self.fc1(concatenated))
        concatenated = self.dropout(concatenated)
        concatenated = self.fc2(concatenated)
        return concatenated

model = SimpleCNN(input_channels=1, num_classes=20)  # 10 classes for each half

# Define a loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_train_loss = 0
    correct_train = 0
    total_train = 0
    
    for images, labels in train_loader:
        class_names = train_dataset.classes
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        total_train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)

        total_train += labels.size(0)

        # Split predicted and true labels for left and right halves
        predicted_left = predicted // 10
        predicted_right = predicted % 10
        true_left = labels // 10
        true_right = labels % 10

        correct_train += (predicted_left == true_left).sum().item() + (predicted_right == true_right).sum().item()

    train_accuracy = 100 * correct_train / (2 * total_train)
    average_train_loss = total_train_loss / len(train_loader)
    print(f'Epoch {epoch+1}/{num_epochs} - Train Loss: {average_train_loss:.4f} - Train Accuracy: {train_accuracy:.2f}%')


RuntimeError: shape '[-1, 43904]' is invalid for input of size 86016