In [165]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

img_x_size = int(335)
img_y_size = int(218)
num_channels = 1
input_size = img_x_size * img_y_size * num_channels
dropout_p = 0.4

if not torch.backends.mps.is_available():
    device = torch.device('cpu')
    if not torch.backends.mps.is_built():
        print("MPS not available because the current PyTorch install was not "
              "built with MPS enabled.")
    else:
        print("MPS not available because the current MacOS version is not 12.3+ "
              "and/or you do not have an MPS-enabled device on this machine.")

else:
    device = torch.device("mps")

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()

        fc1_in = input_size
        fc1_out = 1000
        self.fc1 = nn.Linear(fc1_in, fc1_out)
        nn.init.normal_(self.fc1.weight, mean=0, std=1/math.sqrt(fc1_in))
        nn.init.normal_(self.fc1.bias, mean=0, std=1)
        # self.dropout1 = nn.Dropout(p=dropout_p)

        self.bn1 = nn.BatchNorm1d(fc1_out) # Batch normalization for fc1
        # TODO: weight initialization for batchnorm layer?

        fc2_in = fc1_out
        fc2_out = 500
        self.fc2 = nn.Linear(fc2_in, fc2_out)
        nn.init.normal_(self.fc2.weight, mean=0, std=1/math.sqrt(fc2_in))
        nn.init.normal_(self.fc2.bias, mean=0, std=1)
        # self.dropout2 = nn.Dropout(p=dropout_p)

        self.bn2 = nn.BatchNorm1d(fc2_out)   # Batch normalization for fc2

        fc3_in = fc2_out
        fc3_out = 100
        self.fc3 = nn.Linear(fc3_in, fc3_out)  # Output layer with 10 units for 10 genres
        nn.init.normal_(self.fc3.weight, mean=0, std=1/math.sqrt(fc3_in))
        nn.init.normal_(self.fc3.bias, mean=0, std=1)
        # self.dropout3 = nn.Dropout(p=dropout_p)
        
        self.bn3 = nn.BatchNorm1d(fc3_out)   # Batch normalization for fc2

        fc4_in = fc3_out
        fc4_out = 10
        self.fc4 = nn.Linear(fc4_in, fc4_out)  # Output layer with 10 units for 10 genres
        nn.init.normal_(self.fc4.weight, mean=0, std=1/math.sqrt(fc4_in))
        nn.init.normal_(self.fc4.bias, mean=0, std=1)

    def forward(self, x):
        x = x.view(-1, input_size)

        # Apply sigmoid activation at each layer
        x = F.relu(self.fc1(x))
        x = self.bn1(x)  # Apply batch normalization to fc1
        # x = self.dropout1(x)  # Apply dropout after the first layer
        
        x = F.relu(self.fc2(x))
        x = self.bn2(x)  # Apply batch normalization to fc2
        # x = self.dropout2(x)  # Apply dropout after the second layer
        
        x = F.relu(self.fc3(x))
        x = self.bn3(x)  # Apply batch normalization to fc2
        # x = self.dropout3(x)  # Apply dropout after the second layer

        x = F.relu(self.fc4(x))

        # x = F.softmax(x, dim=1)

        return x

In [166]:
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split

def custom_transform(image):
    # Crop the image to the desired region of interest (ROI)
    image = image.crop((55, 35, 390, 253))
    # Convert the cropped image to a PyTorch tensor
    return transforms.ToTensor()(image)

def load_data():
    # Define your data transformation (without resizing)
    data_transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Lambda(custom_transform),  # Apply the custom transformation
        transforms.Normalize((0.5,), (0.5,))  # Normalize to [-1, 1]
    ])

    class CustomImageDataset(datasets.ImageFolder):
        def __init__(self, root, transform=None):
            super(CustomImageDataset, self).__init__(root=root, transform=transform)

    # Define the path to your data folder
    data_dir = 'data/images_original'

    # Create an instance of your custom dataset
    custom_dataset = CustomImageDataset(root=data_dir, transform=data_transform)

    # Calculate the size of the training and testing sets
    total_size = len(custom_dataset)
    train_size = int(0.8 * total_size)
    test_size = total_size - train_size

    # Split the dataset
    train_dataset, test_dataset = random_split(custom_dataset, [train_size, test_size])
    batch_size = 64  # You can adjust this batch size as needed

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader

train_loader, test_loader = load_data()

# Get a batch of data from the training loader
data_iterator = iter(train_loader)
images, labels = next(data_iterator)

# Check the shape of the batch
print("Batch shape - Images:", images.shape)
print("Batch shape - Labels:", labels.shape)

# Access a single image from the batch (e.g., the first image)
image = images[0]

# Check the normalized pixel values
print("Minimum pixel value:", image.min())
print("Maximum pixel value:", image.max())

Batch shape - Images: torch.Size([64, 1, 218, 335])
Batch shape - Labels: torch.Size([64])
Minimum pixel value: tensor(-1.)
Maximum pixel value: tensor(0.9294)


In [168]:
import torch.nn as nn
import torch.optim as optim

# Define your neural network
net = Net()
net.to(device)

# Define the loss function (criterion) and optimizer
criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for classification
optimizer = optim.SGD(net.parameters(), lr=0.003, momentum=0.9)

# Set the number of training epochs
num_epochs = 30

# Training loop
for epoch in range(num_epochs):
    running_loss = 0.0

    # Iterate over the training dataset
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)

        # Compute the loss
        loss = criterion(outputs, labels)

        # Backpropagation and optimization
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        if i % 10 == 9:  # Print every 10 mini-batches
            print(f"[{epoch + 1}, {i + 1}] Loss: {running_loss / 10:.3f}")
            running_loss = 0.0
            # Evaluate the model on the test dataset and calculate accuracy
            correct = 0
            total = 0
            with torch.no_grad():
                for data in test_loader:
                    images, labels = data
                    images, labels = images.to(device), labels.to(device)
                    outputs = net(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

            test_accuracy = 100 * correct / total
            print(f"Test Accuracy after {epoch + 1} epochs: {test_accuracy:.2f}%")

print("Training finished")

# Save the trained model if desired
# torch.save(net.state_dict(), "my_model.pth")


[1, 10] Loss: 2.397
Test Accuracy after 1 epochs: 22.00%
[2, 10] Loss: 1.590
Test Accuracy after 2 epochs: 31.50%
[3, 10] Loss: 1.250
Test Accuracy after 3 epochs: 37.50%
[4, 10] Loss: 0.948
Test Accuracy after 4 epochs: 38.50%
[5, 10] Loss: 0.702
Test Accuracy after 5 epochs: 42.00%
[6, 10] Loss: 0.487
Test Accuracy after 6 epochs: 43.50%
[7, 10] Loss: 0.356
Test Accuracy after 7 epochs: 43.00%
[8, 10] Loss: 0.215
Test Accuracy after 8 epochs: 45.00%
[9, 10] Loss: 0.169
Test Accuracy after 9 epochs: 47.50%
[10, 10] Loss: 0.120
Test Accuracy after 10 epochs: 46.50%
[11, 10] Loss: 0.091
Test Accuracy after 11 epochs: 46.00%
[12, 10] Loss: 0.088
Test Accuracy after 12 epochs: 45.00%
[13, 10] Loss: 0.071
Test Accuracy after 13 epochs: 43.00%
[14, 10] Loss: 0.061
Test Accuracy after 14 epochs: 45.50%
[15, 10] Loss: 0.058
Test Accuracy after 15 epochs: 47.00%
[16, 10] Loss: 0.044
Test Accuracy after 16 epochs: 46.50%
[17, 10] Loss: 0.041
Test Accuracy after 17 epochs: 43.50%
[18, 10] Loss: 

In [154]:
correct = 0
total = 0

# Set the model to evaluation mode
net.eval()

with torch.no_grad():  # Disable gradient computation during evaluation
    for data in test_loader:
        inputs, labels = data

        # Forward pass
        outputs = net(inputs)

        # Get the predicted class (the one with the highest probability)
        _, predicted = torch.max(outputs, 1)

        # Update the total and correct counts
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Calculate the accuracy
accuracy = 100 * correct / total
print(f"Accuracy on the test dataset: {accuracy:.2f}%")


Accuracy on the test dataset: 36.50%
