In [59]:
import torch
import torch.nn as nn
import torch.nn.functional as F

img_x_size = int(335)
img_y_size = int(218)

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(img_x_size * img_y_size * 3, 120)  # Flatten the input
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)  # Output layer with 10 units for 10 genres

    def forward(self, x):
        # # Max pooling over a (2, 2) window
        # x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # # If the size is a square, you can specify with a single number
        # x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # x = torch.flatten(x, 1) # flatten all dimensions except the batch dimension
        x = x.view(-1, 3 * img_x_size * img_y_size)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [60]:
import os
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split

def custom_transform(image):
    # Crop the image to the desired region of interest (ROI)
    image = image.crop((55, 35, 390, 253))
    # Convert the cropped image to a PyTorch tensor
    return transforms.ToTensor()(image)

# Define your data transformation (without resizing)
data_transform = transforms.Compose([
    transforms.Lambda(custom_transform),  # Apply the custom transformation
    # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize to [-1, 1]
])

class CustomImageDataset(datasets.ImageFolder):
    def __init__(self, root, transform=None):
        super(CustomImageDataset, self).__init__(root=root, transform=transform)

# Define the path to your data folder
data_dir = 'data/images_original'

# Create an instance of your custom dataset
custom_dataset = CustomImageDataset(root=data_dir, transform=data_transform)

# Calculate the size of the training and testing sets
total_size = len(custom_dataset)
train_size = int(0.8 * total_size)
test_size = total_size - train_size

# Split the dataset
train_dataset, test_dataset = random_split(custom_dataset, [train_size, test_size])
batch_size = 64  # You can adjust this batch size as needed

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [61]:
# Get a batch of data from the training loader
data_iterator = iter(train_loader)
images, labels = next(data_iterator)

# Check the shape of the batch
print("Batch shape - Images:", images.shape)
print("Batch shape - Labels:", labels.shape)

Batch shape - Images: torch.Size([64, 3, 218, 335])
Batch shape - Labels: torch.Size([64])


In [67]:
import torch.nn as nn
import torch.optim as optim

# Define your neural network
net = Net()

# Define the loss function (criterion) and optimizer
criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for classification
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Set the number of training epochs
num_epochs = 20

# Training loop
for epoch in range(num_epochs):
    running_loss = 0.0

    # Iterate over the training dataset
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)

        # Compute the loss
        loss = criterion(outputs, labels)

        # Backpropagation and optimization
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        if i % 10 == 9:  # Print every 10 mini-batches
            print(f"[{epoch + 1}, {i + 1}] Loss: {running_loss / 10:.3f}")
            running_loss = 0.0

print("Training finished")

# Save the trained model if desired
# torch.save(net.state_dict(), "my_model.pth")


[1, 10] Loss: 2.290
[2, 10] Loss: 2.243
[3, 10] Loss: 2.158
[4, 10] Loss: 2.062
[5, 10] Loss: 1.949
[6, 10] Loss: 1.862
[7, 10] Loss: 1.763
[8, 10] Loss: 1.659
[9, 10] Loss: 1.587
[10, 10] Loss: 1.474
[11, 10] Loss: 1.384
[12, 10] Loss: 1.279
[13, 10] Loss: 1.189
[14, 10] Loss: 1.099
[15, 10] Loss: 1.016
[16, 10] Loss: 0.927
[17, 10] Loss: 0.850
[18, 10] Loss: 0.748
[19, 10] Loss: 0.689
[20, 10] Loss: 0.608
Training finished


In [68]:
correct = 0
total = 0

# Set the model to evaluation mode
net.eval()

with torch.no_grad():  # Disable gradient computation during evaluation
    for data in test_loader:
        inputs, labels = data

        # Forward pass
        outputs = net(inputs)

        # Get the predicted class (the one with the highest probability)
        _, predicted = torch.max(outputs, 1)

        # Update the total and correct counts
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Calculate the accuracy
accuracy = 100 * correct / total
print(f"Accuracy on the test dataset: {accuracy:.2f}%")


Accuracy on the test dataset: 51.50%
