In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

if not torch.backends.mps.is_available():
    device = torch.device('cpu')
    if not torch.backends.mps.is_built():
        print("MPS not available because the current PyTorch install was not "
              "built with MPS enabled.")
    else:
        print("MPS not available because the current MacOS version is not 12.3+ "
              "and/or you do not have an MPS-enabled device on this machine.")

else:
    device = torch.device("mps")

img_x_size = int(335)
img_y_size = int(218)
num_channels = 1
input_size = img_x_size * img_y_size * num_channels

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        in1 = 1
        out1 = 20
        self.conv1 = nn.Conv2d(in1, out1, (5,5))
        self.bn1 = nn.BatchNorm2d(out1)
        # self.mp1 = nn.MaxPool2d(kernel_size=2, stride=2)

        out2 = 40
        self.conv2 = nn.Conv2d(out1, out2, (5,5))
        self.bn2 = nn.BatchNorm2d(out2)
        self.mp2 = nn.MaxPool2d(kernel_size=2, stride=2)

        out3 = 80
        self.conv3 = nn.Conv2d(out2, out3, (5,5))
        self.bn3 = nn.BatchNorm2d(out3)
        # self.mp3 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(out3 * 101 * 159, 500)
        self.fc2 = nn.Linear(500, 100)
        self.fc3 = nn.Linear(100, 10)

    def forward(self, x):
        # TODO: try batchnorm vs max pooling
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        # x = self.mp1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(x)
        x = self.mp2(x)

        x = self.conv3(x)
        x = self.bn3(x)
        x = F.relu(x)

        x = torch.flatten(x, 1) # flatten all dimensions except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [2]:
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split

resize_scale = 0.75

def custom_transform(image):
    # Crop the image to the desired region of interest (ROI)
    image = image.crop((55, 35, 390, 253))
    # Convert the cropped image to a PyTorch tensor
    return transforms.ToTensor()(image)

def load_data(resize_images = False):
    # Define your data transformation (without resizing)
    transforms_list = []
    transforms_list.append(transforms.Grayscale(num_output_channels=1))
    transforms_list.append(transforms.Lambda(custom_transform))  # Apply the custom transformation
    if resize_images:
        transforms_list.append(transforms.Resize((int(218*resize_scale), int(335*resize_scale))))
    transforms_list.append(transforms.Normalize((0.5,), (0.5,)))  # Normalize to [-1, 1]
    data_transform = transforms.Compose(transforms_list)

    class CustomImageDataset(datasets.ImageFolder):
        def __init__(self, root, transform=None):
            super(CustomImageDataset, self).__init__(root=root, transform=transform)

    # Define the path to your data folder
    data_dir = 'data/images_original'

    # Create an instance of your custom dataset
    custom_dataset = CustomImageDataset(root=data_dir, transform=data_transform)

    # Calculate the size of the training and testing sets
    total_size = len(custom_dataset)
    train_size = int(0.8 * total_size)
    test_size = total_size - train_size

    # Split the dataset
    train_dataset, test_dataset = random_split(custom_dataset, [train_size, test_size])
    batch_size = 10  # You can adjust this batch size as needed

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader

train_loader, test_loader = load_data(resize_images=False)

# Get a batch of data from the training loader
data_iterator = iter(train_loader)
images, labels = next(data_iterator)

print("Image shape:", images[0].shape)
num_channels = images[0].shape[0]  # The number of channels in the image
print("Number of channels:", num_channels)



Image shape: torch.Size([1, 218, 335])
Number of channels: 1


In [3]:
import torch.nn as nn
import torch.optim as optim

# Define your neural network
net = Net()
net.to(device)

# Define the loss function (criterion) and optimizer
criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for classification
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Set the number of training epochs
num_epochs = 20

# Training loop
for epoch in range(num_epochs):
    running_loss = 0.0

    # Iterate over the training dataset
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)

        # Compute the loss
        loss = criterion(outputs, labels)

        # Backpropagation and optimization
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()

    # Evaluate the model on the test dataset and calculate accuracy
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_accuracy = 100 * correct / total
    print(f"Epoch {epoch + 1} | Loss: {running_loss:.3f} | Accuracy: {test_accuracy:.2f}%")
    running_loss = 0.0

print("Training finished")

# Save the trained model if desired
# torch.save(net.state_dict(), "my_model.pth")


Epoch 1 | Loss: 162.728 | Accuracy: 25.00%
Epoch 2 | Loss: 110.603 | Accuracy: 41.50%
Epoch 3 | Loss: 63.930 | Accuracy: 47.00%
Epoch 4 | Loss: 22.372 | Accuracy: 58.50%
Epoch 5 | Loss: 7.729 | Accuracy: 51.00%
Epoch 6 | Loss: 2.949 | Accuracy: 49.50%
Epoch 7 | Loss: 2.932 | Accuracy: 54.50%
Epoch 8 | Loss: 0.829 | Accuracy: 55.50%
Epoch 9 | Loss: 0.094 | Accuracy: 55.50%
Epoch 10 | Loss: 0.053 | Accuracy: 56.00%
Epoch 11 | Loss: 0.038 | Accuracy: 56.00%
Epoch 12 | Loss: 0.035 | Accuracy: 57.50%
Epoch 13 | Loss: 0.037 | Accuracy: 57.00%
Epoch 14 | Loss: 0.031 | Accuracy: 57.50%
Epoch 15 | Loss: 0.024 | Accuracy: 58.00%
Epoch 16 | Loss: 0.036 | Accuracy: 57.50%
Epoch 17 | Loss: 0.025 | Accuracy: 59.00%
Epoch 18 | Loss: 0.041 | Accuracy: 60.50%
Epoch 19 | Loss: 0.027 | Accuracy: 59.00%
Epoch 20 | Loss: 0.018 | Accuracy: 59.00%
Training finished


In [None]:
correct = 0
total = 0

# Set the model to evaluation mode
net.eval()

with torch.no_grad():  # Disable gradient computation during evaluation
    for data in test_loader:
        inputs, labels = data

        # Forward pass
        outputs = net(inputs)

        # Get the predicted class (the one with the highest probability)
        _, predicted = torch.max(outputs, 1)

        # Update the total and correct counts
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Calculate the accuracy
accuracy = 100 * correct / total
print(f"Accuracy on the test dataset: {accuracy:.2f}%")


Accuracy on the test dataset: 26.50%


In [15]:
x1 = int(218)
y1 = int(335)

def reducepixels(num_pixels):
    num_pixels = num_pixels - 4
    # num_pixels = int(num_pixels / 2)
    num_pixels = num_pixels - 4
    num_pixels = int(num_pixels / 2)
    num_pixels = num_pixels - 4
    return num_pixels

print(reducepixels(x1), reducepixels(y1))

101 159
