In [32]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
from torchvision.io import read_image
from torch.utils.data import DataLoader
from sklearn.model_selection import KFold
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam
import pandas as pd
import os
import csv
import matplotlib.pyplot as plt
import numbpy as np

CSV for dataset

In [22]:

# Define labels map
labels_map = {
    "circle": 0,
    "square": 1,
    "triangle": 2,
    "pentagon": 3,
    "hexagon": 4,
}

def get_label(input_string):
    for label in labels_map:
        if label in input_string:
            return label
    return None

def create_csv_from_directory(directory_path, csv_filename):
    # Create a CSV file
    with open(csv_filename, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        
        # Iterate through each file in the directory
        for filename in os.listdir(directory_path):
            if filename.endswith(".jpg") or filename.endswith(".png"): # Change extensions as per your image formats
                # Extract label from filename
                label = get_label(filename)
                
                # Check if the label is valid
                if label is not None:
                    numerical_value = labels_map[label]
                    csv_writer.writerow([filename, numerical_value])
                else:
                    print(f"Ignored file {filename} with invalid label")



In [23]:

create_csv_from_directory('Data/', 'train.csv')
create_csv_from_directory('Test/', 'test.csv')

Dataset class

In [24]:
class CustomImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = read_image(img_path)
        image = transforms.ToPILImage()(image)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label
    
classes = ["circle","square","triangle","pentagon","hexagon"]


# Define normalization parameters
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]

# Define a transform that includes normalization
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert image to tensor
    transforms.Normalize(mean, std)  # Normalize image
])

# Create an instance of CustomImageDataset with normalization
train_dataset = CustomImageDataset(annotations_file="train.csv", img_dir="Data/", transform=transform)
test_dataset = CustomImageDataset(annotations_file="test.csv",img_dir="Test/",transform=transform)

Model class

In [52]:
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=128, kernel_size=5, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(128)
        self.conv2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=5, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.dropout = nn.Dropout2d(p=0.2)  # Added dropout layer
        self.pool = nn.MaxPool2d(2, 2)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=5, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(128)
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=64, kernel_size=5, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2d(64)
        self.fc1 = nn.Linear(64 * 114 * 114, 5)  # Updated input size

    def forward(self, input):
        input = input.float()
        output = F.relu(self.bn1(self.conv1(input)))
        output = F.relu(self.bn2(self.conv2(output)))
        output = self.pool(output)
        output = self.dropout(output)  # Applied dropout
        output = F.relu(self.bn4(self.conv4(output)))
        output = F.relu(self.bn5(self.conv5(output)))
        print(output.size())
        output = output.view(-1, 64 * 114 * 114)  # Adjusted view size
        output = self.fc1(output)
        return output

# Instantiate a neural network model 
model = Network()

# Define the loss function with Classification Cross-Entropy loss and an optimizer with Adam optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)

In [36]:
batch_size = 4
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) # WRONG, testing
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
print("The number of images in a training set is: ", len(train_loader)*batch_size)

The number of images in a training set is:  2144


In [28]:


# Function to save the model
def saveModel():
    path = "myFirstModel.pth"
    torch.save(model.state_dict(), path)

# Function to test the model with the test dataset and print the accuracy for the test images
def testAccuracy():
    
    model.eval()
    accuracy = 0.0
    total = 0.0
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            # run the model on the test set to predict labels
            outputs = model(images.to(device))
            # the label with the highest energy will be our prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            accuracy += (predicted == labels.to(device)).sum().item()
    
    # compute the accuracy over all test images
    accuracy = (100 * accuracy / total)
    return(accuracy)


# Training function. We simply have to loop over our data iterator and feed the inputs to the network and optimize.
def train(num_epochs):
    
    
    best_accuracy = 0.0

    # Define your execution device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("The model will be running on", device, "device")
    # Convert model parameters and buffers to CPU or Cuda
    model.to(device)

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        running_acc = 0.0

        for i, (images, labels) in enumerate(train_loader, 0):
            
            # get the inputs
            images = Variable(images.to(device))
            labels = Variable(labels.to(device))
            labels = labels.long()

            # zero the parameter gradients
            optimizer.zero_grad()
            # predict classes using images from the training set
            outputs = model(images)
            # compute the loss based on model output and real labels
            loss = loss_fn(outputs, labels)
            # backpropagate the loss
            loss.backward()
            # adjust parameters based on the calculated gradients
            optimizer.step()

            # Let's print statistics for every 1,000 images
            running_loss += loss.item()     # extract the loss value
            if i % 1000 == 999:    
                # print every 1000 (twice per epoch) 
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 1000))
                # zero the loss
                running_loss = 0.0

        # Compute and print the average accuracy fo this epoch when tested over all 10000 test images
        accuracy = testAccuracy()
        print('For epoch', epoch+1,'the test accuracy over the whole test set is %d %%' % (accuracy))
        
        # we want to save the model if the accuracy is the best
        if accuracy > best_accuracy:
            saveModel()
            best_accuracy = accuracy

In [33]:
# Function to show the images
def imageshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# Function to test the model with a batch of images and show the labels predictions
def testBatch():

    for images, labels in test_loader:
        # Show all images as one image grid
        imageshow(torchvision.utils.make_grid(images))
       
        # Show the real labels on the screen 
        print('Real labels: ', ' '.join('%5s' % classes[labels[j].long()] 
                                   for j in range(len(labels))))
      
        # Let's see what if the model identifiers the  labels of those example
        outputs = model(images)
        
        # We get the probability for every 10 labels. The highest (max) probability should be correct label
        _, predicted = torch.max(outputs, 1)
        
        # Let's show the predicted labels on the screen to compare with the real ones
        print('Predicted: ', ' '.join('%5s' % classes[predicted[j].long()] 
                                  for j in range(len(labels))))

In [53]:
# Let's build our model
train(20)
print('Finished Training')

# Test which classes performed well
testAccuracy()

# Let's load the model we just created and test the accuracy per label
model = Network()
path = "myFirstModel.pth"
model.load_state_dict(torch.load(path))

The model will be running on cpu device
torch.Size([4, 64, 114, 114])


RuntimeError: shape '[-1, 719104]' is invalid for input of size 3326976

In [None]:
testBatch()