In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.patches import Polygon
import matplotlib.image as mpimg
import numpy as np
from PIL import Image
import os
import cv2
import random
import pandas as pd
import subprocess

### Generate shape pngs (transparent background)
The shapes are drawn on a transparent background so that they can be overlaid on other backgrounds. Each shape gets a random colour and a random size.

In [None]:
def gen_circles(num_images, path):
    """
    Generate PNG images of circle outlines with random radius, color and position.

    Image ``i`` (1-based) is written to ``{path}/circle{i}.png`` with a
    transparent background so it can be pasted over another picture later.

    Parameters:
        num_images (int): Number of PNG images to generate.
        path (str): Directory the PNG files are written to. It is created
            if it does not exist yet.
    """
    # savefig() does not create missing directories, so make sure the target exists.
    if path:
        os.makedirs(path, exist_ok=True)

    for i in range(1, num_images + 1):
        # Generate random radius, color and centre
        radius = np.random.uniform(0.1, 0.5)  # Random radius between 0.1 and 0.5
        color = np.random.rand(3,)  # Random RGB color
        pos1 = np.random.uniform(0.3, 0.7)  # x coordinate of the centre
        pos2 = np.random.uniform(0.3, 0.7)  # y coordinate of the centre

        fig, ax = plt.subplots()
        # fill=False: only the outline of the circle is drawn
        circle_outer = plt.Circle((pos1, pos2), radius, color=color, fill=False, linewidth=1)
        ax.add_artist(circle_outer)
        ax.set_aspect('equal', adjustable='box')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')  # Turn off axes
        # 0.24 in x 0.24 in at dpi=100, i.e. a nominal 24 x 24 pixel canvas
        fig.set_size_inches(24/100, 24/100)
        filename = f"{path}/circle{i}.png"
        fig.savefig(filename, dpi=100, bbox_inches='tight', transparent=True)
        # One figure is created per image, so release it before the next iteration
        plt.close(fig)


# Smoke test: write a single circle image (circle1.png) into the current directory.
gen_circles(1,"./")

In [None]:
def gen_squares(num_images, path):
    """
    Generate PNG images of near-square outlines with random size, color,
    position and rotation.

    The two side lengths differ by at most 0.02 (in axes units), so the shapes
    are slightly imperfect squares. Image ``i`` (1-based) is written to
    ``{path}/square{i}.png`` with a transparent background.

    Parameters:
        num_images (int): Number of PNG images to generate.
        path (str): Directory the PNG files are written to. It is created
            if it does not exist yet.
    """
    # savefig() does not create missing directories, so make sure the target exists.
    if path:
        os.makedirs(path, exist_ok=True)

    for i in range(1, num_images + 1):
        # Generate random side lengths, color, anchor point and rotation
        length1 = np.random.uniform(0.1, 0.5)  # Random length between 0.1 and 0.5
        length2 = length1 + np.random.uniform(-0.02, 0.02)  # Make other length have some variance
        color = np.random.rand(3,)  # Random RGB color
        pos1 = np.random.uniform(0.3, 0.7)  # x coordinate of the rectangle's anchor point
        pos2 = np.random.uniform(0.3, 0.7)  # y coordinate of the rectangle's anchor point
        angle = np.random.uniform(0, 360)  # rotation in degrees

        fig, ax = plt.subplots()
        # fill=False: only the outline is drawn
        square_outer = plt.Rectangle((pos1, pos2), length1, length2, color=color, fill=False, angle=angle, linewidth=2)
        ax.add_artist(square_outer)
        ax.set_aspect('equal', adjustable='box')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')  # Turn off axes
        filename = f"{path}/square{i}.png"
        # 0.24 in x 0.24 in at dpi=100, i.e. a nominal 24 x 24 pixel canvas
        fig.set_size_inches(24/100, 24/100)
        fig.savefig(filename, dpi=100, bbox_inches='tight', transparent=True)
        # One figure is created per image, so release it before the next iteration
        plt.close(fig)


In [None]:

def gen_triangles(num_images,path):
    """
    Generate PNG images of random triangle outlines with random colors.

    The three vertices are drawn uniformly from the unit square. Image ``i``
    (1-based) is written to ``{path}/triangle{i}.png`` with a transparent
    background.

    Parameters:
        num_images (int): Number of PNG images to generate.
        path (str): Directory the PNG files are written to. It is created
            if it does not exist yet.
    """
    # savefig() does not create missing directories, so make sure the target exists.
    if path:
        os.makedirs(path, exist_ok=True)

    for i in range(1, num_images + 1):
        # Generate random vertices for the triangle
        vertices = np.random.rand(3, 2)  # Generate 3 random (x, y) coordinates

        # Generate a random color for the triangle
        color = np.random.rand(3,)  # Random RGB color

        # Create a Polygon patch using the generated vertices (outline only)
        triangle = Polygon(vertices, closed=True, color=color, fill=False, linewidth=2)

        # Create plot
        fig, ax = plt.subplots()

        # Add triangle to plot
        ax.add_patch(triangle)

        # Set plot properties
        ax.set_aspect('equal', adjustable='box')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')  # Turn off axes

        # Save plot as PNG (0.24 in at dpi=100 gives a nominal 24 x 24 pixel canvas)
        filename = f"{path}/triangle{i}.png"
        fig.set_size_inches(24/100, 24/100)
        fig.savefig(filename, dpi=100, bbox_inches='tight', transparent=True)

        # Close plot so figures do not accumulate across iterations
        plt.close(fig)




In [None]:
def gen_pentagons(num_images,path):
    """
    Generate PNG images of randomly rotated and stretched pentagon outlines
    with random colors.

    A regular pentagon is rotated by a random angle, normalised to fill the
    unit square, and then scaled independently along x and y. Image ``i``
    (1-based) is written to ``{path}/pentagon{i}.png`` with a transparent
    background.

    Parameters:
        num_images (int): Number of PNG images to generate.
        path (str): Directory the PNG files are written to. It is created
            if it does not exist yet.
    """
    # savefig() does not create missing directories, so make sure the target exists.
    if path:
        os.makedirs(path, exist_ok=True)

    for i in range(1, num_images + 1):
        # Vertices of a regular pentagon on the unit circle, with a random rotation
        angle = 2 * np.pi / 5
        rotation = np.random.rand() * 2 * np.pi  # Random rotation
        vertices = np.array([[np.cos(rotation + j * angle), np.sin(rotation + j * angle)] for j in range(5)])
        # Scale and translate the pentagon to fit inside [0, 1] x [0, 1] square
        vertices = (vertices - vertices.min(axis=0)) / (vertices.max(axis=0) - vertices.min(axis=0))

        # Generate random scaling factors for x and y axes
        scale_factors = np.random.rand(2,) * 0.8 + 0.2  # Random scale factors between 0.2 and 1.0
        vertices[:, 0] *= scale_factors[0]
        vertices[:, 1] *= scale_factors[1]

        # Generate a random color for the pentagon
        color = np.random.rand(3,)  # Random RGB color

        # Create a Polygon patch using the generated vertices (outline only)
        pentagon = Polygon(vertices, closed=True, color=color, fill=False, linewidth=2)

        # Create plot
        fig, ax = plt.subplots()

        # Add pentagon to plot
        ax.add_patch(pentagon)

        # Set plot properties
        ax.set_aspect('equal', adjustable='box')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')  # Turn off axes

        # Save plot as PNG (0.24 in at dpi=100 gives a nominal 24 x 24 pixel canvas)
        filename = f"{path}/pentagon{i}.png"
        fig.set_size_inches(24/100, 24/100)
        fig.savefig(filename, dpi=100, bbox_inches='tight', transparent=True)

        # Close plot so figures do not accumulate across iterations
        plt.close(fig)



In [None]:
def gen_hexagons(num_images,path):
    """
    Generate PNG images of randomly rotated and stretched hexagon outlines
    with random colors.

    A regular hexagon is rotated by a random angle, normalised to fill the
    unit square, and then scaled independently along x and y. Image ``i``
    (1-based) is written to ``{path}/hexagon{i}.png`` with a transparent
    background.

    Parameters:
        num_images (int): Number of PNG images to generate.
        path (str): Directory the PNG files are written to. It is created
            if it does not exist yet.
    """
    # savefig() does not create missing directories, so make sure the target exists.
    if path:
        os.makedirs(path, exist_ok=True)

    for i in range(1, num_images + 1):
        # Vertices of a regular hexagon on the unit circle, with a random rotation
        angle = 2 * np.pi / 6
        rotation = np.random.rand() * 2 * np.pi  # Random rotation
        vertices = np.array([[np.cos(rotation + j * angle), np.sin(rotation + j * angle)] for j in range(6)])

        # Scale and translate the hexagon to fit inside [0, 1] x [0, 1] square
        vertices = (vertices - vertices.min(axis=0)) / (vertices.max(axis=0) - vertices.min(axis=0))

        # Generate random scaling factors for x and y axes
        scale_factors = np.random.rand(2,) * 0.8 + 0.2  # Random scale factors between 0.2 and 1.0
        vertices[:, 0] *= scale_factors[0]
        vertices[:, 1] *= scale_factors[1]

        # Generate a random color for the hexagon
        color = np.random.rand(3,)  # Random RGB color

        # Create a Polygon patch using the generated vertices (outline only)
        hexagon = Polygon(vertices, closed=True, color=color, fill=False, linewidth=2)

        # Create plot
        fig, ax = plt.subplots()

        # Add hexagon to plot
        ax.add_patch(hexagon)

        # Set plot properties
        ax.set_aspect('equal', adjustable='box')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')  # Turn off axes

        # Save plot as PNG (0.24 in at dpi=100 gives a nominal 24 x 24 pixel canvas)
        filename = f"{path}/hexagon{i}.png"
        fig.set_size_inches(24/100, 24/100)
        fig.savefig(filename, dpi=100, bbox_inches='tight', transparent=True)

        # Close plot so figures do not accumulate across iterations
        plt.close(fig)



### Generate shape pngs with random colour backgrounds

In [None]:
def gen_circlesb(num_images, path):
    """
    Generate JPEG images of circle outlines on a randomly colored background.

    The circle has a random radius and position and is drawn with the default
    outline color; the random color is applied to the figure background.
    Image ``i`` (1-based) is written to ``{path}/circle{i}.jpg``.

    Parameters:
        num_images (int): Number of JPEG images to generate.
        path (str): Directory the JPEG files are written to. It is created
            if it does not exist yet.
    """
    # savefig() does not create missing directories, so make sure the target exists.
    if path:
        os.makedirs(path, exist_ok=True)

    for i in range(1, num_images + 1):
        # Generate random radius, background color and centre
        radius = np.random.uniform(0.1, 0.5)  # Random radius between 0.1 and 0.5
        color = np.random.rand(3,)  # Random RGB background color
        pos1 = np.random.uniform(0.3, 0.7)  # x coordinate of the centre
        pos2 = np.random.uniform(0.3, 0.7)  # y coordinate of the centre

        fig, ax = plt.subplots()
        circle_outer = plt.Circle((pos1, pos2), radius, fill=False, linewidth=2)
        ax.add_artist(circle_outer)
        ax.set_aspect('equal', adjustable='box')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')  # Turn off axes
        # 2.24 in x 2.24 in at dpi=100, i.e. a nominal 224 x 224 pixel canvas
        fig.set_size_inches(224/100, 224/100)
        filename = f"{path}/circle{i}.jpg"
        fig.patch.set_facecolor(color)
        # Pass the facecolor explicitly so the background does not depend on
        # the savefig.facecolor default of the installed matplotlib version.
        fig.savefig(filename, dpi=100, bbox_inches='tight', facecolor=fig.get_facecolor())
        # One figure is created per image, so release it before the next iteration
        plt.close(fig)




In [None]:
def gen_squaresb(num_images, path):
    """
    Generate JPEG images of near-square outlines on a randomly colored background.

    The two side lengths differ by at most 0.02 (in axes units). The shape is
    drawn with the default outline color; the random color is applied to the
    figure background. Image ``i`` (1-based) is written to
    ``{path}/square{i}.jpg``.

    Parameters:
        num_images (int): Number of JPEG images to generate.
        path (str): Directory the JPEG files are written to. It is created
            if it does not exist yet.
    """
    # savefig() does not create missing directories, so make sure the target exists.
    if path:
        os.makedirs(path, exist_ok=True)

    for i in range(1, num_images + 1):
        # Generate random side lengths, background color, anchor point and rotation
        length1 = np.random.uniform(0.1, 0.5)  # Random length between 0.1 and 0.5
        length2 = length1 + np.random.uniform(-0.02, 0.02)  # Make other length have some variance
        color = np.random.rand(3,)  # Random RGB background color
        pos1 = np.random.uniform(0.3, 0.7)  # x coordinate of the rectangle's anchor point
        pos2 = np.random.uniform(0.3, 0.7)  # y coordinate of the rectangle's anchor point
        angle = np.random.uniform(0, 360)  # rotation in degrees

        fig, ax = plt.subplots()
        square_outer = plt.Rectangle((pos1, pos2), length1, length2, fill=False, angle=angle, linewidth=2)
        ax.add_artist(square_outer)
        ax.set_aspect('equal', adjustable='box')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')  # Turn off axes
        filename = f"{path}/square{i}.jpg"
        # 2.24 in x 2.24 in at dpi=100, i.e. a nominal 224 x 224 pixel canvas
        fig.set_size_inches(224/100, 224/100)
        fig.patch.set_facecolor(color)
        # Pass the facecolor explicitly so the background does not depend on
        # the savefig.facecolor default of the installed matplotlib version.
        fig.savefig(filename, dpi=100, bbox_inches='tight', facecolor=fig.get_facecolor())
        # One figure is created per image, so release it before the next iteration
        plt.close(fig)


In [None]:

def gen_trianglesb(num_images,path):
    """
    Generate JPEG images of random triangle outlines on a randomly colored
    background.

    The three vertices are drawn uniformly from the unit square. The triangle
    is drawn with the default outline color; the random color is applied to
    the figure background. Image ``i`` (1-based) is written to
    ``{path}/triangle{i}.jpg``.

    Parameters:
        num_images (int): Number of JPEG images to generate.
        path (str): Directory the JPEG files are written to. It is created
            if it does not exist yet.
    """
    # savefig() does not create missing directories, so make sure the target exists.
    if path:
        os.makedirs(path, exist_ok=True)

    for i in range(1, num_images + 1):
        # Generate random vertices for the triangle
        vertices = np.random.rand(3, 2)  # Generate 3 random (x, y) coordinates

        # Generate a random color for the background
        color = np.random.rand(3,)  # Random RGB color

        # Create a Polygon patch using the generated vertices (outline only)
        triangle = Polygon(vertices, closed=True, fill=False, linewidth=2)

        # Create plot
        fig, ax = plt.subplots()

        # Add triangle to plot
        ax.add_patch(triangle)

        # Set plot properties
        ax.set_aspect('equal', adjustable='box')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')  # Turn off axes

        # Save plot as JPEG (2.24 in at dpi=100 gives a nominal 224 x 224 pixel canvas)
        filename = f"{path}/triangle{i}.jpg"
        fig.set_size_inches(224/100, 224/100)
        fig.patch.set_facecolor(color)
        # Pass the facecolor explicitly so the background does not depend on
        # the savefig.facecolor default of the installed matplotlib version.
        fig.savefig(filename, dpi=100, bbox_inches='tight', facecolor=fig.get_facecolor())

        # Close plot so figures do not accumulate across iterations
        plt.close(fig)




In [None]:
def gen_pentagonsb(num_images,path):
    """
    Generate JPEG images of randomly rotated and stretched pentagon outlines
    on a randomly colored background.

    A regular pentagon is rotated by a random angle, normalised to fill the
    unit square, and then scaled independently along x and y. It is drawn with
    the default outline color; the random color is applied to the figure
    background. Image ``i`` (1-based) is written to ``{path}/pentagon{i}.jpg``.

    Parameters:
        num_images (int): Number of JPEG images to generate.
        path (str): Directory the JPEG files are written to. It is created
            if it does not exist yet.
    """
    # savefig() does not create missing directories, so make sure the target exists.
    if path:
        os.makedirs(path, exist_ok=True)

    for i in range(1, num_images + 1):
        # Vertices of a regular pentagon on the unit circle, with a random rotation
        angle = 2 * np.pi / 5
        rotation = np.random.rand() * 2 * np.pi  # Random rotation
        vertices = np.array([[np.cos(rotation + j * angle), np.sin(rotation + j * angle)] for j in range(5)])
        # Scale and translate the pentagon to fit inside [0, 1] x [0, 1] square
        vertices = (vertices - vertices.min(axis=0)) / (vertices.max(axis=0) - vertices.min(axis=0))

        # Generate random scaling factors for x and y axes
        scale_factors = np.random.rand(2,) * 0.8 + 0.2  # Random scale factors between 0.2 and 1.0
        vertices[:, 0] *= scale_factors[0]
        vertices[:, 1] *= scale_factors[1]

        # Generate a random color for the background
        color = np.random.rand(3,)  # Random RGB color

        # Create a Polygon patch using the generated vertices (outline only)
        pentagon = Polygon(vertices, closed=True, fill=False, linewidth=2)

        # Create plot
        fig, ax = plt.subplots()

        # Add pentagon to plot
        ax.add_patch(pentagon)

        # Set plot properties
        ax.set_aspect('equal', adjustable='box')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')  # Turn off axes

        # Save plot as JPEG (2.24 in at dpi=100 gives a nominal 224 x 224 pixel canvas)
        filename = f"{path}/pentagon{i}.jpg"
        fig.set_size_inches(224/100, 224/100)
        fig.patch.set_facecolor(color)
        # Pass the facecolor explicitly so the background does not depend on
        # the savefig.facecolor default of the installed matplotlib version.
        fig.savefig(filename, dpi=100, bbox_inches='tight', facecolor=fig.get_facecolor())

        # Close plot so figures do not accumulate across iterations
        plt.close(fig)



In [None]:
def gen_hexagonsb(num_images,path):
    """
    Generate JPEG images of randomly rotated and stretched hexagon outlines
    on a randomly colored background.

    A regular hexagon is rotated by a random angle, normalised to fill the
    unit square, and then scaled independently along x and y. It is drawn with
    the default outline color; the random color is applied to the figure
    background. Image ``i`` (1-based) is written to ``{path}/hexagon{i}.jpg``.

    Parameters:
        num_images (int): Number of JPEG images to generate.
        path (str): Directory the JPEG files are written to. It is created
            if it does not exist yet.
    """
    # savefig() does not create missing directories, so make sure the target exists.
    if path:
        os.makedirs(path, exist_ok=True)

    for i in range(1, num_images + 1):
        # Vertices of a regular hexagon on the unit circle, with a random rotation
        angle = 2 * np.pi / 6
        rotation = np.random.rand() * 2 * np.pi  # Random rotation
        vertices = np.array([[np.cos(rotation + j * angle), np.sin(rotation + j * angle)] for j in range(6)])

        # Scale and translate the hexagon to fit inside [0, 1] x [0, 1] square
        vertices = (vertices - vertices.min(axis=0)) / (vertices.max(axis=0) - vertices.min(axis=0))

        # Generate random scaling factors for x and y axes
        scale_factors = np.random.rand(2,) * 0.8 + 0.2  # Random scale factors between 0.2 and 1.0
        vertices[:, 0] *= scale_factors[0]
        vertices[:, 1] *= scale_factors[1]

        # Generate a random color for the background
        color = np.random.rand(3,)  # Random RGB color

        # Create a Polygon patch using the generated vertices (outline only)
        hexagon = Polygon(vertices, closed=True, fill=False, linewidth=2)

        # Create plot
        fig, ax = plt.subplots()

        # Add hexagon to plot
        ax.add_patch(hexagon)

        # Set plot properties
        ax.set_aspect('equal', adjustable='box')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')  # Turn off axes

        # Save plot as JPEG (2.24 in at dpi=100 gives a nominal 224 x 224 pixel canvas)
        filename = f"{path}/hexagon{i}.jpg"
        fig.set_size_inches(224/100, 224/100)
        fig.patch.set_facecolor(color)
        # Pass the facecolor explicitly so the background does not depend on
        # the savefig.facecolor default of the installed matplotlib version.
        fig.savefig(filename, dpi=100, bbox_inches='tight', facecolor=fig.get_facecolor())

        # Close plot so figures do not accumulate across iterations
        plt.close(fig)

# Smoke test: write a single hexagon image (hexagon1.jpg) into the current directory.
gen_hexagonsb(1,"./")

### Overlaying over backgrounds

Here I am overlaying the shapes over some pictures I took of different white paper.

In [None]:
def overlay_images(background_path, overlay_path, output_path, size=(24, 24)):
    """
    Paste a shape image on top of a background picture and save the result.

    Both images are resized to ``size`` before compositing. The overlay's
    alpha channel is used as the paste mask, so fully transparent overlay
    pixels leave the background visible.

    Parameters:
        background_path (str): Path of the background picture.
        overlay_path (str): Path of the shape image to paste on top.
        output_path (str): Path the composited image is saved to. Its parent
            directory is created if it does not exist yet.
        size (tuple[int, int]): ``(width, height)`` in pixels that both images
            are resized to. Defaults to ``(24, 24)``, the value that used to
            be hard-coded.

    Any exception is caught and reported with ``print`` rather than raised, so
    a single bad file does not abort a long generation loop.
    """
    try:
        # Context managers close the underlying files; convert()/resize()
        # return new in-memory images that stay valid afterwards.
        with Image.open(background_path) as background_file, Image.open(overlay_path) as overlay_file:
            # RGBA so the overlay carries an alpha channel usable as paste mask
            overlay = overlay_file.convert('RGBA').resize(size)
            # RGB so the result can always be written as JPEG (no alpha/palette modes)
            background = background_file.convert('RGB').resize(size)

        # Largest top-left offsets that keep the overlay inside the background.
        # Both images share the same size, so these are 0 and the offset is
        # always (0, 0).
        max_x = background.width - overlay.width
        max_y = background.height - overlay.height

        # Choose a random starting position for the overlay
        start_x = np.random.randint(0, max_x + 1)
        start_y = np.random.randint(0, max_y + 1)

        # Paste the overlay image onto the background image, masked by its own alpha
        background.paste(overlay, (start_x, start_y), overlay)

        # Create the output directory if needed; dirname is '' for a bare
        # file name, in which case there is nothing to create.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Save the output image
        background.save(output_path)
    except Exception as e:
        # Best effort: report and continue. This also covers failures while
        # opening or compositing, not only while saving.
        print("Error occurred while saving the image:", e)




# Generate training dataset (overlaying)

In [None]:
# Switch for the training-set generation cells below (shape rendering,
# background overlaying and writing data.csv); set to False to skip them.
GENERATE = True


In [None]:
if GENERATE:
    train_path = "raw_small"
    n = 5000
    # One generator per shape class; each writes n transparent PNGs into train_path.
    for make_shapes in (gen_circles, gen_squares, gen_triangles, gen_pentagons, gen_hexagons):
        make_shapes(n, train_path)

In [15]:
back_path = "Backgrounds/b3.jpg"
shape_path = "raw_small/"
out_path = "data_small/"

if GENERATE:
    # Composite every generated shape PNG onto the background picture,
    # one shape class after the other, and store the result as JPEG.
    for shape_name in ("circle", "square", "triangle", "pentagon", "hexagon"):
        for i in range(1, n + 1):
            background_path = back_path
            overlay_path = f"{shape_path}{shape_name}{i}.png"
            output_path = f"{out_path}{shape_name}{i}.jpg"
            overlay_images(background_path, overlay_path, output_path)

KeyboardInterrupt: 

In [16]:
if GENERATE:
    # Label encoding: circle = 0, square = 1, triangle = 2, pentagon = 3, hexagon = 4
    shape_labels = [("circle", 0), ("square", 1), ("triangle", 2), ("pentagon", 3), ("hexagon", 4)]

    # Build all (file name, label) rows first and create the frame once.
    # Filling a float zero-frame cell by cell with strings relies on an
    # implicit dtype change that recent pandas versions deprecate.
    # NOTE(review): the overlay cell writes "<shape><i>.jpg" files, while the
    # names listed here end in ".png" — confirm which extension and image
    # directory the training dataset is meant to read.
    records = [
        (f"{shape_name}{i}.png", label)
        for shape_name, label in shape_labels
        for i in range(1, n + 1)
    ]
    df = pd.DataFrame(records)

    # Two unnamed columns, no header row: file name, integer class label
    df.to_csv("data.csv",index=False,header=False)

# circle = 0
# square = 1
# triangle = 2
# pentagon = 3
# hexagon = 4

# Generate training dataset (random backgrounds)

In [None]:
IGNORE = True
if not IGNORE:
    train_path = "Data"
    n = 5
    # One generator per shape class; each writes n JPEGs with a random background colour.
    for make_shapes in (gen_circlesb, gen_squaresb, gen_trianglesb, gen_pentagonsb, gen_hexagonsb):
        make_shapes(n, train_path)

In [None]:
if not IGNORE:
    # Label encoding: circle = 0, square = 1, triangle = 2, pentagon = 3, hexagon = 4
    shape_labels = [("circle", 0), ("square", 1), ("triangle", 2), ("pentagon", 3), ("hexagon", 4)]

    # Build all (file name, label) rows first and create the frame once.
    # Filling a float zero-frame cell by cell with strings relies on an
    # implicit dtype change that recent pandas versions deprecate.
    records = [
        (f"{shape_name}{i}.jpg", label)
        for shape_name, label in shape_labels
        for i in range(1, n + 1)
    ]
    df = pd.DataFrame(records)

    # NOTE(review): this writes the same "data.csv" as the overlay-based
    # training set above, so whichever cell runs last wins.
    df.to_csv("data.csv",index=False,header=False)

# circle = 0
# square = 1
# triangle = 2
# pentagon = 3
# hexagon = 4

# Generate testing dataset

In [17]:
test_path = "test_small"
n1 = 1000
# One generator per shape class; each writes n1 transparent PNGs into test_path.
for make_shapes in (gen_circles, gen_squares, gen_triangles, gen_pentagons, gen_hexagons):
    make_shapes(n1, test_path)



In [None]:
# Composite every generated test shape onto the background picture.
# All five classes are read from test_path with an explicit "/" separator.
# Previously only the circle loop built a valid path: the square, triangle and
# pentagon loops omitted the separator, and the hexagon loop read from "Test/".
for shape_name in ("circle", "square", "triangle", "pentagon", "hexagon"):
    for i in range(1, n1 + 1):
        background_path = "Backgrounds/b3.jpg"
        overlay_path = test_path + "/" + shape_name + str(i) + ".png"
        output_path = "Test/" + shape_name + str(i) + ".jpg"
        overlay_images(background_path, overlay_path, output_path)

In [None]:
# Label encoding: circle = 0, square = 1, triangle = 2, pentagon = 3, hexagon = 4
shape_labels = [("circle", 0), ("square", 1), ("triangle", 2), ("pentagon", 3), ("hexagon", 4)]

# One row per test image, n1 rows per class. The previous version used fixed
# row offsets of 10/20/30/40, which only matches n1 == 10; with a larger n1
# the classes overwrote each other and most rows stayed zero.
records = [
    (f"{shape_name}{i}.jpg", label)
    for shape_name, label in shape_labels
    for i in range(1, n1 + 1)
]
df = pd.DataFrame(records)

# Two unnamed columns, no header row: file name, integer class label
df.to_csv("test.csv",index=False,header=False)

# PyTorch

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
from torchvision.io import read_image
from torch.utils.data import DataLoader
from sklearn.model_selection import KFold
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam

Make a dataset

In [None]:
class CustomImageDataset(Dataset):
    """
    Dataset of images listed in a header-less annotations CSV.

    Each CSV row holds the image file name (column 0, relative to ``img_dir``)
    and its class label (column 1).

    Parameters:
        annotations_file (str): Path of the CSV file with one row per image.
        img_dir (str): Directory that contains the image files.
        transform (callable, optional): Applied to the PIL image of each sample.
        target_transform (callable, optional): Applied to the label of each sample.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        # The annotation files in this notebook are written with header=False,
        # so the first row is data; without header=None pandas would consume
        # it as column names and silently drop one sample.
        self.img_labels = pd.read_csv(annotations_file, header=None)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return it as ``(image, label)``."""
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = read_image(img_path)
        # Convert to a PIL image so PIL-based transforms such as ToTensor can be applied
        image = transforms.ToPILImage()(image)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label
    
# Class index -> human-readable name (same encoding as the label column of the CSV files)
labels_map = dict(enumerate(("Circle", "Square", "Triangle", "Pentagon", "Hexagon")))
# Class names ordered by class index, for lookups such as classes[label]
classes = [labels_map[index] for index in sorted(labels_map)]

In [None]:
# Define normalization parameters
# One value per channel, so the images are expected to have three channels.
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]

# Define a transform that includes normalization
# (imageshow() later undoes this normalization with img / 2 + 0.5)
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert image to tensor
    transforms.Normalize(mean, std)  # Normalize image
])

# Create an instance of CustomImageDataset with normalization
# NOTE(review): the overlay cell above writes the training images to
# "data_small/" while this reads from "Data/" — confirm the intended directory.
train_dataset = CustomImageDataset(annotations_file="data.csv", img_dir="Data/", transform=transform)
test_dataset = CustomImageDataset(annotations_file="test.csv",img_dir="Test/",transform=transform)

Make a model

In [None]:
# Define a convolution neural network
class Network(nn.Module):
    """
    Small convolutional classifier for the five shape classes.

    Layout: conv1 -> bn1 -> ReLU -> conv2 -> bn2 -> ReLU -> dropout -> max-pool
    -> conv4 -> bn4 -> ReLU -> conv5 -> bn5 -> ReLU -> flatten -> fc1.

    Parameters:
        input_size (int): Height and width in pixels of the square input
            images. Defaults to 224, which reproduces the previously
            hard-coded 64 * 106 * 106 input of the final linear layer; pass
            e.g. ``input_size=24`` for 24 x 24 images.

    Raises:
        ValueError: If ``input_size`` is too small to survive the convolutions.
    """

    def __init__(self, input_size=224):
        super(Network, self).__init__()

        # Every BatchNorm2d must be sized to the out_channels of the conv it follows.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=128, kernel_size=5, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(128)
        self.conv2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=5, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        # Conv2d has no append(); dropout is its own layer applied in forward().
        self.drop = nn.Dropout2d(p=0.2)
        self.pool = nn.MaxPool2d(2,2)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=5, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(128)
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=64, kernel_size=5, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2d(64)

        # Each 5x5 conv with padding 1 shrinks the side by 2; the pool halves it.
        side = (input_size - 2 - 2) // 2 - 2 - 2
        if side < 1:
            raise ValueError(f"input_size={input_size} is too small for this network")
        self.fc1 = nn.Linear(64 * side * side, 5)

    def forward(self, input):
        """Return the raw class scores (logits) of shape (batch, 5) for a batch of images."""
        input = input.float()
        output = F.relu(self.bn1(self.conv1(input)))
        output = F.relu(self.bn2(self.conv2(output)))
        output = self.drop(output)
        output = self.pool(output)
        output = F.relu(self.bn4(self.conv4(output)))
        output = F.relu(self.bn5(self.conv5(output)))
        # Flatten everything except the batch dimension
        output = output.reshape(output.size(0), -1)
        output = self.fc1(output)

        return output

# Instantiate a neural network model 
# (module-level global: saveModel(), testAccuracy(), train() and testBatch() all use it)
model = Network()

 
# Define the loss function with Classification Cross-Entropy loss and an optimizer with Adam optimizer
# CrossEntropyLoss is applied to the raw scores returned by Network.forward().
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)

Dataloaders

In [None]:
batch_size = 128
# NOTE(review): the original comment on the next line read "WRONG, testing";
# it is unclear what it referred to — confirm the loader uses the intended dataset.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
# len(train_loader) * batch_size rounds up to a whole number of batches and
# over-counts when the last batch is partial; report the dataset size instead.
print("The number of images in a training set is: ", len(train_dataset))

Training

In [None]:
from torch.autograd import Variable

# Function to save the model
def saveModel():
    """Write the state dict of the global ``model`` to ``myFirstModel.pth``."""
    torch.save(model.state_dict(), "myFirstModel.pth")

# Function to test the model with the test dataset and print the accuracy for the test images
def testAccuracy():
    """
    Evaluate the global ``model`` on ``test_loader``.

    The model is switched to evaluation mode and left in that mode.

    Returns:
        float: Percentage (0-100) of correctly classified test images, or 0.0
        if the loader yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    # Run on whatever device the model currently lives on, so this also works
    # when the model has not been moved to the GPU (e.g. freshly loaded weights).
    device = next(model.parameters()).device

    with torch.no_grad():
        for images, labels in test_loader:
            # run the model on the test set to predict labels
            outputs = model(images.to(device))
            # the class with the highest score is the prediction
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels.to(device)).sum().item()

    if total == 0:
        return 0.0

    # compute the accuracy over all test images
    return 100 * correct / total


# Training function. We simply have to loop over our data iterator and feed the inputs to the network and optimize.
def train(num_epochs):
    """
    Train the global ``model`` on ``train_loader`` for ``num_epochs`` epochs.

    After every epoch the model is evaluated with ``testAccuracy()``; whenever
    the test accuracy exceeds the best value seen so far, the weights are
    saved with ``saveModel()``.

    Parameters:
        num_epochs (int): Number of passes over the training data.
    """
    best_accuracy = 0.0

    # Define your execution device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("The model will be running on", device, "device")
    # Convert model parameters and buffers to CPU or Cuda
    model.to(device)

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        # testAccuracy() leaves the model in eval mode, so switch back to
        # training mode every epoch (affects BatchNorm and Dropout layers).
        model.train()
        running_loss = 0.0

        for i, (images, labels) in enumerate(train_loader, 0):

            # get the inputs (plain tensors; the Variable wrapper is not needed)
            images = images.to(device)
            labels = labels.to(device).long()

            # zero the parameter gradients
            optimizer.zero_grad()
            # predict classes using images from the training set
            outputs = model(images)
            # compute the loss based on model output and real labels
            loss = loss_fn(outputs, labels)
            # backpropagate the loss
            loss.backward()
            # adjust parameters based on the calculated gradients
            optimizer.step()

            # Print the average loss once every 1,000 mini-batches
            running_loss += loss.item()     # extract the loss value
            if i % 1000 == 999:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 1000))
                # zero the loss
                running_loss = 0.0

        # Compute and print the accuracy for this epoch over the whole test set
        accuracy = testAccuracy()
        print('For epoch', epoch+1,'the test accuracy over the whole test set is %d %%' % (accuracy))

        # we want to save the model if the accuracy is the best
        if accuracy > best_accuracy:
            saveModel()
            best_accuracy = accuracy

In [None]:
# Function to show the images
def imageshow(img):
    """
    Display an image tensor with matplotlib.

    The tensor is mapped back with ``img / 2 + 0.5`` (undoing a mean 0.5 /
    std 0.5 normalisation) and transposed from channel-first to channel-last
    layout before plotting.
    """
    restored = img / 2 + 0.5     # unnormalize
    channel_last = np.transpose(restored.numpy(), (1, 2, 0))
    plt.imshow(channel_last)
    plt.show()


# Function to test the model with a batch of images and show the labels predictions
def testBatch():
    """
    Show every batch of ``test_loader`` as an image grid and print the true
    and predicted class names for it.

    The global ``model`` is switched to evaluation mode and left in that mode.
    """
    # Inference only: evaluation mode disables dropout and makes BatchNorm use
    # its running statistics; no_grad avoids building the autograd graph.
    model.eval()
    # Feed the inputs on whatever device the model currently lives on
    device = next(model.parameters()).device

    with torch.no_grad():
        for images, labels in test_loader:
            # Show all images as one image grid
            imageshow(torchvision.utils.make_grid(images))

            # Show the real labels on the screen
            print('Real labels: ', ' '.join('%5s' % classes[int(labels[j])]
                                       for j in range(len(labels))))

            # Let's see which labels the model assigns to these examples
            outputs = model(images.to(device))

            # One score per class; the class with the highest score is the prediction
            _, predicted = torch.max(outputs, 1)

            # Let's show the predicted labels on the screen to compare with the real ones
            print('Predicted: ', ' '.join('%5s' % classes[int(predicted[j])]
                                      for j in range(len(labels))))

In [None]:
# Let's build our model
# train() saves the weights to myFirstModel.pth whenever the test accuracy improves.
train(20)
print('Finished Training')

# Overall accuracy on the test set after the last epoch
# (printed explicitly: a bare call in the middle of a cell shows nothing)
print('Final test accuracy: %.2f %%' % testAccuracy())

# Let's load the best model we just saved
model = Network()
path = "myFirstModel.pth"
# A fresh module starts in training mode; switch to evaluation mode for inference
model.eval()
# map_location lets weights saved from a GPU run load onto the CPU-resident model
model.load_state_dict(torch.load(path, map_location="cpu"))


In [None]:

# Show image grids with the true and predicted labels for the batches in test_loader
# NOTE(review): the original note here read "This is testing against itself..." —
# presumably a reminder that the test set comes from the same generators as
# the training set; confirm.
testBatch()