In [81]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.patches import Polygon
import matplotlib.image as mpimg
import numpy as np
from PIL import Image
import os
import pandas as pd

### Generate shape pngs
Transparent background so can be overlayed over other random backgrounds. Random colour. Random size.

In [39]:
def gen_circles(num_images):
    """
    Generate PNG images of circles with random radius, color and position.
    
    Parameters:
        num_images (int): Number of PNG images to generate.
    """
    for i in range(1, num_images + 1):
        # Generate random radius and color
        radius = np.random.uniform(0.1, 0.5)  # Random radius between 0.1 and 0.5
        color = np.random.rand(3,)  # Random RGB color
        pos1 = np.random.uniform(0.3, 0.7)
        pos2 = np.random.uniform(0.3, 0.7)

        fig, ax = plt.subplots()
        circle_outer = plt.Circle((pos1, pos2), radius, color=color, fill=False, linewidth =2)
        ax.add_artist(circle_outer)
        ax.set_aspect('equal', adjustable='box')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')  # Turn off axes
        fig.set_size_inches(224/100, 224/100)
        filename = f"Raw shapes/circle{i}.png"
        fig.savefig(filename, dpi=100, bbox_inches='tight', transparent=True)
        #plt.show()
        plt.close(fig)

# Example usage
gen_circles(5)  # Generates 5 PNG images: circle1.png, circle2.png, ..., circle5.png


In [40]:
def gen_squares(num_images):
    """
    Generate PNG images of circles with random radius, color and position.
    
    Parameters:
        num_images (int): Number of PNG images to generate.
    """
    sf = 0.975
    for i in range(1, num_images + 1):
        # Generate random radius and color
        length1 = np.random.uniform(0.1, 0.5) # Random length between 0.1 and 0.5
        length2 = length1 + np.random.uniform(-0.02, 0.02) # Make other length have some variance
        color = np.random.rand(3,)  # Random RGB color
        pos1 = np.random.uniform(0.3, 0.7)
        pos2 = np.random.uniform(0.3, 0.7)
        angle = np.random.uniform(0,360)

        fig, ax = plt.subplots()
        square_outer = plt.Rectangle((pos1, pos2), length1, length2, color=color, fill=False, angle=angle)
        ax.add_artist(square_outer)
        #ax.add_artist(square_inner)
        ax.set_aspect('equal', adjustable='box')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')  # Turn off axes
        filename = f"Raw shapes/square{i}.png"
        fig.set_size_inches(224/100, 224/100)
        fig.savefig(filename, dpi=100, bbox_inches='tight', transparent=True)
        #plt.show()
        plt.close(fig)

# Example usage
gen_squares(5)  # Generates 5 PNG images: circle1.png, circle2.png, ..., circle5.png


In [41]:

def gen_triangles(num_images):
    """
    Generate PNG images of random triangles with random colors.
    
    Parameters:
        num_images (int): Number of PNG images to generate.
    """
    for i in range(1, num_images + 1):
        # Generate random vertices for the triangle
        vertices = np.random.rand(3, 2)  # Generate 3 random (x, y) coordinates
        
        # Generate a random color for the triangle
        color = np.random.rand(3,)  # Random RGB color
        
        # Create a Polygon patch using the generated vertices
        triangle = Polygon(vertices, closed=True, color=color, fill=False)
        
        # Create plot
        fig, ax = plt.subplots()
        
        # Add triangle to plot
        ax.add_patch(triangle)
        
        # Set plot properties
        ax.set_aspect('equal', adjustable='box')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')  # Turn off axes
        
        # Save plot as PNG
        filename = f"Raw shapes/triangle{i}.png"
        fig.set_size_inches(224/100, 224/100)
        fig.savefig(filename, dpi=100, bbox_inches='tight', transparent=True)
        
        # Close plot
        plt.close(fig)

# Example usage
gen_triangles(5)  # Generates 5 PNG images of random triangles


In [42]:
def gen_pentagons(num_images):
    """
    Generate PNG images of random pentagons with random colors.
    
    Parameters:
        num_images (int): Number of PNG images to generate.
    """
    for i in range(1, num_images + 1):
        # Generate random vertices for the pentagon
        angle = 2 * np.pi / 5
        rotation = np.random.rand() * 2 * np.pi  # Random rotation
        vertices = np.array([[np.cos(rotation + j * angle), np.sin(rotation + j * angle)] for j in range(5)])
        # Scale and translate the pentagon to fit inside [0, 1] x [0, 1] square
        vertices = (vertices - vertices.min(axis=0)) / (vertices.max(axis=0) - vertices.min(axis=0))

        # Generate random scaling factors for x and y axes
        scale_factors = np.random.rand(2,) * 0.8 + 0.2  # Random scale factors between 0.2 and 1.0
        vertices[:, 0] *= scale_factors[0]
        vertices[:, 1] *= scale_factors[1]
        
        # Generate a random color for the pentagon
        color = np.random.rand(3,)  # Random RGB color
        
        # Create a Polygon patch using the generated vertices
        pentagon = Polygon(vertices, closed=True, color=color, fill=False)
        
        # Create plot
        fig, ax = plt.subplots()
        
        # Add pentagon to plot
        ax.add_patch(pentagon)
        
        # Set plot properties
        ax.set_aspect('equal', adjustable='box')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')  # Turn off axes
        
        # Save plot as PNG
        filename = f"Raw shapes/pentagon{i}.png"
        fig.set_size_inches(224/100, 224/100)
        fig.savefig(filename, dpi=100, bbox_inches='tight', transparent=True)
        
        # Close plot
        plt.close(fig)

# Example usage
gen_pentagons(5)  # Generates 5 PNG images of random pentagons

In [45]:
def gen_hexagons(num_images):
    """
    Generate PNG images of random hexagons with random sizes and colors.
    
    Parameters:
        num_images (int): Number of PNG images to generate.
    """
    for i in range(1, num_images + 1):
        # Generate random vertices for the hexagon
        angle = 2 * np.pi / 6
        rotation = np.random.rand() * 2 * np.pi  # Random rotation
        vertices = np.array([[np.cos(rotation + j * angle), np.sin(rotation + j * angle)] for j in range(6)])
        
        # Translate the hexagon to fit inside [0, 1] x [0, 1] square
        vertices = (vertices - vertices.min(axis=0)) / (vertices.max(axis=0) - vertices.min(axis=0))

        # Generate random scaling factors for x and y axes
        scale_factors = np.random.rand(2,) * 0.8 + 0.2  # Random scale factors between 0.2 and 1.0
        vertices[:, 0] *= scale_factors[0]
        vertices[:, 1] *= scale_factors[1]

        # Generate a random color for the hexagon
        color = np.random.rand(3,)  # Random RGB color
        
        # Create a Polygon patch using the generated vertices
        hexagon = Polygon(vertices, closed=True, color=color, fill=False)
        
        # Create plot
        fig, ax = plt.subplots()
        
        # Add hexagon to plot
        ax.add_patch(hexagon)
        
        # Set plot properties
        ax.set_aspect('equal', adjustable='box')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')  # Turn off axes
        
        # Save plot as PNG
        filename = f"Raw shapes/hexagon{i}.png"
        fig.set_size_inches(224/100, 224/100)
        fig.savefig(filename, dpi=100, bbox_inches='tight', transparent=True)
        
        # Close plot
        plt.close(fig)

# Example usage
gen_hexagons(5)  # Generates 5 PNG images of random hexagons

### Overlaying over backgrounds

Here I am overlaying the shapes over some pictures I took of different white paper.

In [78]:
def overlay_images(background_path, overlay_path, output_path):
    try:
        # Open background and overlay images
        background = Image.open(background_path)
        overlay = Image.open(overlay_path)

        # Convert overlay image to RGBA mode if it's not already
        if overlay.mode != 'RGBA':
            overlay = overlay.convert('RGBA')

        # Resize overlay image to 224x224 pixels
        overlay = overlay.resize((224, 224))

        # Resize background image to 224x224 pixels
        background = background.resize((224, 224))

        # Calculate the maximum allowed starting positions for the overlay
        max_x = 0
        max_y = 0

        # Choose a random starting position for the overlay
        start_x = np.random.randint(0, max_x + 1)
        start_y = np.random.randint(0, max_y + 1)

        # Paste the overlay image onto the background image
        background.paste(overlay, (start_x, start_y), overlay)

        # Check if the output directory exists, if not, create it
        output_dir = os.path.dirname(output_path)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Save the output image
        background.save(output_path)
        #print("Image saved successfully as", output_path)
    except Exception as e:
        print("Error occurred while saving the image:", e)

# Example usage:
background_path = "Backgrounds/b3.jpg"
overlay_path = "Raw shapes/circle1.png"
output_path = "Overlayed/overlay1.jpg"
overlay_images(background_path, overlay_path, output_path)


# Generate images and dataset

In [51]:
gen_circles(100)
gen_squares(100)
gen_triangles(100)
gen_pentagons(100)
gen_hexagons(100)

In [80]:
for i in range(1,101,1):
    background_path = "Backgrounds/b3.jpg"
    overlay_path = "Raw shapes/circle" + str(i) + ".png"
    output_path = "Data/circle" + str(i) + ".jpg"
    overlay_images(background_path, overlay_path, output_path)
for i in range(1,101,1):
    background_path = "Backgrounds/b3.jpg"
    overlay_path = "Raw shapes/square" + str(i) + ".png"
    output_path = "Data/square" + str(i) + ".jpg"
    overlay_images(background_path, overlay_path, output_path)
for i in range(1,101,1):
    background_path = "Backgrounds/b3.jpg"
    overlay_path = "Raw shapes/triangle" + str(i) + ".png"
    output_path = "Data/triangle" + str(i) + ".jpg"
    overlay_images(background_path, overlay_path, output_path)
for i in range(1,101,1):
    background_path = "Backgrounds/b3.jpg"
    overlay_path = "Raw shapes/pentagon" + str(i) + ".png"
    output_path = "Data/pentagon" + str(i) + ".jpg"
    overlay_images(background_path, overlay_path, output_path)
for i in range(1,101,1):
    background_path = "Backgrounds/b3.jpg"
    overlay_path = "Raw shapes/hexagon" + str(i) + ".png"
    output_path = "Data/hexagon" + str(i) + ".jpg"
    overlay_images(background_path, overlay_path, output_path)

In [119]:
df = pd.DataFrame(np.zeros((500,2)))

for i in range(100):
    df.iat[i,0] = "circle" + str(i+1) + ".jpg"
    df.iat[i,1] = 0

for i in range(100):
    df.iat[i+100,0] = "square" + str(i+1) + ".jpg"
    df.iat[i+100,1] = 1

for i in range(100):
    df.iat[i+200,0] = "triangle" + str(i+1) + ".jpg"
    df.iat[i+200,1] = 2

for i in range(100):
    df.iat[i+300,0] = "pentagon" + str(i+1) + ".jpg"
    df.iat[i+300,1] = 3

for i in range(100):
    df.iat[i+400,0] = "hexagon" + str(i+1) + ".jpg"
    df.iat[i+400,1] = 4

df.to_csv("data.csv",index=False,header=False)

# circle = 0
# square = 1
# triangle = 2
# pentagon = 3
# hexagon = 4

# Pytorch

In [133]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
from torchvision.io import read_image
from torch.utils.data import DataLoader
from sklearn.model_selection import KFold
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [110]:
class CustomImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = read_image(img_path)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label
    
labels_map = {
    0: "Circle",
    1: "Square",
    2: "Triangle",
    3: "Pentagon",
    4: "Hexagon",
}

In [120]:
img_dir = "Data/"
annotations_file = "data.csv"
dataset = CustomImageDataset(annotations_file=annotations_file,img_dir=img_dir)

In [142]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x=x.float()
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [143]:
train_dataloader = DataLoader(dataset, batch_size=64, shuffle=True) # WRONG, testing
test_dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

In [144]:
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_dataloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')

RuntimeError: Given groups=1, weight of size [16, 8, 5, 5], expected input[64, 6, 110, 110] to have 8 channels, but got 6 channels instead

kFold

In [137]:
k = 5
kf = KFold(n_splits=k, shuffle=True)

for fold, (train_indices, val_indices) in enumerate(kf.split(dataset)):
    print(f"Fold {fold + 1}/{k}")

    # Split data into training and validation sets
    train_data = torch.utils.data.Subset(dataset, train_indices)
    val_data = torch.utils.data.Subset(dataset, val_indices)

    # Initialize data loaders
    train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=64, shuffle=False)

    # Initialize model
    model = model  # Replace YourModelHere with your model class

    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training loop
    num_epochs = 10  # Define the number of epochs
    for epoch in range(num_epochs):
        # Training
        model.train()
        for batch_data, batch_targets in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_data)
            loss = criterion(outputs, batch_targets)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for batch_data, batch_targets in val_loader:
                outputs = model(batch_data)
                _, predicted = torch.max(outputs, 1)
                total += batch_targets.size(0)
                correct += (predicted == batch_targets).sum().item()
                val_loss += criterion(outputs, batch_targets).item()

        val_loss /= len(val_loader)
        accuracy = correct / total
        print(f"Epoch {epoch + 1}/{num_epochs}, Validation Loss: {val_loss}, Accuracy: {accuracy}")

Fold 1/5


RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x100352 and 131072x256)