In [1]:
import pandas as pd

# Read the image/label table from disk; the bare name on the last line
# renders the frame as this cell's output.
df = pd.read_csv(filepath_or_buffer='images_2.csv')
df

Unnamed: 0,image,label
0,ea7b6656-3f84-4eb3-9099-23e623fc1018,Top
1,ea2ffd4d-9b25-4ca8-9dc2-bd27f1cc59fa,Top
2,3b86d877-2b9e-4c8b-a6a2-1d87513309d0,Shoes
3,5d3a1404-697f-479f-9090-c1ecd0413d27,Bottoms
4,b0c03127-9dfb-4573-8934-1958396937bf,Top
...,...,...
4672,3855ea22-5e7f-411f-b1fa-6db27a676c06,Shoes
4673,dfd4079d-967b-4b3e-8574-fbac11b58103,Bottoms
4674,5379356a-40ee-4890-b416-2336a7d84061,Bottoms
4675,65507fb8-3456-4c15-b53e-d1b03bf71a59,Shoes


# Encode the labels and split a sample of the data into train/validation/test sets

In [2]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the label column and convert every label to its integer code.
le = LabelEncoder()
labels = le.fit_transform(df['label'])
labels

array([3, 3, 2, ..., 0, 2, 0])

In [3]:
# Take the first column of the frame (presumably the image identifiers -- verify
# against the CSV layout).
images = df[df.columns[0]]

In [4]:
from sklearn.model_selection import StratifiedShuffleSplit

# Draw a stratified 10% sample of the data so the rest of the notebook works on
# a small subset. random_state pins the shuffle so the same sample is drawn on
# every run.
ss = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42)
for _, group_index in ss.split(images, labels):
    # split() yields positional indices, so select with .iloc; plain [] does a
    # label-based lookup that is only correct for a default RangeIndex.
    group_x = images.iloc[group_index].to_numpy()
    group_y = labels[group_index]

In [5]:
# Hold out a stratified 20% of the sample as the test set.
# random_state pins the shuffle so the split is reproducible across runs.
ss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
for train_index, test_index in ss.split(group_x, group_y):
    train_x, test_x = group_x[train_index], group_x[test_index]
    train_y, test_y = group_y[train_index], group_y[test_index]

In [6]:
# Carve a stratified 20% validation set out of the remaining training data.
# random_state pins the shuffle so the split is reproducible across runs.
# NOTE: this cell overwrites train_x / train_y with the smaller training split,
# so re-running it without re-running the previous split shrinks them again.
ss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
for train_index, valid_index in ss.split(train_x, train_y):
    # Both right-hand sides are evaluated before assignment, so valid_x / valid_y
    # are still taken from the pre-split arrays.
    train_x, valid_x = train_x[train_index], train_x[valid_index]
    train_y, valid_y = train_y[train_index], train_y[valid_index]

# Load the sampled images and convert them to tensors

In [7]:
import numpy as np
from skimage.io import imread
from skimage.transform import resize

def get_images(path_list, image_dir='images_compressed/', size=(100, 100)):
    """Load a batch of JPEG images as grayscale, resized float32 arrays.

    Parameters
    ----------
    path_list : iterable of str
        Image identifiers, i.e. file names without the ``.jpg`` extension.
    image_dir : str, optional
        Prefix prepended to every identifier. It is concatenated as-is, so it
        must end with a path separator.
    size : tuple of int, optional
        Target ``(height, width)`` handed to ``resize``.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of shape ``(len(path_list), height, width)``; an
        empty ``path_list`` yields shape ``(0, height, width)``.
    """
    loaded = []
    for filename in path_list:
        path = image_dir + filename + '.jpg'
        img = imread(path, as_gray=True)
        # resize() converts its input to floating point in [0, 1] unless
        # preserve_range=True is passed, so the pixels are already normalised
        # here. Dividing by 255 again would squash them into [0, 1/255].
        img = resize(img, size)
        loaded.append(img.astype('float32'))

    if not loaded:
        # Keep the rank and dtype stable for empty input so that downstream
        # code (e.g. unsqueeze / shape checks) behaves the same way.
        return np.empty((0, *size), dtype='float32')
    return np.stack(loaded)

In [8]:
# Run get_images over the training identifiers to build the training image array.
train_x_images = get_images(train_x)

In [9]:
# Show the dimensions of the training image array as the cell output.
train_x_images.shape

(299, 100, 100)

In [10]:
# Build the validation and test image arrays with the same loader
# (validation is loaded first, then test).
valid_x_images, test_x_images = get_images(valid_x), get_images(test_x)

In [17]:
import torch

# Wrap each image array in a tensor and insert a new axis at position 1,
# turning (N, H, W) into (N, 1, H, W).
train_x_torch, valid_x_torch, test_x_torch = (
    torch.from_numpy(image_batch).unsqueeze(dim=1)
    for image_batch in (train_x_images, valid_x_images, test_x_images)
)

In [18]:
# Show the dimensions of the training tensor after the extra axis was added.
train_x_torch.shape

torch.Size([299, 1, 100, 100])

In [19]:
# Convert the encoded label arrays of every split into tensors, then show the
# shape of the training labels as the cell output.
train_y_torch, valid_y_torch, test_y_torch = (
    torch.from_numpy(split_labels)
    for split_labels in (train_y, valid_y, test_y)
)
train_y_torch.shape

torch.Size([299])

# Create CNN model

In [31]:
from torch.nn import Sequential, Linear, ReLU, MaxPool2d, BatchNorm2d, Conv2d

# TODO: work out what each Conv2d argument (and the other layer sizes) means and which values to use

class ConvNN(torch.nn.Module):
    """Small two-block convolutional classifier for single-channel images.

    Each block is Conv2d(kernel 5, stride 1, padding 1) -> BatchNorm2d -> ReLU
    -> MaxPool2d(2, 2), followed by a single linear layer that maps the
    flattened feature maps to class scores.

    Parameters
    ----------
    input_size : int or tuple of int, optional
        Height and width of the input images; an int means a square image.
        Defaults to 100, matching the 100x100 images prepared in this notebook.
    num_classes : int, optional
        Number of output scores produced per image. Defaults to 4.
    """

    def __init__(self, input_size=100, num_classes=4):
        super().__init__()

        # Two convolutional blocks, each with 32 output channels.
        self.cnn_layers = Sequential(
            # First block: 1 input channel -> 32 feature maps.
            Conv2d(1, 32, kernel_size=5, stride=1, padding=1),
            BatchNorm2d(32),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
            # Second block: 32 -> 32 feature maps.
            Conv2d(32, 32, kernel_size=5, stride=1, padding=1),
            BatchNorm2d(32),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
        )

        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size

        # The linear layer must match the flattened output of cnn_layers.
        # A hard-coded 32 * 5 * 5 is only right for 28x28 inputs; 100x100
        # inputs leave the conv stack as 32 x 23 x 23.
        flat_features = 32 * self._feature_side(height) * self._feature_side(width)

        self.linear_layers = Sequential(
            Linear(flat_features, num_classes)
        )

    @staticmethod
    def _feature_side(side):
        """Return one spatial dimension after both conv + pool blocks."""
        for _ in range(2):
            side = (side + 2 * 1 - 5) // 1 + 1  # Conv2d: kernel 5, stride 1, padding 1
            side = (side - 2) // 2 + 1          # MaxPool2d: kernel 2, stride 2
        return side

    def forward(self, x):
        """Compute class scores for a batch shaped (N, 1, height, width)."""
        x = self.cnn_layers(x)
        # Flatten everything except the batch axis before the linear layer.
        x = x.view(x.size(0), -1)
        x = self.linear_layers(x)
        return x

In [33]:
# Instantiate the network with the ConvNN constructor's default arguments.
cnn = ConvNN()