# COGS 181 Neural Networks & Deep Learning Final Project

Import the necessary packages:

- Pandas and NumPy for data management
- Matplotlib for plotting and displaying images
- Torch and its submodules for building the network
- Torchvision for reading images and applying image transforms

In [46]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision
import os

Define the device on which we should train the network and store each tensor.

In [39]:
# Pick the first CUDA GPU when PyTorch reports one, otherwise use the CPU,
# and print the choice so it is visible in the notebook output.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [43]:
# Load the classification output (no header row) and the image metadata table.
classifications = pd.read_csv('./classification/output/images.csv', header=None)
image_data = pd.read_csv('./archive/images.csv')

# clean and format classifications
classifications.columns = ['image', 'class']
# Drop the file extension: keep everything before the last '.' in each name.
classifications['image'] = classifications['image'].apply(lambda x: '.'.join(x.split('.')[:-1]))

# clean and format image data
# Sort by image name and renumber rows 0..n-1. drop=True discards the old
# index rather than materialising it as an 'index' column to delete again.
image_data = image_data.sort_values(by=['image']).reset_index(drop=True)
image_data = image_data.drop(columns=['sender_id'])

image_data.columns = ['image', 'type', 'kids']

types = image_data['type']

# One-hot encode the 'type' column, one output column per distinct value in
# order of first appearance. The unique values and their column positions are
# computed once up front instead of once per row.
type_names = list(types.unique())
column_of = {name: col for col, name in enumerate(type_names)}

encoded = np.zeros((len(types), len(type_names)))
for row, type_name in enumerate(types):
    encoded[row, column_of[type_name]] = 1

# Append the indicator columns, labelled with their type names.
image_data = pd.concat([image_data, pd.DataFrame(encoded, columns=type_names)], axis=1)

# Normalise every column name: lower-case, with '-' and ' ' replaced by '_'.
image_data.columns = [c.lower().replace('-', '_').replace(' ', '_') for c in image_data.columns]

# The raw 'type' column is redundant once the indicator columns exist.
image_data = image_data.drop(columns=['type'])

# Store 'kids' as integers.
image_data['kids'] = image_data['kids'].astype(int)

In [44]:
# load images, takes about a minute

PATH = './archive/images_compressed'

# os.listdir returns entries in arbitrary order; sorting makes the order of
# image_paths (and therefore of tensor_images) reproducible between runs.
image_paths = np.array([f'{PATH}/{i}' for i in sorted(os.listdir(PATH))])

tensor_images = []
failed_paths = []  # files that could not be read as images

for path in image_paths:
    try:
        tensor_images.append(torchvision.io.read_image(path))
    except Exception:
        # Best effort: skip files that cannot be decoded, but keep a record of
        # them. Unlike a bare `except`, this lets KeyboardInterrupt and
        # SystemExit propagate so the cell can still be interrupted.
        failed_paths.append(path)

if failed_paths:
    print(f'Skipped {len(failed_paths)} unreadable file(s) in {PATH}')

In [30]:
def display_image(tsr_img):
    """Show an image tensor with Matplotlib.

    The tensor's first axis is moved to the end (axes reordered to 1, 2, 0)
    before it is handed to ``plt.imshow``.
    Presumably the input is laid out as (channels, height, width) -- confirm
    against the caller.

    Args:
        tsr_img: A 3-dimensional tensor holding the image.
    """
    channels_last = tsr_img.permute(1, 2, 0)
    plt.imshow(channels_last)

In [31]:
# Preprocessing pipeline: resize to 200x200 with antialiasing, then normalize
# each of the three channels with mean 0.5 and standard deviation 0.5.
# NOTE(review): torchvision's Normalize expects a floating-point tensor, so
# integer-typed images presumably need a dtype conversion first -- confirm.
transform_seq = transforms.Compose(
    [
        transforms.Resize(size=(200, 200), antialias=True),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

In [32]:
# Move every image to the training device and cast it to double precision.
# Tensor.to returns a new tensor rather than modifying the original in place,
# so the list is rebuilt from the converted tensors; calling it only for its
# side effect leaves tensor_images unchanged.
# NOTE(review): this keeps every image resident on `device` as float64 --
# confirm that the whole set fits in device memory.
tensor_images = [img.to(device=device, dtype=torch.double) for img in tensor_images]

In [51]:
class CustomImageDataset(Dataset):
    """Dataset that pairs image files on disk with labels listed in a CSV.

    The annotations file is read with ``pd.read_csv`` using its defaults, so
    its first row is treated as a header. Column 0 of each row is the image's
    path relative to ``img_dir`` and column 1 is its label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        """Read the annotations and remember where the images live.

        Args:
            annotations_file: Path to the CSV of (file name, label) rows.
            img_dir: Directory that the file names are joined onto.
            transform: Optional callable applied to each loaded image.
            target_transform: Optional callable applied to each label.
        """
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of annotation rows."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load the image and label at row position ``idx``.

        Args:
            idx: Integer row position in the annotations table.

        Returns:
            An ``(image, label)`` tuple, each passed through its transform
            when one was supplied.
        """
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        # read_image lives in the torchvision.io submodule; the top-level
        # torchvision package has no read_image attribute.
        image = torchvision.io.read_image(img_path)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label