In [1]:
import os
import re
import csv
import string
import pandas as pd
import numpy as np
import statistics
import itertools
from PIL import Image
import time
from io import StringIO
from skimage import io
import matplotlib.pyplot as plt
import torch
from torch import nn
import torchvision
from torchvision import models
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision.transforms import ToTensor, Lambda, Resize, Compose, ToPILImage, Normalize, RandomCrop, RandomHorizontalFlip, RandomVerticalFlip
import matplotlib.pyplot as plt
import csv
from sklearn.metrics import f1_score, confusion_matrix, precision_score, recall_score

In [2]:
# Root folder of the assignment dataset. Set the COMP5329_DATA_DIR environment
# variable to point the notebook at a different copy of the data; when it is
# unset the original location is used, so existing runs behave exactly as before.
DIR = os.environ.get('COMP5329_DATA_DIR', '/Users/sanaali/Downloads/COMP5329S1A2Dataset 2')

# Every other path is derived from DIR so only one value ever needs changing.
TRAIN_CSV = os.path.join(DIR, "train.csv")   # csv handed to the training dataset
TEST_CSV = os.path.join(DIR, "test.csv")     # presumably the unlabelled test split - not used in this section
IMAGES_DIR = os.path.join(DIR, "data")       # folder the image files are read from

In [None]:
import matplotlib.pyplot as pl
from ipywidgets import interact, widgets
from matplotlib import animation
import os
import pandas as pd
from skimage import io
from torch.utils.data import Dataset
from torchvision import transforms

In [3]:
class AssignmentDataset(Dataset):
    '''
    The AssignmentDataset Class, child of torch.utils.data.Dataset.

    The csv file is parsed into a pandas dataframe once, in the constructor. Images are read from
    image_dir one at a time, when an observation is indexed.

    Parameters:
    csv_file (str): A string representation of the file directory to the csv data file.
    image_dir (str): A string representation of the file directory to the images data file.
    transform (callable): Optional callable applied to the loaded image, e.g. a torchvision.transforms.Compose object.
    target_transform (callable): Optional callable applied to the list of integer labels. Used to apply one-hot encoding.
    has_labels (Bool): A boolean value to flag whether the dataset has labels or not, i.e. if it is training or testing.

    Attributes:
    dataframe (pandas.DataFrame): The parsed csv file; read through its 'ImageID', 'Caption' and (when has_labels is True) 'Labels' columns.
    '''

    def __init__(self, csv_file, image_dir, transform=None, target_transform=None, has_labels = True):
        self.image_dir = image_dir
        self.transform = transform
        self.target_transform = target_transform
        self.has_labels = has_labels

        # Captions may contain unescaped double quotes, which would break csv parsing.
        # Every double quote that is NOT directly preceded by a comma and that is followed
        # (after optional whitespace) by something other than a newline gets a '/' inserted
        # in front of it; pandas is then told that '/' is the escape character.
        # NOTE(review): a '/' that already occurs in the file is presumably consumed as an
        # escape character by pandas as well - confirm no caption relies on a literal '/'.
        with open(csv_file) as file:
            lines = [re.sub(r'([^,])"(\s*[^\n])', r'\1/"\2', line) for line in file]

        self.dataframe = pd.read_csv(StringIO(''.join(lines)), escapechar="/")

    def __len__(self):
        '''
        Returns the length of the dataframe representation of the csv component of the dataset.

        Parameters:
        None

        Returns:
        self.dataframe.shape[0] (int): The length of the datasets dataframe representation.

        '''
        return self.dataframe.shape[0]

    def __getitem__(self, idx):
        '''
        Reads one observation from the dataframe, loads its image and returns everything as a tuple.

        Parameters:
        idx (int): The index for which observation to return. A tensor index is converted with .tolist() first.

        Returns:
        sample (tuple): (image, labels, ImageID, caption) when has_labels is True, otherwise (image, ImageID, caption).
                        The image has passed through transform and the labels through target_transform when those are set.
        '''

        if torch.is_tensor(idx):
            idx = idx.tolist()

        columns = self.dataframe.columns
        img_id = self.dataframe.iloc[idx, columns.get_loc('ImageID')]
        caption = self.dataframe.iloc[idx, columns.get_loc('Caption')]

        # The ImageID doubles as the file name of the image inside image_dir.
        img = io.imread(os.path.join(self.image_dir, img_id))

        labels = None
        if self.has_labels:
            raw_labels = self.dataframe.iloc[idx, columns.get_loc('Labels')]
            # pandas infers an integer column when every row holds a single label, so the
            # cell is coerced to str before splitting; split() also tolerates repeated spaces.
            labels = [int(x) for x in str(raw_labels).split()]

            if self.target_transform:
                labels = self.target_transform(labels)

        # Applied after the target transform, the same order as before for labelled data.
        if self.transform:
            img = self.transform(img)

        if self.has_labels:
            return (img, labels, img_id, caption)
        return (img, img_id, caption)
    
# Finding mean/std: build an un-normalised training dataset for the channel statistics
NUM_LABELS = 19   # length of the one-hot label vector
BATCH_SIZE = 1    # one image per batch for the dataset checks


def _one_hot_labels(label_ids):
    '''
    Turns a list of 1-based label ids into a uint8 vector of length NUM_LABELS.

    Parameters:
    label_ids (list(int)): The label ids of one observation, counted from 1.

    Returns:
    encoded (torch.Tensor): A uint8 tensor with a 1 at position (id - 1) for every id and 0 elsewhere.
    '''
    zero_based = torch.tensor(label_ids) - 1
    encoded = torch.zeros(NUM_LABELS, dtype=torch.uint8)
    return encoded.scatter_(dim=0, index=zero_based, value=1)


# Only a tensor conversion is applied here - no resizing or normalisation.
transforms = Compose([ToTensor()])

train_dataset_check = AssignmentDataset(
    csv_file=TRAIN_CSV,
    image_dir=IMAGES_DIR,
    transform=transforms,
    target_transform=Lambda(_one_hot_labels),
    has_labels=True,
)

In [4]:
def image_stats(checking_dataloader):
    '''
    A function to calculate the channel means and standard deviations of the image input in a training dataset.
    Accumulates the per-image channel means and squared means, then calculates the overall mean and standard
    deviation by taking the first moment (mean) and the square root of the second central moment (variance).

    Every image carries the same weight regardless of its resolution. Each batch is weighted by the number of
    images it holds, so a smaller final batch does not skew the result; with a batch size of 1 the output is
    unchanged from a plain per-batch average.

    Parameters:
    checking_dataloader (torch.utils.data.Dataloader): The Dataloader object yielding 4-tuples whose first element
        is the image batch, indexed as (batch, channel, height, width). The remaining three elements are ignored.

    Returns:
      means (torch.Tensor): A tensor object containing the channel means as floats.
      stdevs (torch.Tensor): A tensor object containing the channel standard deviations as floats.

    Raises:
      ValueError: If the dataloader yields no images.
    '''

    sum_channels, sumsq_channels, n_images = 0, 0, 0

    for x, _, _, _ in checking_dataloader:
        batch_images = x.size(0)
        # mean over (batch, height, width) times the batch length == sum of the per-image channel means
        sum_channels += torch.mean(x, dim=[0, 2, 3]) * batch_images
        sumsq_channels += torch.mean(x**2, dim=[0, 2, 3]) * batch_images
        n_images += batch_images

    if n_images == 0:
        raise ValueError('image_stats received an empty dataloader; cannot compute channel statistics.')

    means = sum_channels / n_images
    # Floating point rounding can push E[x^2] - E[x]^2 marginally below zero for
    # near-constant channels; clamp so the square root never yields NaN.
    variances = torch.clamp(sumsq_channels / n_images - means**2, min=0)
    stdevs = variances**0.5

    return means, stdevs

In [6]:
# Sequential (unshuffled) loader over the check dataset, BATCH_SIZE images per batch.
train_dataloader_check = DataLoader(
    dataset=train_dataset_check,
    shuffle=False,
    batch_size=BATCH_SIZE,
)

In [7]:
# Per-channel mean and standard deviation of the training images, computed from
# the unshuffled check loader. Presumably these feed a Normalize transform later
# on - that step is not part of this section.
means, stdevs = image_stats(train_dataloader_check)
print(f'Mean is: {means} \n Standard Deviation is: {stdevs}')

Mean is: tensor([0.4638, 0.4490, 0.4222]) 
 Standard Deviation is: tensor([0.2725, 0.2698, 0.2849])


In [8]:
def get_image_sizes(checking_dataloader):
    '''
    Collects the height and width of every image batch yielded by the supplied dataloader.

    Parameters:
    checking_dataloader (torch.utils.data.Dataloader): The Dataloader object with a batch size of 1, yielding 4-tuples whose first element is the image batch.

    Returns:
      heights (list(int)): The size of dimension 2 of each image batch, in iteration order.
      widths (list(int)): The size of dimension 3 of each image batch, in iteration order.
    '''

    # Single pass over the loader: dimension 2 is taken as the height, dimension 3 as the width.
    dims = [(batch.size(2), batch.size(3)) for batch, _, _, _ in checking_dataloader]

    heights = [height for height, _ in dims]
    widths = [width for _, width in dims]

    return heights, widths

In [9]:
heights, widths = get_image_sizes(train_dataloader_check)

# Median and mean of both dimensions, one statistic per output line.
print(
    f'Median height is: {statistics.median(heights)}',
    f'Mean height is: {statistics.mean(heights)}',
    f'Median width is: {statistics.median(widths)}',
    f'Mean width is: {statistics.mean(widths)}',
    sep='\n',
)

Median height is: 240.0
Mean height is: 240.5502066942259
Median width is: 320.0
Mean width is: 289.9553940525403
