In [1]:
import os
import cv2

import numpy as np
import pandas as pd
from tqdm import tqdm, trange

import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid

import torch
import torch.nn as nn


# The cross-entropy loss penalizes the model more heavily the more confident it is in an
# incorrect class, which makes intuitive sense.
from torch.nn import CrossEntropyLoss

# Adam is an optimization algorithm that can be used instead of the classical SGD procedure
# to update network weights iteratively based on the training data.
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader

import torch.multiprocessing as mp

from torchvision import transforms, datasets, models
from torchvision.io import read_image

# Seed the NumPy and PyTorch global random number generators with the same
# value so that repeated runs of the notebook draw the same random numbers.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f5ffbdebd50>

In [2]:
# Prefer the first CUDA GPU and fall back to the CPU when CUDA is unavailable.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Torch is using device:', device)

# torch.cuda.get_device_name() only works for CUDA devices and raises when it is
# handed the CPU device, which would make the CPU fallback above crash this
# cell. Query the GPU name only when a GPU was actually selected.
device_name = torch.cuda.get_device_name(device) if device.type == 'cuda' else 'CPU'
device_name

Torch is using device: cuda:0


'NVIDIA GeForce RTX 3070 Ti Laptop GPU'

In [3]:
def remove_fname_space(path):
    """Rename the entries of directory ``path`` so their names contain no spaces.

    Leading/trailing whitespace is stripped from each name and every remaining
    space character is removed. Entries whose name is already clean are left
    untouched.

    Args:
        path: directory whose direct entries are renamed in place.

    Raises:
        ValueError: if a name consists of whitespace only (nothing would remain).
        FileExistsError: if the cleaned name is already taken by another entry;
            raised instead of renaming so that no existing file is overwritten.
    """
    for filename in os.listdir(path):
        cleaned = filename.strip().replace(" ", "")
        if cleaned == filename:
            # Nothing to do; avoids a pointless rename call per clean file.
            continue
        if not cleaned:
            raise ValueError(
                f"Cannot rename {filename!r}: the name contains only whitespace")

        my_source = os.path.join(path, filename)
        my_dest = os.path.join(path, cleaned)
        # On POSIX os.rename silently replaces an existing destination file,
        # which would destroy data when two names collapse to the same one.
        if os.path.exists(my_dest):
            raise FileExistsError(
                f"Cannot rename {my_source!r}: {my_dest!r} already exists")
        os.rename(my_source, my_dest)

In [51]:
# Relative path of the directory holding the REAL-class training images.
path_tr_real = "../../data/CIFAK/train/REAL"
# One-off clean-up, kept disabled: strips the spaces from the file names there.
# remove_fname_space(path_tr_real)

In [52]:
# List the image files in sorted order. The annotation CSV produced below is
# saved into this very directory, so it must be excluded from the listing:
# otherwise a second run of this cell would treat the CSV as an image, give it
# a label, and later fail when the dataset tries to decode it.
annotation_fname = "tr_annotation.csv"
fname_tr_real = sorted(
    fname for fname in os.listdir(path_tr_real) if fname != annotation_fname
)

# Every file in this directory receives the label 1.
labels_lst = [1] * len(fname_tr_real)

# The CSV is written with pandas' default index column, so on disk the columns
# are (index, 'Image_name', 'True?') and the label sits at column position 2.
tr_dict = {'Image_name': fname_tr_real, 'True?': labels_lst}
tr_df = pd.DataFrame(tr_dict)
tr_df.to_csv(path_tr_real + "/" + annotation_fname)

# Full path of every image, in the same order as the annotation rows.
fpath_tr_real = [path_tr_real + "/" + fname for fname in fname_tr_real]

In [53]:
# cv2.imread(fpath_tr_real[0])

In [54]:
class SliceImage:
    """Transform that cuts an image tensor into square, non-overlapping patches.

    Calling an instance returns a 2-D tensor with one row per patch; each row is
    the flattened (channels, slice_width, slice_width) content of that patch.
    Patches are ordered row by row, left to right.
    """

    def __init__(self, slice_width=8):
        # Side length, in pixels, of every square patch.
        self.slice_width = slice_width

    def slice(self, img):
        """Split ``img`` into a grid of ``slice_width`` x ``slice_width`` patches.

        Args:
            img: tensor laid out as (channels, rows, columns).

        Returns:
            Tensor of shape
            (channels, n_patch_rows, n_patch_cols, slice_width, slice_width).
            Trailing rows/columns that do not fill a whole patch are dropped by
            ``unfold``.
        """
        # unfold(dim, size, step) turns `dim` into the number of windows and
        # appends the window contents as a new LAST dimension.
        # Windows over the rows: (C, H, W) -> (C, H // w, W, w)
        img = img.unfold(1, self.slice_width, self.slice_width)
        # Because the new dimension was appended at the end, dimension 2 is
        # still the column axis: (C, H // w, W, w) -> (C, H // w, W // w, w, w)
        img = img.unfold(2, self.slice_width, self.slice_width)
        return img

    def plot(self, img):
        """Display the patches of ``img`` in a grid mirroring their image position.

        Args:
            img: tensor laid out as (channels, rows, columns).
        """
        # (C, nH, nW, w, w) -> (nH, nW, C, w, w) so patches can be indexed by
        # their grid position.
        patches = self.slice(img).permute(1, 2, 0, 3, 4)
        n_rows, n_cols = patches.size(0), patches.size(1)

        fig = plt.figure(figsize=(2, 2))
        grid = ImageGrid(fig, 111, nrows_ncols=(n_rows, n_cols), axes_pad=0.1)

        # The axes are visited row by row (ImageGrid's default direction), so
        # the flat index maps to (row, column) through the number of columns.
        # This works for any grid size, not only 4 x 4.
        for i, ax in enumerate(grid):
            row, column = divmod(i, n_cols)
            # imshow expects (height, width, channels).
            patch = patches[row][column].permute(1, 2, 0).numpy()
            ax.imshow(patch)
            ax.axis('off')

        plt.show()

    def __call__(self, img):
        """Return the patches of ``img`` as flattened rows.

        Args:
            img: tensor laid out as (channels, rows, columns).

        Returns:
            Tensor of shape (n_patches, channels * slice_width * slice_width).
        """
        patches = self.slice(img)
        channels = patches.size(0)

        # Move the patch-grid axes in front of the channel axis BEFORE
        # flattening. Reshaping the (C, nH, nW, w, w) tensor directly would
        # fill each row with several different single-channel patches instead
        # of one patch across all of its channels.
        patches = patches.permute(1, 2, 0, 3, 4)
        return patches.reshape(-1, self.slice_width * self.slice_width * channels)

In [55]:
# Preprocessing pipeline applied to every input image tensor (x).
_norm_mean = [0.5, 0.5, 0.5]
_norm_std = [0.5, 0.5, 0.5]

_transform_steps = [
    # Round-trip through PIL so that ToTensor can produce a float tensor.
    transforms.ToPILImage(),
    # Optional geometric steps, currently disabled:
    # transforms.Resize((32, 32)),
    # transforms.CenterCrop(224),
    transforms.ToTensor(),
    # Per-channel (x - 0.5) / 0.5.
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
    # Cut the image into 8 x 8 patches.
    SliceImage(8),
]

data_transform = transforms.Compose(_transform_steps)

In [56]:
# Custom Dataset Class
class Images_Dataset(Dataset):
    """Dataset pairing image files with labels read from an annotation CSV.

    Item ``i`` is built from ``images[i]`` and row ``i`` of the CSV, so both
    must be in the same order.

    Args:
        images: sequence of image file paths.
        annotations_file: path of the CSV file holding one row per image.
        transform: optional callable applied to the decoded image tensor.
        label_column: column of the CSV that holds the label, given either as
            an integer position or as a column name. The default ``2`` matches
            a CSV saved by pandas together with its index column, i.e. columns
            (index, image name, label).

    Raises:
        KeyError: if ``label_column`` is a name that is not present in the CSV.
    """

    def __init__(self, images, annotations_file, transform=None, label_column=2):
        self.images = images
        self.labels = pd.read_csv(annotations_file)
        self.transform = transform

        # Resolve a column name to its position once, so that a wrong name
        # fails here rather than inside a DataLoader worker.
        if isinstance(label_column, str):
            label_column = self.labels.columns.get_loc(label_column)
        self.label_column = label_column

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        img = read_image(self.images[index])

        labels = self.labels.iloc[index, self.label_column]

        if self.transform:
            img = self.transform(img)

        return img, labels

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.images)

In [57]:
# Build the training dataset from the image paths and their annotation CSV,
# then wrap it in a DataLoader that serves shuffled mini-batches.
tr_annotation_file = f"{path_tr_real}/tr_annotation.csv"

Im_tr_dataset = Images_Dataset(
    images=fpath_tr_real,
    annotations_file=tr_annotation_file,
    transform=data_transform,
)

Im_tr_loader = DataLoader(
    Im_tr_dataset,
    batch_size=64,
    shuffle=True,
    # Discard the final mini-batch when it is smaller than batch_size, so
    # every batch has the same size.
    drop_last=True,
    # Number of worker processes that load and preprocess images in parallel.
    num_workers=4,
)

In [58]:
# Im_dataset[12]
# plt.imshow(Im_dataset[12].permute(1,2,0))

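Start by slicing each image [3, 32, 32] into 16 patches of shape [3, 8, 8], each of which is then flattened into a 192-element vector: [3, 32, 32] ==> [16, 3, 8, 8] ==> [16, 192]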

In [59]:
# Pull a single mini-batch from the loader and show the shape of its first sample.
first_batch = next(iter(Im_tr_loader))
train_features, train_labels = first_batch
train_features[0].shape

torch.Size([16, 192])