In [None]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import os
import cv2 
from google.colab.patches import cv2_imshow
from PIL import Image
import sys
import math
import random
from skimage.transform import rotate, AffineTransform
from skimage.util import random_noise

%tensorflow_version 2.x
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from tensorflow.keras.regularizers import l1
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

import torch
import torchvision.transforms as transforms

from torch.utils.data import TensorDataset, ConcatDataset
import random

Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


In [None]:
from google.colab import drive

# Make Google Drive visible under /content/drive; the dataset paths used in
# the following cells all live below this mount point.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Data Preparation

In [None]:
# This class reads the data for each dataset
class readDataset:
    """Reads image-classification datasets from disk into NumPy arrays.

    Every image is resized to ``height`` x ``width`` pixels (200 x 200 unless
    the attributes are changed after construction).

    Args:
        num_classes: Number of classes to load; also the length of the
            one-hot label vectors that are returned.
        path: Root folder of the dataset. ``DogsAndCats`` expects it to hold
            ``training_set/<class>/`` and ``test_set/<class>/`` sub-folders.
    """

    def __init__(self, num_classes, path):
        self.height = 200
        self.width = 200
        self.channels = 3
        self.path = path
        self.num_classes = num_classes

    def _load_images(self, directory, names):
        """Read and resize the files ``names`` located in ``directory``.

        ``cv2.imread`` returns ``None`` for files it cannot decode (hidden
        system files, corrupt downloads, ...). Such files are skipped and
        reported instead of crashing ``cv2.resize``.

        Returns:
            A list with one resized image array per readable file.
        """
        images = []
        skipped = []
        for name in names:
            image = cv2.imread(os.path.join(directory, name))
            if image is None:
                skipped.append(name)
                continue
            # cv2.resize expects dsize as (width, height), not (height, width).
            image = cv2.resize(image, (self.width, self.height))
            images.append(np.array(image))
        if skipped:
            print("skipped", len(skipped), "unreadable file(s) in", directory)
        return images

    def DogsAndCats(self):
        """Load the dogs-vs-cats dataset.

        Class index 0 is ``dogs`` and class index 1 is ``cats``. File names
        are processed in sorted order so repeated runs yield the same arrays.

        Returns:
            ``(train_data, train_labels, test_data, test_labels)`` where the
            data entries are stacked image arrays and the label entries are
            the one-hot matrices produced by ``to_categorical``.

        Raises:
            ValueError: If ``num_classes`` exceeds the number of known classes.
        """
        class_names = ['dogs', 'cats']
        if self.num_classes > len(class_names):
            raise ValueError(
                "num_classes=%d but only %d classes are available: %s"
                % (self.num_classes, len(class_names), class_names)
            )
        # two lists containing data train
        train_data_ = []
        train_labels_ = []
        # two lists containing data test
        test_data_ = []
        test_labels_ = []
        # read path names of dataset
        for i in range(self.num_classes):
            path_train = os.path.join(self.path, "training_set", class_names[i])
            path_test = os.path.join(self.path, "test_set", class_names[i])
            data_names_train = sorted(os.listdir(path_train))
            data_names_test = sorted(os.listdir(path_test))
            print("\n class: ",i)
            print("number of train data: ",len(data_names_train))
            print('--------------------')
            print("number of test data: ",len(data_names_test))
            # read train data; labels are added only for images actually loaded
            train_images = self._load_images(path_train, data_names_train)
            train_data_.extend(train_images)
            train_labels_.extend([i] * len(train_images))
            # read test set data
            test_images = self._load_images(path_test, data_names_test)
            test_data_.extend(test_images)
            test_labels_.extend([i] * len(test_images))
        # stack the images and one-hot encode the labels
        train_data_ = np.array(train_data_)
        test_data_ = np.array(test_data_)

        train_labels_ = to_categorical(np.array(train_labels_), self.num_classes)
        test_labels_ = to_categorical(np.array(test_labels_), self.num_classes)
        return train_data_, train_labels_, test_data_, test_labels_



In [None]:
# Root folder on Drive that holds the training_set/ and test_set/ directories.
dataset_path = "/content/drive/MyDrive/projectDataset/dogsAndCats/dataset/"
# Two classes: dogs and cats.
data_dogs_cats = readDataset(num_classes=2, path=dataset_path)
(train_data, train_labels,
 test_data, test_labels) = data_dogs_cats.DogsAndCats()


 class:  0
number of train data:  4033
--------------------
number of test data:  1000

 class:  1
number of train data:  4009
--------------------
number of test data:  1000


In [None]:
# this cell is saving the train and test data in to the drive
path_save = '/content/drive/MyDrive/projectDataset/dogsAndCats/dataset/numpyFiles/'
# np.save does not create missing folders, so make sure the target exists
# before writing (no-op when the folder is already there).
os.makedirs(path_save, exist_ok=True)
np.save(path_save+'catDogs_train_data.npy',train_data)
np.save(path_save+'catDogs_train_labels.npy',train_labels)
np.save(path_save+'catDogs_test_data.npy',test_data)
np.save(path_save+ 'catDogs_test_labels.npy',test_labels)

In [None]:
# this cell loads the previously saved NumPy arrays from the drive
# uncomment it to skip re-reading the raw images when the .npy files already exist

# path_save = '/content/drive/MyDrive/projectDataset/dogsAndCats/dataset/numpyFiles/'
# train_data = np.load(path_save+'catDogs_train_data.npy')
# train_labels = np.load(path_save+'catDogs_train_labels.npy')
# test_data = np.load(path_save+'catDogs_test_data.npy')
# test_labels = np.load(path_save+'catDogs_test_labels.npy')

In [None]:
# normalizing the data
# train_data = train_data/255.0
# test_data = test_data/255.0

In [None]:
# split train to train and val
# 80 % of the training images stay in the training set, 20 % become the
# validation set. A fixed random_state makes the split identical on every
# run, so results can be compared between sessions.
train_data, val_data, train_labels, val_label = train_test_split(train_data,
                                                                 train_labels,
                                                                 test_size=0.2,
                                                                 random_state=42)

# Data Augmentation in PyTorch

In [None]:
# create dataloader pytorch
import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset, TensorDataset
# convert numpy to tensors
# torch.as_tensor accepts NumPy arrays as well as tensors, so this cell can be
# re-run safely even though it overwrites its own inputs (the previous
# `.astype` calls failed on a second run because tensors have no `.astype`).
train_data = torch.as_tensor(train_data, dtype=torch.float32)
test_data = torch.as_tensor(test_data, dtype=torch.float32)
val_data = torch.as_tensor(val_data, dtype=torch.float32)
train_labels = torch.as_tensor(train_labels, dtype=torch.float32)
test_labels = torch.as_tensor(test_labels, dtype=torch.float32)
val_labels = torch.as_tensor(val_label, dtype=torch.float32)
# create tensor dataset: each item is an (image, one-hot label) pair
train_dataset = TensorDataset(train_data, train_labels)
val_dataset = TensorDataset(val_data, val_labels)
test_dataset = TensorDataset(test_data, test_labels)

In [None]:
# Augmentation pipeline applied to channel-first (C, H, W) image tensors.
#
# The image tensors are float32 with raw pixel values in [0, 255] (the
# normalisation cell is commented out). ToPILImage treats float tensors as
# being in [0, 1] and multiplies them by 255 before casting to uint8, which
# overflows and corrupts the picture. ToTensor then returns values in [0, 1],
# a different scale from the untouched samples the augmented ones are
# concatenated with. The two Lambda steps fix both problems: cast to uint8
# before the PIL conversion and scale back to [0, 255] afterwards.
# NOTE: if the /255 normalisation cell is re-enabled, remove both Lambda steps.
transform = transforms.Compose([
    transforms.Lambda(lambda img: img.to(torch.uint8)),
    transforms.ToPILImage(),
    transforms.RandomRotation(degrees=45),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomAffine(degrees=0, translate=(0.15, 0.15)),
    # transforms.RandomResizedCrop(size=(224, 224), scale=(0.5, 1.0)),
    transforms.ToTensor(),
    transforms.Lambda(lambda img: img * 255.0),
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [None]:
# Draw, without replacement, the indices of half of the training samples and
# half of the validation samples; these are the samples that get augmented.
train_random_list = random.sample(range(len(train_data)),
                                  k=round(0.5 * len(train_data)))
val_random_list = random.sample(range(len(val_data)),
                                k=round(0.5 * len(val_data)))

In [None]:
# Apply transformations to each sample in the dataset
# Only the sampled indices are visited, in ascending order. This yields the
# same samples in the same order as scanning the whole dataset with a counter,
# without a linear `in list` lookup for every single sample.
transformed_train = []
for idx in sorted(train_random_list):
    sample, label = train_dataset[idx]
    # the transform works on channel-first images, while the samples are
    # stored channel-last: permute before and after
    transformed_sample = transform(sample.permute(2, 0, 1))
    transformed_train.append((transformed_sample.permute(1, 2, 0), label))

In [None]:
# Training set = every original sample followed by the augmented copies.
augmented_train_dataset = ConcatDataset([
    train_dataset,
    transformed_train,
])

In [None]:
# Apply transformations to each sample in the val dataset
# Only the sampled indices are visited, in ascending order. This yields the
# same samples in the same order as scanning the whole dataset with a counter,
# without a linear `in list` lookup for every single sample.
transformed_val = []
for idx in sorted(val_random_list):
    sample, label = val_dataset[idx]
    # the transform works on channel-first images, while the samples are
    # stored channel-last: permute before and after
    transformed_sample = transform(sample.permute(2, 0, 1))
    transformed_val.append((transformed_sample.permute(1, 2, 0), label))

In [None]:
# Validation set = every original sample followed by the augmented copies.
augmented_val_dataset = ConcatDataset([
    val_dataset,
    transformed_val,
])

In [None]:
# create dataloader
# Training batches are shuffled, and the last incomplete batch is dropped so
# every optimisation step sees exactly 128 samples.
train_loader = DataLoader(augmented_train_dataset, batch_size=128,
                          shuffle=True, drop_last=True)
# Evaluation loaders must cover every sample in a stable order. With
# drop_last=True the final partial batch was silently excluded from the
# metrics (e.g. 2000 test images -> only 15 * 128 = 1920 evaluated), and
# shuffling makes predictions impossible to align with the stored labels.
# NOTE: the last batch of these loaders can now be smaller than 128.
val_loader = DataLoader(augmented_val_dataset, batch_size=128,
                        shuffle=False, drop_last=False)
test_loader = DataLoader(test_dataset, batch_size=128,
                         shuffle=False, drop_last=False)