In [1]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import os
import cv2 
from google.colab.patches import cv2_imshow
from PIL import Image
import sys
import math
import random
from skimage.transform import rotate, AffineTransform
from skimage.util import random_noise
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
import torch
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset, ConcatDataset
import random
import tensorflow as tf
from tensorflow import keras

In [2]:
# Mount Google Drive into the Colab filesystem; the dataset paths used in the
# cells below all live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Data Preparation

In [3]:
# This class reads the data for each dataset
class readDataset:
    """Load an image-classification dataset from disk into NumPy arrays.

    Args:
        num_classes: Number of classes to read. The first ``num_classes``
            names in the class-name file are used, and a class's label is
            its position in that file.
        path: Root directory of the dataset (a trailing slash is optional).

    Attributes:
        height, width: Size in pixels that every image is resized to.
        channels: Number of colour channels (stored only; the loader below
            does not read it).
    """

    def __init__(self, num_classes, path):
        self.height = 200
        self.width = 200
        self.channels = 3
        self.path = path
        self.num_classes = num_classes

    def Animal90(self):
        """Read the Animal-90 dataset rooted at ``self.path``.

        Expected layout::

            <path>/name_of_the_animals.txt   one class name per line
            <path>/animals/<class name>/     image files of that class

        Returns:
            tuple: ``(data, data_labels)`` where ``data`` is a NumPy array
            stacking every readable image after resizing to
            ``(height, width)`` -- images are in OpenCV's default BGR
            channel order -- and ``data_labels`` is the matching one-hot
            matrix with ``num_classes`` columns.

        Raises:
            IndexError: If the class-name file lists fewer than
                ``num_classes`` names.
        """
        names_file = os.path.join(self.path, 'name_of_the_animals.txt')
        # extracting class names; blank lines (e.g. a trailing newline) are
        # ignored so they cannot become a bogus empty-named class
        with open(names_file) as f:
            class_names = [line.strip() for line in f if line.strip()]
        if len(class_names) < self.num_classes:
            raise IndexError(
                "expected at least {} class names in {}, found {}".format(
                    self.num_classes, names_file, len(class_names)))

        # lists which collect the images and their integer labels
        data = []
        data_labels = []
        for i in range(self.num_classes):
            class_dir = os.path.join(self.path, 'animals', class_names[i])
            # os.listdir returns entries in arbitrary order; sorting makes the
            # sample order (and therefore any seeded split) reproducible
            data_names = sorted(os.listdir(class_dir))
            print("\n class: ",i)
            print("number of train data: ",len(data_names))
            for file_name in data_names:
                path_to_image = os.path.join(class_dir, file_name)
                image = cv2.imread(path_to_image)
                # cv2.imread signals failure by returning None instead of
                # raising, so skip files that are not decodable images
                if image is None:
                    print("skipping unreadable image: ", path_to_image)
                    continue
                # cv2.resize expects dsize as (width, height)
                image = cv2.resize(image, (self.width, self.height))
                data.append(np.array(image))
                data_labels.append(i)

        data = np.array(data)
        # encode the labels as one-hot vectors
        data_labels = tf.keras.utils.to_categorical(np.array(data_labels), self.num_classes)

        return data, data_labels


In [None]:
# Root folder of the Animal-90 dataset on the mounted Drive.
dataset_path = "/content/drive/MyDrive/projectDataset/animal90/"
# Read all 90 classes; the loader prints one progress entry per class.
animal90 = readDataset(num_classes=90, path=dataset_path)
data, data_labels = animal90.Animal90()


 class:  0
number of train data:  60

 class:  1
number of train data:  60

 class:  2
number of train data:  60

 class:  3
number of train data:  60

 class:  4
number of train data:  60

 class:  5
number of train data:  60

 class:  6
number of train data:  60

 class:  7
number of train data:  60

 class:  8
number of train data:  60

 class:  9
number of train data:  60

 class:  10
number of train data:  60

 class:  11
number of train data:  60

 class:  12
number of train data:  60

 class:  13
number of train data:  60

 class:  14
number of train data:  60

 class:  15
number of train data:  60

 class:  16
number of train data:  60

 class:  17
number of train data:  60

 class:  18
number of train data:  60

 class:  19
number of train data:  60

 class:  20
number of train data:  60

 class:  21
number of train data:  69

 class:  22
number of train data:  60

 class:  23
number of train data:  60

 class:  24
number of train data:  70

 class:  25
number of train data: 

In [None]:
# Cache the loaded images and labels on Drive so later sessions can skip
# re-reading the raw image files (comment this cell out when not needed).
path_save = '/content/drive/MyDrive/projectDataset/animal90/numpyFiles/'
# np.save does not create missing parent directories, so make sure the
# target folder exists before writing.
os.makedirs(path_save, exist_ok=True)
np.save(path_save+'data.npy', data)
np.save(path_save+'data_labels.npy',data_labels)

In [9]:
# Load the cached arrays from Drive and expose them as torch tensors.

path_save = '/content/drive/MyDrive/projectDataset/animal90/numpyFiles/'
# torch.from_numpy wraps the loaded array without copying it, whereas
# torch.tensor(...) always makes a copy -- that would briefly hold two full
# copies of the image array in memory. Dtypes are preserved either way.
data = torch.from_numpy(np.load(path_save+'data.npy'))
data_labels = torch.from_numpy(np.load(path_save+'data_labels.npy'))

In [None]:
# normalizing the data
# NOTE(review): this scaling is disabled, so pixel values are passed on as
# loaded. In the cells visible here train_data / test_data are only created by
# the split cell further down, so these lines could not run at this position --
# presumably a leftover; confirm before re-enabling.
# train_data = train_data/255.0
# test_data = test_data/255.0

In [10]:
# Fixed seed so the train / val / test partition is identical on every run;
# without it each execution evaluates on a different test set.
SPLIT_SEED = 42

# split data to train and test (80% / 20%)
train_data, test_data, train_labels, test_labels = train_test_split(data,
                                                                 data_labels,
                                                                 test_size=0.2,
                                                                 random_state=SPLIT_SEED)
# split train to train and val (80% / 20% of the remaining data, i.e.
# 64% / 16% of the full dataset)
train_data, val_data, train_labels, val_labels = train_test_split(train_data,
                                                                 train_labels,
                                                                 test_size=0.2,
                                                                 random_state=SPLIT_SEED)

# Data Augmentation in PyTorch

In [11]:
# create dataloader pytorch
import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset, TensorDataset

# Pair each split's images with its labels in a TensorDataset.
train_dataset, val_dataset, test_dataset = (
    TensorDataset(features, targets)
    for features, targets in (
        (train_data, train_labels),
        (val_data, val_labels),
        (test_data, test_labels),
    )
)

In [12]:
# Geometric augmentation pipeline. It takes a channel-first image tensor,
# converts it to a PIL image, applies the random transforms and converts back
# to a channel-first tensor.
transform = transforms.Compose([
    transforms.ToPILImage(),
    # random rotation by an angle drawn from [-45, +45] degrees
    transforms.RandomRotation(degrees=45),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    # no extra rotation; random shift of up to 15% of the width / height
    transforms.RandomAffine(degrees=0, translate=(0.15, 0.15)),
    # transforms.RandomResizedCrop(size=(224, 224), scale=(0.5, 1.0)),
    # ToTensor() would rescale pixel values to [0, 1], while the un-augmented
    # samples these outputs are later concatenated with keep their raw pixel
    # values -- one batch would then mix two value ranges. PILToTensor keeps
    # the raw values; the cast reproduces the float32 dtype ToTensor() returned.
    # NOTE(review): assumes the stored images are uint8 in [0, 255] -- confirm.
    transforms.PILToTensor(),
    transforms.Lambda(lambda image: image.to(torch.float32)),
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [13]:
# Indices (drawn without replacement) of the samples that will receive an
# augmented copy: half of the train split and half of the val split.
AUGMENT_FRACTION = 0.5
# A dedicated, seeded generator makes the selection reproducible without
# touching the state of the global `random` module.
_augment_rng = random.Random(42)
train_random_list = _augment_rng.sample(range(len(train_data)), round(AUGMENT_FRACTION * len(train_data)))
val_random_list = _augment_rng.sample(range(len(val_data)), round(AUGMENT_FRACTION * len(val_data)))

In [14]:
# Build an augmented copy of every selected training sample.
transformed_train = []
# Visiting the selected indices in ascending order produces the same samples,
# in the same order, as scanning the whole dataset and testing each position
# against the list -- without the per-sample linear list search.
for idx in sorted(set(train_random_list)):
    sample, label = train_dataset[idx]
    # Samples are stored channel-last (H, W, C); the transform pipeline works
    # on channel-first tensors, so permute on the way in and back on the way out.
    transformed_sample = transform(sample.permute(2, 0, 1))
    transformed_train.append((transformed_sample.permute(1, 2, 0), label))

In [15]:
# concatenate the two datasets: the original training samples come first,
# followed by the augmented copies
augmented_train_dataset = ConcatDataset([train_dataset, transformed_train])

In [16]:
# Build an augmented copy of every selected validation sample.
# NOTE(review): augmenting validation data makes validation metrics depend on
# the random transforms -- confirm this is intended.
transformed_val = []
# Visiting the selected indices in ascending order produces the same samples,
# in the same order, as scanning the whole dataset and testing each position
# against the list -- without the per-sample linear list search.
for idx in sorted(set(val_random_list)):
    sample, label = val_dataset[idx]
    # Samples are stored channel-last (H, W, C); the transform pipeline works
    # on channel-first tensors, so permute on the way in and back on the way out.
    transformed_sample = transform(sample.permute(2, 0, 1))
    transformed_val.append((transformed_sample.permute(1, 2, 0), label))

In [17]:
# concatenate the two datasets: the original validation samples come first,
# followed by the augmented copies
augmented_val_dataset = ConcatDataset([val_dataset, transformed_val])

In [18]:
# create dataloaders
BATCH_SIZE = 128

# Training: reshuffle every epoch and drop the last partial batch so every
# optimisation step sees a full batch.
train_loader = DataLoader(augmented_train_dataset, batch_size=BATCH_SIZE,
                          shuffle=True, drop_last=True)
# Evaluation: every sample must be scored exactly once, so the final partial
# batch is kept (drop_last=True silently discarded those samples) and the
# order is left fixed, which also makes the reported metrics reproducible.
val_loader = DataLoader(augmented_val_dataset, batch_size=BATCH_SIZE,
                        shuffle=False, drop_last=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                         shuffle=False, drop_last=False)

In [19]:
# Sanity check: show the shape of the first training batch (if there is one).
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    train, label = first_batch
    print(train.shape)

torch.Size([128, 200, 200, 3])
