# 1. Import Libraries

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory


# Parallel accumulators filled by the data-loading cell:
# image file paths and their integer class labels (same index = same sample).
list_image_url, list_images_label = [], []

import os
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from glob import glob
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


from torch.utils.data import Dataset

import torchvision
import torchvision.transforms as transforms

from torchvision.transforms import (Compose,
                                    RandomResizedCrop,
                                    Resize,
                                    CenterCrop,
                                    RandomHorizontalFlip,
                                    RandomVerticalFlip,
                                    RandomPerspective,
                                    RandomRotation,
                                    ColorJitter,
                                    Normalize,
                                    ToTensor
                                   )

from collections import Counter

from pathlib import Path

# Class index -> class (folder) name.
label_map = dict(enumerate(("cats", "dogs", "panda")))




# 2. Load Image Paths and Labels into Two Lists

In [None]:
ds_path = '../input/animal-image-datasetdog-cat-and-panda/animals/'

# Folder name -> integer class id (0 = cats, 1 = dogs, 2 = panda).
category_to_label = {'cats': 0, 'dogs': 1, 'panda': 2}

# Start from empty lists so that re-running this cell does not append every
# image a second time.
list_image_url.clear()
list_images_label.clear()

for category, this_label in category_to_label.items():
    # os.path.join builds the path of the class sub-folder.
    category_dir = os.path.join(ds_path, category)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the seeded train/test split) identical across runs.
    for p in sorted(os.listdir(category_dir)):
        list_image_url.append(Path(os.path.join(category_dir, p)))
        list_images_label.append(this_label)

print('Number of instance', len(list_image_url))

# Number of samples per class id.
Counter(list_images_label)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# 3. Check Data Variety

In [None]:
# Collect the size reported by PIL for every image to see how much they vary.
sizes = []
for path in list_image_url:
    # The context manager closes the file even if reading the image header fails.
    with Image.open(path) as im:
        sizes.append(im.size)

# NOTE: max/min compare the size tuples lexicographically (first element first,
# second element only as a tie-breaker), so this is not a per-dimension range.
print(max(sizes), min(sizes))
# The list is only needed for this check; free it.
del sizes

# 4. Split Train and Test

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    list_image_url,
    list_images_label,
    test_size=0.2,
    random_state=1357,
)

# (number of training samples, number of held-out samples)
len(x_train), len(x_test)

# 5. Dataset Class (subclass of torch.utils.data.Dataset)

In [None]:

# Augmentation pipeline for training images: random crop/flip/perspective/rotation,
# then conversion to a tensor and per-channel normalisation.
# Note: the dataset class in this notebook applies `basic_augmentations` by
# default, not this pipeline.
transform = Compose([
    RandomResizedCrop(224),
    # p=0.5 flips half of the images. The previous p=1 mirrored every image,
    # which is a fixed transformation and adds no variety to the data.
    RandomHorizontalFlip(0.5),
    RandomVerticalFlip(0.1),
    RandomPerspective(distortion_scale=0.2, p=0.2),
    RandomRotation(15),
    ToTensor(),
    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Deterministic pipeline: resize, centre-crop to 224x224 and convert to a tensor
# (no normalisation is applied here).
basic_augmentations = Compose([
    Resize(224),
    CenterCrop(224),
    ToTensor()
])

class AnimalDataset(Dataset):
    """Image classification dataset that loads images lazily from disk.

    Args:
        img_paths: Sequence of image file paths.
        img_labels: Sequence of integer class indices, aligned with ``img_paths``.
        size_of_images: ``(width, height)`` passed to ``PIL.Image.resize``.
        transform: Optional callable applied to the resized PIL image. When
            ``None`` (default) the module-level ``basic_augmentations``
            pipeline is used, as before.
        num_classes: Length of the one-hot label vector (default 3).

    Raises:
        ValueError: If ``img_paths`` and ``img_labels`` differ in length.
    """

    def __init__(self, img_paths, img_labels, size_of_images, transform=None, num_classes=3):
        # The original raised an undefined `InvalidDatasetException`, which
        # surfaced as a NameError; report the mismatch with a real exception.
        if len(img_paths) != len(img_labels):
            raise ValueError(
                "img_paths and img_labels must have the same length, got "
                f"{len(img_paths)} and {len(img_labels)}"
            )
        self.img_paths = img_paths
        self.img_labels = img_labels
        self.size_of_images = size_of_images
        self.transform = transform
        self.num_classes = num_classes

    def __len__(self):
        """Return the number of samples."""
        return len(self.img_paths)

    def __getitem__(self, index):
        """Return ``(image, one_hot_label)`` for the sample at ``index``.

        The image is converted to RGB, resized to ``size_of_images`` and passed
        through the transform pipeline (the default pipeline ends with
        ``ToTensor``). The label is a NumPy vector of length ``num_classes``
        with a 1 at the class index and 0 elsewhere.
        """
        # Close the file handle as soon as the pixel data has been converted.
        with Image.open(self.img_paths[index]) as img_file:
            PIL_IMAGE = img_file.convert(mode="RGB").resize(self.size_of_images)

        # Look the default pipeline up at call time so a custom transform stays optional.
        pipeline = self.transform if self.transform is not None else basic_augmentations
        TENSOR_IMAGE = pipeline(PIL_IMAGE)

        # One-hot encode the integer class index.
        vector = np.zeros(self.num_classes)
        vector[self.img_labels[index]] = 1

        return TENSOR_IMAGE, vector

import datetime
# Print the current local date and time as a simple timestamp for this run.
print(datetime.datetime.now())

In [None]:
# Wrap each split in a dataset; every image is resized to 224x224.
train_ds = AnimalDataset(img_paths=x_train, img_labels=y_train, size_of_images=(224, 224))
val_ds = AnimalDataset(img_paths=x_test, img_labels=y_test, size_of_images=(224, 224))

# (training samples, validation samples)
len(train_ds), len(val_ds)

# Visualization

In [None]:
# Small dataset holding only the first 16 training samples, used for the preview grid.
plot_ds = AnimalDataset(img_paths=x_train[:16], img_labels=y_train[:16], size_of_images=(224, 224))

In [None]:
import matplotlib.pyplot as plt

# Class index -> display name. This must follow the label encoding used when
# the labels were built (0 = cats, 1 = dogs, 2 = panda); the previous mapping
# had panda and cat swapped.
classes = { 0: 'cat',
            1: 'dog',
            2: 'panda'
          }

# Load every preview sample once as an (image, one-hot label) pair.
to_plot = [plot_ds[i] for i in range(len(plot_ds))]

figure = plt.figure(figsize=(16,16))
# Draw at most a 4x4 grid, and only as many panels as there are samples,
# instead of assuming exactly 16 are available.
for i, (x, y) in enumerate(to_plot[:16], start=1):
    # Move the first axis last so imshow receives channels-last data.
    img = x.permute(1,2,0)
    # Title each panel with its class name, decoded from the one-hot label.
    label = classes[int(np.argmax(y))]
    figure.add_subplot(4,4,i)
    plt.title(label)
    plt.axis('off')
    plt.imshow(img)

# Batching Datasets

In [None]:
# Mini-batch iterators over both splits: 32 samples per batch, shuffling enabled.
train_loader = torch.utils.data.DataLoader(
    train_ds,
    batch_size=32,
    shuffle=True,
)
val_loader = torch.utils.data.DataLoader(
    val_ds,
    batch_size=32,
    shuffle=True,
)

# (training batches, validation batches)
len(train_loader), len(val_loader)

# 6. Model Config

In [None]:
class MyCNN(nn.Module):
    """Small convolutional classifier with three conv stages and a linear head.

    Each stage is two 3x3 convolutions with ReLU, batch normalisation and a
    2x2 max-pool (16, 28 and 56 channels). The feature map is averaged per
    channel and mapped to 3 class scores.

    ``forward`` returns raw logits of shape ``(batch, 3)``. No softmax is
    applied, because ``nn.CrossEntropyLoss`` (the loss used in this notebook)
    applies log-softmax itself; a softmax in the model would be applied twice.
    Use ``torch.softmax(logits, dim=1)`` when probabilities are needed.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor.
        self.backbone = nn.Sequential(

            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),

            nn.Conv2d(16, 28, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(28, 28, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(28),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),

            nn.Conv2d(28, 56, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(56, 56, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(56),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),

            nn.Dropout2d(0.5)

        )

        # Global average pooling: one value per channel, for any input size.
        # This replaces flattening followed by AdaptiveAvgPool1d(56), which
        # computed the same per-channel means in a less obvious way.
        self.pool = nn.AdaptiveAvgPool2d(1)

        # The linear layer stays at index 1 so its state-dict keys
        # (head.1.weight / head.1.bias) are unchanged.
        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(56, 3),
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, 3)`` for an image batch ``x``."""
        x = self.backbone(x)
        x = self.pool(x)
        return self.head(x)

print(MyCNN())

# Model ResNet

In [None]:
import copy

# ResNet-50 created with pretrained=False. It is kept as an untouched template;
# every model instance below works on its own copy.
resnet = torchvision.models.resnet50(pretrained=False)


class ResNEt50Tranfer(nn.Module):
    """ResNet-50 whose final layer is replaced by a 3-class head.

    Each instance deep-copies the module-level ``resnet`` template instead of
    sharing it. Previously all instances mutated the same network, so creating
    the model again (e.g. re-running the set-up cell) silently reused backbone
    weights that had already been trained.

    No parameter is frozen: the whole network is trainable.
    ``forward`` returns raw logits of shape ``(batch, 3)``.
    """

    def __init__(self):
        super().__init__()

        # Private copy of the template so instances never share weights.
        self.resnet = copy.deepcopy(resnet)

        # Read the feature width from the layer being replaced rather than
        # hard-coding it (2048 for ResNet-50).
        in_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Sequential(
               nn.Linear(in_features, 128),
               nn.ReLU(inplace=True),
               nn.Linear(128, 3))

    def forward(self, x):
        """Run the network on an image batch ``x`` and return the class logits."""
        return self.resnet(x)

print(ResNEt50Tranfer())



# Set Up Training Parameters

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the network and move its parameters to the selected device.
model = ResNEt50Tranfer()
model = model.to(device)

# Classification loss and Adam optimiser over all model parameters.
Loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

print(device)

In [None]:
EPOCH_NUMBER = 10
TRAIN_LOSS = []        # mean training loss per epoch
TRAIN_ACCURACY = []    # accuracy on the training loader per epoch
TEST_ACCURACY = []     # accuracy on the validation loader per epoch


def _accuracy(net, loader, run_device):
    """Return the fraction of samples in ``loader`` that ``net`` classifies correctly.

    ``loader`` must yield ``(images, one_hot_labels)`` batches. The network is
    put in eval mode and gradients are disabled, so no autograd graph is built
    during evaluation.
    """
    net.eval()
    n_correct = 0
    n_total = 0
    with torch.no_grad():
        for sample, label in loader:
            sample = sample.to(run_device)
            label = label.to(run_device)

            # Compare predicted and true class indices for the whole batch at once.
            predicted = net(sample).argmax(dim=1)
            expected = label.argmax(dim=1)
            n_correct += (predicted == expected).sum().item()
            n_total += label.size(0)
    return n_correct / n_total


for epoch in range(1, EPOCH_NUMBER + 1):
    epoch_loss = 0.0

    model.train()

    for sample, label in train_loader:
        sample = sample.to(device)
        label = label.to(device)

        predict = model(sample)

        # The labels are one-hot vectors; pass class indices to the loss. For
        # one-hot targets this gives the same value as class-probability
        # targets, and it does not depend on the label dtype.
        this_lost = Loss(predict, label.argmax(dim=1))

        optimizer.zero_grad()
        this_lost.backward()
        optimizer.step()
        epoch_loss += this_lost.item()

    # Mean loss over the batches of this epoch.
    mean_loss = epoch_loss / len(train_loader)
    print('Traint', epoch, mean_loss)
    TRAIN_LOSS.append(mean_loss)

    # Accuracy on the training data, measured in eval mode.
    train_acc = _accuracy(model, train_loader, device)
    print('Acc Train', epoch, train_acc)
    TRAIN_ACCURACY.append(train_acc)

    # Accuracy on the held-out data.
    val_acc = _accuracy(model, val_loader, device)
    print('Acc Eval', epoch, val_acc)
    TEST_ACCURACY.append(val_acc)




In [None]:
import matplotlib.pyplot as plt

# Epochs are numbered from 1 in the training loop; use the same numbering on the x axis.
epochs = range(1, len(TRAIN_ACCURACY) + 1)

plt.plot(epochs, TRAIN_ACCURACY,  color='red')
plt.plot(epochs, TEST_ACCURACY,   color='blue')
plt.legend(['Train', 'Test'], loc='lower right')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
# The plotted values are accuracies (correct / total), not precision.
plt.title('Accuracy')