# Image classification on the Kaggle 4-animal dataset

View the dataset description and leaderboard on the [Kaggle competition page](https://www.kaggle.com/competitions/4-animal-classification/).

## 1. Import modules

In [None]:

import torch
from torchvision import transforms, models
from torch import nn
from tqdm import tqdm
from glob import glob
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset

In [None]:
# Check GPU compatibility
# Query CUDA once and derive both the flag and the device string from it.
CUDA = torch.cuda.is_available()
device = "cuda" if CUDA else "cpu"

if not CUDA:
    print("GPU is not available. Using CPU")
else:
    print("GPU is available")
    print("GPU device:", torch.cuda.get_device_name(0))
    # total_memory is reported in bytes; convert to whole GiB for display.
    gpu_memory_gib = round(torch.cuda.get_device_properties(0).total_memory / 1024**3)
    print("GPU memory:", gpu_memory_gib, "GB")


## 2. Process data

In [None]:

# load data
# Per-image tensors are collected in plain lists here; a later cell turns
# them into batched tensors.
test_data = []
train_data_x = []
train_data_y = []

# Preprocessing: PIL image -> float tensor in [0, 1], resize to 256x256,
# centre-crop to 224x224, then normalise each of the three channels with the
# mean/std values used by torchvision's ImageNet-pretrained models.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((256, 256)),
    transforms.CenterCrop((224, 224)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


def _load_image(path):
    """Open the image at *path* and return it as a preprocessed tensor.

    The image is converted to RGB first so grayscale or RGBA files still
    produce the three channels that ``Normalize`` (and later stacking)
    require. The file handle is closed as soon as the tensor is built.
    """
    with Image.open(path) as img:
        return transform(img.convert("RGB"))


# glob() returns files in arbitrary, filesystem-dependent order; sorting makes
# the sample order reproducible, which matters when test predictions have to
# be matched back to file names.
for file in sorted(glob("./data/4-animal-classification/test/test/*")):
    test_data.append(_load_image(file))

# The position of each name in this list is the integer class label.
animals = ['cat', 'deer', 'dog', 'horse']

for label, animal in enumerate(animals):
    for file in sorted(glob(f"./data/4-animal-classification/train/{animal}/*")):
        train_data_x.append(_load_image(file))
        train_data_y.append(label)


In [None]:
# convert to tensor
# The image lists already hold torch tensors of identical shape, so stack them
# along a new leading batch dimension directly instead of round-tripping
# through a NumPy array (slow, and fragile for lists of multi-dim tensors).
# NOTE: torch.stack raises RuntimeError on an empty list, i.e. when no image
# files were found by the loading cell.
test_data = torch.stack(test_data).to(torch.float32)
train_data_x = torch.stack(train_data_x).to(torch.float32)
# Labels are plain Python ints; int64 is the dtype CrossEntropyLoss expects
# for class-index targets.
train_data_y = torch.tensor(train_data_y, dtype=torch.long)


In [None]:
# Report the shape of every tensor built above as a quick sanity check.
for caption, tensor in (
    ("Train data shape:", train_data_x),
    ("Train label shape:", train_data_y),
    ("Test data shape:", test_data),
):
    print(caption, tensor.shape)

In [None]:
# save data
# Persist the preprocessed tensors so later sections can reload them from
# disk instead of decoding every image again.
for payload, destination in (
    (train_data_x, "./data/4-animal-classification/train_data_x.pt"),
    (train_data_y, "./data/4-animal-classification/train_data_y.pt"),
    (test_data, "./data/4-animal-classification/test_data.pt"),
):
    torch.save(payload, destination)


## 3. Define architectures

In [None]:
class VGG16(nn.Module):
    """VGG16-style convolutional classifier.

    The feature extractor has five stages of 3x3 same-padded convolutions
    (2, 2, 3, 3 and 3 conv layers with 64, 128, 256, 512 and 512 output
    channels), each followed by ReLU activations and closed by a 2x2
    max-pool that halves the spatial size. The classifier head is five
    fully connected layers (8192 -> 4096 -> 2048 -> 1024 -> num_classes)
    with ReLU and dropout (p=0.5) between them.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes: int) -> None:
        super().__init__()
        # The attribute suffix is the spatial size each stage receives for a
        # 224x224 input. These names are also the state_dict key prefixes, so
        # they must not change if existing checkpoints are to keep loading.
        self.sequential_224 = self._conv_stage(3, 64, num_convs=2)
        self.sequential_112 = self._conv_stage(64, 128, num_convs=2)
        self.sequential_56 = self._conv_stage(128, 256, num_convs=3)
        self.sequential_28 = self._conv_stage(256, 512, num_convs=3)
        self.sequential_14 = self._conv_stage(512, 512, num_convs=3)
        # Forces a 7x7 feature map so the first Linear layer's 512*7*7 input
        # size holds for any input resolution. For 224x224 inputs the map is
        # already 7x7 and this is the identity. It has no parameters, so the
        # state_dict is unaffected.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.sequential_linear = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=512*7*7, out_features=2**13), nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=2**13, out_features=2**12), nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=2**12, out_features=2**11), nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=2**11, out_features=2**10), nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=2**10, out_features=num_classes)
        )

    @staticmethod
    def _conv_stage(in_channels: int, out_channels: int, num_convs: int) -> nn.Sequential:
        """Build ``num_convs`` (3x3 conv + ReLU) pairs followed by a 2x2 max-pool."""
        layers = []
        channels = in_channels
        for _ in range(num_convs):
            layers.append(
                nn.Conv2d(in_channels=channels, out_channels=out_channels,
                          kernel_size=3, padding='same')
            )
            layers.append(nn.ReLU())
            # Only the first conv of a stage changes the channel count.
            channels = out_channels
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape (N, num_classes) for a batch ``x``.

        ``x`` is a batch of 3-channel images shaped (N, 3, H, W). H and W
        must be at least 32 so the feature map survives the five 2x
        poolings.
        """
        x = self.sequential_224(x)
        x = self.sequential_112(x)
        x = self.sequential_56(x)
        x = self.sequential_28(x)
        x = self.sequential_14(x)
        x = self.avgpool(x)
        x = self.sequential_linear(x)
        return x

## 4. Train models

In [None]:

# load train data
# Reload the cached image and label tensors written by the preprocessing
# section, so training can start without decoding the images again.
train_data_x, train_data_y = map(
    torch.load,
    (
        "./data/4-animal-classification/train_data_x.pt",
        "./data/4-animal-classification/train_data_y.pt",
    ),
)


In [None]:
# define data loader
batch_size = 2

# Hold out 20% of the labelled samples for validation; the fixed seed makes
# the split reproducible across runs.
labelled_arrays = (train_data_x, train_data_y)
train_x, val_x, train_y, val_y = train_test_split(
    *labelled_arrays,
    test_size=0.2,
    random_state=42,
)

class Data(Dataset):
    """In-memory dataset pairing each sample with its label by index.

    Args:
        data: Indexable collection of samples (e.g. a tensor whose first
            dimension is the sample index).
        label: Indexable collection of labels, one per sample.

    Raises:
        ValueError: If ``data`` and ``label`` have different lengths.
    """

    def __init__(self, data, label):
        # Fail early: a mismatch would otherwise only surface as an
        # IndexError mid-epoch, or silently ignore surplus labels.
        if len(data) != len(label):
            raise ValueError(
                f"data and label must have the same length, "
                f"got {len(data)} and {len(label)}"
            )
        self.data = data
        self.label = label

    def __len__(self) -> int:
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair stored at position ``idx``."""
        return self.data[idx], self.label[idx]


# Wrap the train/validation splits as datasets.
train_data = Data(train_x, train_y)
val_data = Data(val_x, val_y)

# Shuffle only the training batches. Validation metrics are sums over the
# whole split, so shuffling adds nothing there; a fixed order keeps
# evaluation deterministic.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)


In [None]:

# Alternative kept for reference: torchvision's VGG16 with its last classifier
# layer replaced by a 4-way output.
#   model = models.vgg16()
#   model.classifier[6] = nn.Linear(in_features=4096, out_features=4)

# Custom VGG16 with one logit per animal class, placed on the selected device.
model = VGG16(4).to(device)

# Cross-entropy over the class logits, on the same device as the model.
criterion = nn.CrossEntropyLoss().to(device)

# Adam over all model parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)

In [None]:


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``optimizer`` is given the model is put in training mode and its
    parameters are updated after every batch. Otherwise the model is put in
    evaluation mode and gradients are disabled. Both returned values are
    averaged over the number of samples seen.
    """
    training = optimizer is not None
    # Dropout behaves differently in train and eval mode; without switching,
    # validation would run with dropout active and report noisy metrics.
    model.train(training)

    loss_sum = 0.0
    correct = 0
    seen = 0
    with torch.set_grad_enabled(training):
        for x, y in tqdm(loader):
            x = x.to(device)
            y = y.to(device)

            output = model(x.float())
            batch_loss = criterion(output, y)

            if training:
                optimizer.zero_grad()
                batch_loss.backward()
                optimizer.step()

            # CrossEntropyLoss (default reduction) returns the batch mean, so
            # weight it by the batch size to get a true per-sample average
            # even when the last batch is smaller.
            n_samples = y.size(0)
            loss_sum += batch_loss.item() * n_samples
            correct += (output.argmax(dim=1) == y).sum().item()
            seen += n_samples

    return loss_sum / seen, correct / seen


# Train for 50 epochs and checkpoint the weights whenever the validation loss
# reaches a new minimum.
min_val_loss = float("inf")
for epoch in range(50):
    train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
    val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

    # Adjacent f-strings instead of backslash continuations, which embedded
    # the source indentation in the printed line.
    print(
        f'Epochs: {epoch+1} | Train Loss: {train_loss:.3f}'
        f' | Train Accuracy: {train_acc:.3f}'
        f' | Val Loss: {val_loss:.3f}'
        f' | Val Accuracy: {val_acc:.3f}'
    )

    if val_loss < min_val_loss:
        min_val_loss = val_loss
        torch.save(model.state_dict(), "simplemodel.pt")
        print(f"Save model because val loss improve loss {min_val_loss:.3f}")

    print("-"*50)