In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!cp /content/drive/MyDrive/kaggle/kaggle.json ~/.kaggle/
!kaggle datasets download -d ambityga/imagenet100
!unzip imagenet100.zip

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import torchvision.transforms as transforms
import torchvision.datasets as datasets

import numpy as np
import matplotlib.pyplot as plt
import cv2

from torch.utils.data import DataLoader, TensorDataset, Dataset

import warnings
warnings.filterwarnings("ignore")

USE_CUDA = torch.cuda.is_available()
device = ("cuda" if USE_CUDA else "cpu")
print(device)

if device == "cuda":
  torch.cuda.manual_seed(777)

else:
  torch.manual_seed(1)

cuda


### ImageNet-100 Dataset Load

* How does it looks like?

In [15]:
from glob import glob
from tqdm import tqdm, tqdm_notebook
import json

class ImageNet100Dataset(Dataset):

  def __init__(self, train=True):
    self.x_train = list()
    self.y_train = list()
    self.y_label = dict()
    self.train = train
    self.label_dict = dict()

    # Load JSON for label
    with open("./Labels.json","r") as f:
      total_dict = json.load(f)

      for path in tqdm_notebook(glob("./train.X1/**")):
        label_id = path.split("/")[-1]
        self.label_dict[label_id] = total_dict[label_id]

    # Data Augmentation with Pre Transform
    if self.train == True:
      self.pre_transforms = transforms.Compose([
          transforms.ToTensor(),
          transforms.RandomResizedCrop(224),
          transforms.RandomRotation([0, 270]),
          transforms.RandomHorizontalFlip(0.5),
          transforms.RandomVerticalFlip(0.5),
      ])
    else:
      self.pre_transforms = transforms.Compose([
          transforms.ToTensor(),
          transforms.Resize(256),
          transforms.CenterCrop([224, 224])
      ])

    self.load()
    self.num_classes = len(self.y_label)

  def __len__(self):
    return len(self.x_train)

  def __getitem__(self, idx):

    x = self.pre_transforms((cv2.imread(self.x_train[idx])))
    y = torch.LongTensor([self.y_train[idx]]).squeeze()
    return x, y

  def load(self):
    idx = 0

    # Put into DataLoader
    if self.train == True:
      data_path = "./train.X1/**/**"
    else:
      data_path = "./val.X/**/**"

    for path in tqdm_notebook(glob(data_path)):

      label_id = path.split("/")[-2]

      # Exclude train v2,v3,v4 (for fast evaluation)
      if self.train != True:
        if label_id not in self.label_dict:
          continue

      self.x_train.append(path)

      if label_id in self.y_label:
        self.y_train.append(self.y_label[label_id][0])

      else:
        self.y_label[label_id] = (idx, self.label_dict[label_id])
        idx += 1
        self.y_train.append(self.y_label[label_id][0])

    def get_class_num():
      return len(self.y_label)

In [10]:
train_set = ImageNet100Dataset(train=True)
train_loader = DataLoader(train_set, batch_size=128, shuffle=True, drop_last=True)
num_classes = (train_set.num_classes)

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/32500 [00:00<?, ?it/s]

In [11]:
from torch.nn.modules.activation import Softmax
class AlexNet(nn.Module):
  def __init__(self):

    super().__init__()

    # Input: (224, 224, 3)
    self.conv1 = nn.Sequential(
        nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2)
    )

    # Input: (26, 26, 96)
    self.conv2 = nn.Sequential(
        nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2)
    )

    # Input: (12, 12, 256)
    self.conv3 = nn.Sequential(
        nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
        nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
        nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2)
    )

    # Input: (5 * 5 * 256)
    self.fc = nn.Sequential(
        nn.Flatten(1, -1),
        nn.Linear(5 * 5 * 256, 4096),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(4096, 4096),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(4096, 25)
    )

  def forward(self, x):
    output = self.conv1(x)
    output = self.conv2(output)
    output = self.conv3(output)
    output = self.fc(output)
    return output

In [11]:
# class GoogLeNet(nn.Module):
#   def __init__(self):

#   def forward(self, x):
#     return

In [12]:
# class ResNet(nn.Module):
#   def __init__(self):

#   def forward(self, x):
#     return

In [13]:
# class MobileNet(nn.Module):
#   def __init__(self):

#   def forward(self, x):
#     return

In [12]:
model = AlexNet().to(device)
total_epochs = 100
optimizer = optim.SGD(model.parameters(), lr=0.1)
criterion = nn.CrossEntropyLoss().to(device)

for epoch in tqdm_notebook(range(1, total_epochs + 1)):

  total_cost = 0
  total_batch = len(train_loader)

  for batch_idx, sample in tqdm_notebook(enumerate(train_loader)):

    x_train, y_train = sample
    x_train, y_train = x_train.to(device), y_train.to(device)
    y_pred = model(x_train)
    cost = criterion(y_pred, y_train).to(device)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    total_cost += cost / total_batch

  print(f"Epoch : {epoch}/{total_epochs} / Cost : {total_cost}")

  0%|          | 0/10 [00:00<?, ?it/s]

0it [00:00, ?it/s]

Epoch : 1/10 / Cost : 3.2110977172851562


0it [00:00, ?it/s]

Epoch : 2/10 / Cost : 3.1001663208007812


0it [00:00, ?it/s]

Epoch : 3/10 / Cost : 2.953193187713623


0it [00:00, ?it/s]

Epoch : 4/10 / Cost : 2.842623472213745


0it [00:00, ?it/s]

Epoch : 5/10 / Cost : 2.715667724609375


0it [00:00, ?it/s]

Epoch : 6/10 / Cost : 2.6011767387390137


0it [00:00, ?it/s]

Epoch : 7/10 / Cost : 2.534186363220215


0it [00:00, ?it/s]

Epoch : 8/10 / Cost : 2.4795031547546387


0it [00:00, ?it/s]

Epoch : 9/10 / Cost : 2.4207770824432373


0it [00:00, ?it/s]

Epoch : 10/10 / Cost : 2.363231658935547


In [16]:
eval_set = ImageNet100Dataset(train=False)
eval_loader = DataLoader(eval_set, batch_size=128, shuffle=True, drop_last=True)

with torch.no_grad():

  total = 0
  accuracy = 0

  for batch_idx, sample in tqdm_notebook(enumerate(eval_loader)):

    x_test, y_test = sample
    x_test, y_test = x_test.to(device), y_test.to(device)
    y_pred = model(x_train)

    accuracy += np.count_nonzero((y_pred.argmax(dim=1) == y_test).cpu())
    total += len(y_train)

  print(f"Accuracy : {accuracy / total * 100:.2f}")

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/5000 [00:00<?, ?it/s]

0it [00:00, ?it/s]

Accuracy : 3.82
