<a href="https://colab.research.google.com/github/starmpcc/CS470-Team-23/blob/master/Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

CS470 Team 25 Project Classifier

References


*   https://tutorials.pytorch.kr/intermediate/torchvision_tutorial.html
*   https://github.com/pytorch/vision/blob/21153802a3086558e9385788956b0f2808b50e51/torchvision/models/resnet.py#L161
*   https://papers.nips.cc/paper/2017/file/e7e23670481ac78b3c4122a99ba60573-Paper.pdf



In [2]:
import os
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.models import resnet34
from PIL import Image
from google.colab import drive
from sklearn.model_selection import train_test_split

drive.mount('/content/gdrive')
root = os.getcwd()+'/gdrive/My Drive/Colab Notebooks/Project'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
#define hyperparameters
val_set_ratio = 0.25
learning_rate = 0.01
num_epoches = 50
num_classes = 91

In [4]:
def rec_freeze(model):
  for child in model.children():
    for param in child.parameters():
      param.requires_grad = False
    rec_freeze(child)

In [42]:
#Old Dataloader
#Assume that all images are saved in "cat" folder

#temporary loader for raw image
"""
temp_transform = transforms.Compose([
                                      transforms.Resize(224),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),])

dataset = torchvision.datasets.ImageFolder(os.path.join(root, "cat"), temp_transform)
#Use ConcatDataset to use refined data
train_idx, val_idx = train_test_split(list(range(len(dataset))), test_size = val_set_ratio)
train_dataset = torch.utils.data.Subset(dataset, train_idx)
val_dataset = torch.utils.data.Subset(dataset, val_idx)
train_dataloader = torch.utils.data.DataLoader(train_dataset, 10, True, num_workers = 8)
val_dataloader = torch.utils.data.DataLoader(val_dataset, 10, True, num_workers = 8)

print(len(dataset))
print(len(train_dataset))
print(len(val_dataset))
"""

5594
4195
1399


In [21]:
#Read Pre-processed data
face_data = torch.zeros(100, 100, 10).to(device)

In [22]:
#temporary loader for raw image
temp_transform = transforms.Compose([
                                      transforms.Resize(224),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),])

class CatFaceDataset(torch.utils.data.Dataset):
  #Dict {image:Tensor(B*224*224), label:int, index:int}

  def __init__(self, root, transform):
    self.root = root
    self.imgs = []
    self.cats = list(sorted(os.listdir(os.path.join(root, "cat"))))
    for cat in self.cats:
      imagelist = list(sorted(os.listdir(os.path.join(root, "cat", cat))))
      self.imgs += [os.path.join(root, "cat", cat, i) for i in imagelist]
    self.transform = transform

  def __getitem__(self, idx):
    img = Image.open(self.imgs[idx]).convert("RGB")
    img = self.transform(img)
    label = self.imgs[idx].split('/')[-2].split('_')[-1]
    index = int(os.path.basename(self.imgs[idx]).split('.')[0])
    target = {}    
    target["image"] = img
    target["label"] = int(label)
    target["index"] = int(index)
    return target
  
  def __len__(self):
    return len(self.imgs)

dataset = CatFaceDataset(root, temp_transform)
#Use ConcatDataset to use refined data
train_idx, val_idx = train_test_split(list(range(len(dataset))), test_size = val_set_ratio)
train_dataset = torch.utils.data.Subset(dataset, train_idx)
val_dataset = torch.utils.data.Subset(dataset, val_idx)
train_dataloader = torch.utils.data.DataLoader(train_dataset, 10, True, num_workers = 8)
val_dataloader = torch.utils.data.DataLoader(val_dataset, 10, True, num_workers = 8)

print(len(dataset))
print(len(train_dataset))
print(len(val_dataset))


5601
4200
1401
(0, {'image': tensor([[[[0.2235, 0.2157, 0.2118,  ..., 0.2941, 0.2902, 0.2863],
          [0.2235, 0.2157, 0.2118,  ..., 0.2941, 0.2902, 0.2863],
          [0.2235, 0.2157, 0.2118,  ..., 0.2941, 0.2902, 0.2863],
          ...,
          [0.9020, 0.9098, 0.9216,  ..., 0.7529, 0.7569, 0.7451],
          [0.8902, 0.9020, 0.8902,  ..., 0.7569, 0.7529, 0.7373],
          [0.8667, 0.8784, 0.8745,  ..., 0.7608, 0.7490, 0.7529]],

         [[0.1608, 0.1529, 0.1490,  ..., 0.2745, 0.2706, 0.2667],
          [0.1608, 0.1529, 0.1490,  ..., 0.2745, 0.2706, 0.2667],
          [0.1608, 0.1529, 0.1490,  ..., 0.2745, 0.2706, 0.2667],
          ...,
          [0.8745, 0.8824, 0.8941,  ..., 0.7686, 0.7725, 0.7608],
          [0.8510, 0.8667, 0.8588,  ..., 0.7725, 0.7686, 0.7529],
          [0.8196, 0.8392, 0.8392,  ..., 0.7765, 0.7647, 0.7686]],

         [[0.1020, 0.0941, 0.0902,  ..., 0.2000, 0.1961, 0.1922],
          [0.1020, 0.0941, 0.0902,  ..., 0.2000, 0.1961, 0.1922],
          [0.

In [None]:
class ACNN(nn.Module):
  def __init__(self):
    super(ACNN, self).__init__()

    #Get layers from pretrained resnet34
    resnet = resnet34(pretrained = True)
    l = []
    for child in resnet.children():
      l.append(child)
    
    #Original layers from resnet34
    self.conv1 = l[0]
    self.bn1 = l[1]
    self.relu = l[2]
    self.maxpool = l[3]
    self.layer1 = l[4]
    self.layer2 = l[5]
    self.layer3 = l[6]
    self.layer4 = l[7]
    self.avgpool = l[8]
#    self.fc = l[9]

    #Re-Define final fc layer to adapt our model
    self.fc = nn.Linear(512, num_classes)

    #define new layers for Adaptive Convolution
    self.param_ln1 = nn.Linear(1, 1)

  #TODO: freeze some layers
  def forward(self, x, cat_face_data):
        #B*3*224*224
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        #B*64*32*32
        x = self.layer1(x)
        #B*64*32*32
        x = self.layer2(x)
        #B*128*16*16
        x = self.layer3(x)
        #B*256*8*8
        x = self.layer4(x)
        #B*512*4*4
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        #B*512
        x = self.fc(x)

        return x



In [None]:
# Define Model
model = ACNN().to(device)

In [None]:
optimizer = torch.optim.SGD(model.parameters(), learning_rate)
train_losses = []
val_losses = []
train_accs = []
val_accs = []

fitness = nn.CrossEntropyLoss()

for epoch in range(num_epoches):
  model.train()
  cnt = 0
  correct_cnt = 0
  train_loss = 0.0
  for idx, target in enumerate(train_dataloader):
    print(target["label"])
    x = target["image"].to(device)
    label = target["label"].to(device)
    cat_face_data = face_data[label, target["index"], :]

    pred = model(x, cat_face_data)
    optimizer.zero_grad()
    train_loss = fitness(pred, label)
    train_loss.backward()
    optimizer.step()
    
    _, correct = torch.max(pred, 1)
    correct_cnt += (correct == label.data).sum().item()
    cnt += x.data.size(0)

  train_losses.append(train_loss)
  train_accs.append(correct_cnt/cnt)


  model.eval()
  cnt = 0
  correct_cnt = 0
  val_loss = 0.0
  for idx, target in enumerate(val_dataloader):
    with torch.no_grad():
      x = target["image"].to(device)
      label = target["label"].to(device)
      cat_face_data = face_data[label, target["index"], :]

      pred = model(x, cat_face_data)
      pred = model(x)
      val_loss = fitness(pred, label)
      _, correct = torch.max(pred, 1)
      correct_cnt += (correct == label.data).sum().item()
      cnt += x.data.size(0)

  val_losses.append(val_loss)
  val_accs.append(correct_cnt/cnt)

  print(f"{epoch}th epoch,  train_loss: {train_loss}, val_loss: {val_loss}")

    
