<a href="https://colab.research.google.com/github/shiyuhu1933/EC-523-final-project/blob/main/train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [26]:
from google.colab import drive
import torch
import os
import h5py
import numpy as np

drive.mount('/content/gdrive')

DATASET_GOOGLE_DRIVE_PATH = '/content/gdrive/MyDrive/Deep Learning /face_model_hdf5'


def get_file_name(files_list):
  with open(files_list) as f:
    return [line.rstrip()[:] for line in f]

class FaceMaskData(torch.utils.data.Dataset):

  def __init__(self, mode, config, transform=None):

    self.data_dir = DATASET_GOOGLE_DRIVE_PATH
    self.transform = transform

    if mode == 'train':
      self.files = get_file_name(os.path.join(self.data_dir, 'train_files.txt'))
    else:
      self.files = get_file_name(os.path.join(self.data_dir, 'test_files.txt'))

    image = []
    label = []

    for dataset in self.files:
      path = os.path.join(self.data_dir, dataset)
      self.file = h5py.File(path, 'r')
      self.total_num_imgs, self.H, self.W, self.C = self.file['image'].shape
      image.append(self.file['image'][:])
      label.append(self.file['labels'][:])

    self.image= np.vstack(image)
    self.label = np.vstack(label)

    self.num_images = len(self.image) 
    self.num_classes = len(np.unique(self.label))

  def __getitem__(self, index):
    """Return one image and its corresponding attribute label."""
    image = self.image[index]
    label = self.label[index]
    if self.transform:
        image = self.transform(image)
    return image, torch.FloatTensor(label)

  def __len__(self):
    return self.num_images
      
  def get_num_class(self):
    return self.num_classes



Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [27]:

def get_configuration(repeat_num, batch_size):
    config = {}

    config['device'] = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    config['batch_size'] = batch_size
    config['num_workers'] = 1
    config['repeat_num'] = repeat_num
    config['lr'] = 0.0001

    return config


def image_to_rgb(images):
  imgs = []
  for i in range(len(images)):
    image_r = torch.unsqueeze(images[i,:,:,0], 0)
    image_g = torch.unsqueeze(images[i,:,:,1], 0)
    image_b = torch.unsqueeze(images[i,:,:,2], 0)
    image_split_rgb = torch.cat((image_r, image_g, image_b), 0)
    image_split_rgb = torch.unsqueeze(image_split_rgb, 0)
    imgs.append(image_split_rgb)
  return torch.cat(imgs)

In [28]:
def print_network(model, name):
  """Print out the network information."""
  num_params = 0
  print("\n")
  print("model name", name)
  print(model)
  num_params = sum([par.numel() for par in model.parameters()])
  print("The number of parameters: {}".format(num_params))

In [12]:
import torch.nn as nn

config = get_configuration(repeat_num=10000, batch_size=128)
dataset = FaceMaskData('train', config)
data_loader = torch.utils.data.DataLoader(dataset=dataset,
                              batch_size=config['batch_size'],
                              shuffle=True, 
                              num_workers=config['num_workers'])
num_classes = dataset.get_num_class()

class IBSA_Block(nn.Module):
  def __init__(self, dim_in=112, dim_out=1, stride=1, embed_dim=512):
    super().__init__()

    self.dim_in, self.dim_out = dim_in, dim_out
    self.bn1 = nn.BatchNorm2d(dim_in, affine=True, track_running_stats=True)
    self.bn2 = nn.BatchNorm2d(dim_out, affine=True, track_running_stats=True)
    self.conv = nn.Conv2d(dim_in, dim_out, kernel_size=3, stride=stride)
    self.mhsa = nn.MultiheadAttention(embed_dim, dim_out)
    self.prelu = nn.PReLU()
    self.fc = nn.Linear(dim_out, num_classes)
  
  def forward(self, input):
    identity = input

    output = self.bn1(input)
    output = self.conv(output)
    output = self.bn2(output)
    output = self.prelu(output)
    output = self.mhsa(output)
    output = self.bn2(output)

    # output += identity
    # return output

    output = output.squeeze(-1).squeeze(-1)
    logits = self.fc(output)
    return logits

model = IBSA_Block().cuda()

In [51]:
import torch.nn as nn
import torchvision.models as models

config = get_configuration(repeat_num=10000, batch_size=16)
dataset = FaceMaskData('train', config)
data_loader = torch.utils.data.DataLoader(dataset=dataset,
                              batch_size=config['batch_size'],
                              shuffle=True, 
                              num_workers=config['num_workers'])
num_classes = dataset.get_num_class()

class Conv3x3(nn.Module):
  def __init__(self, dim_in=3, dim_out=3, stride=1):
    super().__init__()

    self.dim_in, self.dim_out = dim_in, dim_out

    model = models.resnet18(pretrained=False)
    modules = list(model.children())[:-1]
    self.extractor = nn.Sequential(*modules)

    self.conv = nn.Conv2d(dim_in, dim_out, kernel_size=3, stride=stride, padding=1)
    self.fc = nn.Linear(112, num_classes)

  
  def forward(self, input):

    print("before conv", input.size())
    output = self.conv(input)
    print("conv", output.size())


    output = self.extractor(output)
    output = output.squeeze(-1).squeeze(-1)
    print("before logit" , output.size())
    logits = self.fc(output)
    print("after logit", logits.size())
    return logits

model = Conv3x3().cuda()
print_network(model, 'Conv3x3')



model name Conv3x3
Conv3x3(
  (extractor): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, tr

In [52]:

   
import torch
import torch.nn as nn
import math

class FocalLoss(nn.Module):

    def __init__(self, gamma=0, eps=1e-7):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        #print(self.gamma)
        self.eps = eps
        self.ce = torch.nn.CrossEntropyLoss(reduction="none")

    def forward(self, input, target):
        logp = self.ce(input, target)
        p = torch.exp(-logp)
        loss = (1 - p) ** self.gamma * logp
        return loss.mean()


class ArcFaceLoss(nn.modules.Module):
    def __init__(self, s=45.0, m=0.1, crit="bce", weight=None, reduction="mean", class_weights_norm='batch'):
        super().__init__()

        self.weight = weight
        self.reduction = reduction
        self.class_weights_norm = class_weights_norm
        
        if crit == "focal":
            self.crit = FocalLoss(gamma=2)
        elif crit == "bce":
            self.crit = nn.CrossEntropyLoss(reduction="none")   

        if s is None:
            self.s = torch.nn.Parameter(torch.tensor([45.], requires_grad=True, device='cuda'))
        else:
            self.s = s

        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m
        
    def forward(self, logits, labels):

        logits = logits.float()
        cosine = logits
        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m
        phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        
        labels2 = torch.zeros_like(cosine)
        labels2.scatter_(1, labels.view(-1, 1).long(), 1)
        output = (labels2 * phi) + ((1.0 - labels2) * cosine)

        s = self.s

        output = output * s
        loss = self.crit(output, labels)

        if self.weight is not None:
            w = self.weight[labels].to(logits.device)

            loss = loss * w
            if self.class_weights_norm == "batch":
                loss = loss.sum() / w.sum()
            if self.class_weights_norm == "global":
                loss = loss.mean()
            else:
                loss = loss.mean()
            
            return loss
        if self.reduction == "mean":
            loss = loss.mean()
        elif self.reduction == "sum":
            loss = loss.sum()
        return loss


def loss_fn(metric_crit, target_dict, output_dict, num_classes, device):
    
    y_true = target_dict['target'].to(device)
    y_pred = output_dict['logits'].to(device)
    #ignore invalid classes for val loss
    mask = y_true < num_classes
    if mask.sum() == 0:
        return torch.zeros(1,  device = y_pred.device)
    loss = metric_crit(y_pred[mask], y_true[mask])

    return loss


In [53]:
import time
from torch import nn, optim
import torchvision.models as models

model = Conv3x3().cuda()

criterion = ArcFaceLoss().cuda()

optimizer = optim.SGD(model.parameters(), lr=0.1, momentum =0.9, weight_decay=5e-4)

Epochs = 5

loss = []

print('Start training ...')
start_time = time.time()
for epoch in range(1, Epochs+1):
  model.train()

  for i, data in enumerate(data_loader, 0):
    images, labels = data
    images, labels = images.float().cuda(), labels.cuda()
    images = image_to_rgb(images)

    logits = model(images)
    print(labels.size())
    print(logits.size())
    loss = criterion(logits, labels)
    loss_acc += loss.item()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    loss_acc /= Epochs
    print('Epoch %d: Loss = %.2f'%(epoch, loss_acc))

Start training ...
before conv torch.Size([16, 3, 112, 112])
conv torch.Size([16, 3, 112, 112])
before logit torch.Size([16, 512])


RuntimeError: ignored

In [None]:
import torch
import torch.nn as nn
import math
import torch.nn.functional as F

class ArcFaceloss(nn.Module):
  def __init__(self, s=45.0, m=0.1, weight = None):
    super(ArcFaceloss, self).__init__()
    # x = tf.nn.l2_normalize(x, axis=1)
    self.weight = weight
    self.s = s
    self.cosm = math.cos(m)
    self.sinm = math.sin(m)
    self.th = math.cos(math.pi - m)
    self.mm = math.sin(math.pi - m) * m
  
  def forward(self, input, labels):
    num_classes = dataset.get_num_class()
    cost = input
    sint = torch.sqrt(1.0 - torch.square(cost))
    cosmt = self.s * (self.cosm * cost - self.sinm * sint)
    k = torch.where(cost > self.th, cosmt, self.s * (cost - self.mm))
    
    label = F.one_hot(labels, num_classes)
    output = (1 - label) * self.s * cost + label * k
    return output

In [15]:
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter
import math


class ArcFace(nn.Module):
    def __init__(self, s=64.0, m=0.5):
        super(ArcFace, self).__init__()
        self.s = s
        self.m = m

    def forward(self, cosine: torch.Tensor, label):
        index = torch.where(label != -1.)[0]
        m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device)

        m_hot.scatter_(1, label[index, None], self.m)
        cosine.acos_()
        cosine[index] += m_hot
        cosine.cos_().mul_(self.s)
        return cosine