In [2]:
import os
from pathlib import Path
from tqdm import tqdm
from easydict import EasyDict as edict

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.utils as vutils
from torchvision import transforms as trans 

from data.ms1m import get_train_loader
from data.lfw import LFW
from backbone.arcfacenet import SEResNet_IR
from margin.ArcMarginProduct import ArcMarginProduct

from util.utils import save_checkpoint,test

In [60]:
!unzip "/content/dataset/MS1M.zip"
!unzip "/content/dataset/lfw_aligned_112.zip"



[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
  inflating: lfw_aligned_112/08613.jpg  
  inflating: lfw_aligned_112/05858.jpg  
  inflating: lfw_aligned_112/11317.jpg  
  inflating: lfw_aligned_112/08214.jpg  
  inflating: lfw_aligned_112/04665.jpg  
  inflating: lfw_aligned_112/09561.jpg  
  inflating: lfw_aligned_112/03688.jpg  
  inflating: lfw_aligned_112/08893.jpg  
  inflating: lfw_aligned_112/02023.jpg  
  inflating: lfw_aligned_112/09530.jpg  
  inflating: lfw_aligned_112/11125.jpg  
  inflating: lfw_aligned_112/03191.jpg  
  inflating: lfw_aligned_112/08453.jpg  
  inflating: lfw_aligned_112/10107.jpg  
  inflating: lfw_aligned_112/03501.jpg  
  inflating: lfw_aligned_112/05669.jpg  
  inflating: lfw_aligned_112/08368.jpg  
  inflating: lfw_aligned_112/04866.jpg  
  inflating: lfw_aligned_112/01659.jpg  
  inflating: lfw_aligned_112/11264.jpg  
  inflating: lfw_aligned_112/10320.jpg  
  inflating: lfw_aligned_112/11736.jpg  
  inflating: lfw_aligned_112/05162.jp

In [62]:

from posixpath import defpath
conf = edict()

conf.train_root = '/content/MS1M'
conf.lfw_test_root = '/content/lfw_aligned_112'
conf.lfw_file_list = '/content/dataset/lfw_pair.txt'


conf.mode = 'se_ir' 
conf.depth = 50
conf.margin_type = 'ArcFace'
conf.feature_dim = 512
conf.scale_siz = 32.0
conf.batch_size = 96
conf.lr = 0.01
conf.milestones = [8,10,12] #8,10,12. epochlarda learning raate i düşürme işlemi
conf.total_epoch = 14

conf.save_folder = './saved'
conf.save_dir = os.path.join(conf.save_folder, conf.mode+'_'+str(conf.depth))
conf.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') #GPU için
conf.num_workers = 2
conf.pin_memory = True


In [63]:
os.makedirs(conf.save_dir, exist_ok=True)

In [64]:
transform = trans.Compose([
    trans.ToTensor(),
    trans.Normalize(mean=(0.5,0.5,0.5),std=(0.5,0.5,0.5))
])
trainloader, class_num = get_train_loader(conf) #eğitim kümesini yükledik train_loader içerisine

In [65]:
print(class_num)

200


In [66]:
print(trainloader.dataset)

Dataset ImageFolder
    Number of datapoints: 29148
    Root location: /content/MS1M
    StandardTransform
Transform: Compose(
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
           )


In [67]:
lfwdataset =  LFW(conf.lfw_test_root,conf.lfw_file_list, transform = transform) #test verilerimizi yükledik
lfwloader = torch.utils.data.DataLoader(lfwdataset,batch_size=128, num_workers = conf.num_workers) 

In [68]:
print(conf.device)

cuda:0


In [69]:
net = SEResNet_IR(conf.depth, feature_dim = conf.feature_dim, mode = conf.mode).to(conf.device)
margin = ArcMarginProduct(conf.feature_dim, class_num).to(conf.device)
#modelimizi oluşturduk


In [70]:
print(net)

SEResNet_IR(
  (input_layer): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): PReLU(num_parameters=64)
  )
  (output_layer): Sequential(
    (0): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): Dropout(p=0.4, inplace=False)
    (2): Flatten()
    (3): Linear(in_features=25088, out_features=512, bias=True)
    (4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (body): Sequential(
    (0): BottleNeck_IR_SE(
      (shortcut_layer): MaxPool2d(kernel_size=1, stride=2, padding=0, dilation=1, ceil_mode=False)
      (res_layer): Sequential(
        (0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (2): BatchNorm2d(64, eps=1e-05, moment

In [71]:
criterion = nn.CrossEntropyLoss()

In [72]:
optimizer = optim.SGD([
    {'params':net.parameters(),'weight_decay':5e-4},
    {'params':margin.parameters(),'weight_decay':5e-4}
],lr=conf.lr, momentum=0.9,nesterov=True)

In [73]:
print(optimizer)

SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.01
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0.0005

Parameter Group 1
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.01
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0.0005
)


In [74]:
def scheduler_lr():
  for params in optimizer.param_groups:
    params['lr']/=10 #herbir fonk çağrıldığında 10 a böl ve lr ye eşitle demek
  print(optimizer)

In [77]:
best_acc = 0

for epoch in range(1, conf.total_epoch+1):

  net.train()
  print('epoch {}/{}'.format(epoch, conf.total_epoch),flush=True)
  if epoch == conf.milestones[0]: #8
    scheduler_lr()
  if epoch == conf.milestones[1]: #10
    scheduler_lr()
  if epoch == conf.milestones[2]: #12
    scheduler_lr()

  for data in tqdm(trainloader):
      img, label = data[0].to(conf.device),data[1].to(conf.device)
      optimizer.zero_grad()
      logits = net(img)
      output = margin(logits, label)
      total_loss = criterion(output,label)
      total_loss.backward()
      optimizer.step()


  net.eval()
  
  lfw_acc = test(conf,net, lfwdataset,lfwloader)
  print('\nLFW: {:.4f} | train_loss: {:.4f}\n'.format(lfw_acc,total_loss.item()))

  is_best = lfw_acc > best_acc
  best_acc = max(lfw_acc,best_acc)
  save_checkpoint({
    'epoch':epoch,
    'net_state_dict': net.state_dict(),
    'margin_state_dict': margin.state_dict(),
    'best_acc' : best_acc
  }, is_best, checkpoint = conf.save_dir)    

epoch 1/14


100%|██████████| 304/304 [04:18<00:00,  1.18it/s]



LFW: 0.8258 | train_loss: 5.8703

best model saved

epoch 2/14


100%|██████████| 304/304 [04:18<00:00,  1.17it/s]
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcdd34aa5e0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 1466, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 1449, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcdd34aa5e0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 1466, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 1449, in _shutdown_w


LFW: 0.8435 | train_loss: 4.2097

best model saved

epoch 3/14


100%|██████████| 304/304 [04:18<00:00,  1.17it/s]
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcdd34aa5e0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 1466, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 1449, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcdd34aa5e0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 1466, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 1449, in _shutdown_w


LFW: 0.8607 | train_loss: 4.4027

best model saved

epoch 4/14


100%|██████████| 304/304 [04:18<00:00,  1.18it/s]



LFW: 0.8437 | train_loss: 2.9771

epoch 5/14


100%|██████████| 304/304 [04:18<00:00,  1.18it/s]



LFW: 0.8520 | train_loss: 1.7186

epoch 6/14


100%|██████████| 304/304 [04:18<00:00,  1.17it/s]



LFW: 0.8608 | train_loss: 1.7804

best model saved

epoch 7/14


100%|██████████| 304/304 [04:18<00:00,  1.18it/s]



LFW: 0.8670 | train_loss: 1.4268

best model saved

epoch 8/14
SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0.0005

Parameter Group 1
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0.0005
)


100%|██████████| 304/304 [04:18<00:00,  1.17it/s]



LFW: 0.8708 | train_loss: 0.7345

best model saved

epoch 9/14


100%|██████████| 304/304 [04:18<00:00,  1.18it/s]



LFW: 0.8723 | train_loss: 0.3052

best model saved

epoch 10/14
SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.0001
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0.0005

Parameter Group 1
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.0001
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0.0005
)


100%|██████████| 304/304 [04:18<00:00,  1.18it/s]



LFW: 0.8718 | train_loss: 0.1298

epoch 11/14


100%|██████████| 304/304 [04:18<00:00,  1.18it/s]



LFW: 0.8748 | train_loss: 0.3424

best model saved

epoch 12/14
SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 1e-05
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0.0005

Parameter Group 1
    dampening: 0
    differentiable: False
    foreach: None
    lr: 1e-05
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0.0005
)


100%|██████████| 304/304 [04:18<00:00,  1.18it/s]



LFW: 0.8743 | train_loss: 0.1246

epoch 13/14


100%|██████████| 304/304 [04:18<00:00,  1.18it/s]



LFW: 0.8740 | train_loss: 0.2160

epoch 14/14


100%|██████████| 304/304 [04:18<00:00,  1.17it/s]



LFW: 0.8752 | train_loss: 0.8086

best model saved

