In [1]:
import os, sys, gc
import time
import glob
import pickle
import copy
import json
import random
from collections import OrderedDict, namedtuple
import multiprocessing
import threading
import traceback

from typing import Tuple, List

import h5py
from tqdm import tqdm, tqdm_notebook

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import cv2
from PIL import Image


import torch
import torchvision
import torch.nn.functional as F

from torch import nn, optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR

import torchmetrics
import pl_bolts
import pytorch_lightning as pl



from IPython.display import display, clear_output

import faiss

from modules.AugsDS_v7 import *
from modules.eval_functions import *
from modules.eval_metrics import evaluate

# Make ./modules importable by bare module name for imports executed after this
# line. Guarded so that re-running the cell does not pile up duplicate entries
# in sys.path.
if './modules' not in sys.path:
    sys.path.append('./modules')

 - Found: 861 screenshots.  SCREENSHOT_DIR=./FB_page_qry


In [2]:
from modules.Facebook_model_AFMultiGPU_v22 import ArgsT22_EffNetV2, FacebookModel

In [3]:
# Start from the ArgsT22_EffNetV2 defaults and override a few fields for this run.
args = ArgsT22_EffNetV2()

# NOTE(review): presumably skips loading pretrained backbone weights because a
# checkpoint is restored further below -- confirm against FacebookModel.
args.pretrained_bb = False

# NOTE(review): presumably the number of ArcFace classes/kernels -- TODO confirm
# that 40 is what the restored checkpoint expects.
args.arc_classnum = 40
# Dump the full argument list so the effective configuration is recorded in the output.
print(args) 

 = = = = = = = = = = ArgList = = = = = = = = = =
ALL_FOLDERS                   : ['query_images', 'reference_images', 'training_images', 'imagenet_images', 'face_frames']
BACKBONE_GPUS                 : [0]
BATCH_SIZE                    : 96
DATASET_WH                    : (384, 384)
DS_DIR                        : ./all_datasets/dataset_jpg_384x384
DS_INPUT_DIR                  : ./all_datasets/dataset
FrmFaces_SAMPLES              : ./FrameFaces_samples_v.pickle
GeM_opt_p                     : True
GeM_p                         : 3.0
ImgNet_SAMPLES                : ./ImageNet_samples_v.pickle
N_WORKERS                     : 28
OUTPUT_WH                     : (160, 160)
accelerator                   : ddp
arc_bottleneck                : None
arc_classnum                  : 40
arc_devices_v                 : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
arc_m                         : 0.4
arc_optimizer          

# Building model

In [4]:
# Build the network from the settings in `args`; the cell output reports the
# total number of weights.
model = FacebookModel(args)

 - Total weights: 22.62M


# Loading checkpoint

In [5]:
# Checkpoint to evaluate. The literal is split into adjacent string pieces so
# the metrics encoded in the filename are readable; the resulting value is
# unchanged.
ckpt_filename = (
    './checkpoints/smp_test22/'
    'FacebookModel_Eepoch=15'
    '_TLtrn_loss_epoch=2.1522'
    '_TAtrn_acc_epoch=0.0000'
    '_VLval_loss_epoch=nan'
    '_VAval_acc_epoch=0.9752'
    '.ckpt'
)

# The return value of restore_checkpoint is not needed here; bind it to `_` so
# the notebook does not echo it.
_ = model.restore_checkpoint(ckpt_filename)

 - Loading model state_dict.
 - Restored checkpoint: ./checkpoints/smp_test22/FacebookModel_Eepoch=15_TLtrn_loss_epoch=2.1522_TAtrn_acc_epoch=0.0000_VLval_loss_epoch=nan_VAval_acc_epoch=0.9752.ckpt.




# Inference configuration

In [6]:
# --- Retrieval settings ------------------------------------------------------
# Forwarded to calc_embed_d and used to tag the output filenames with '_AUG'.
do_simple_augmentation = False
# Passed as `k` to calc_match_scores.
K = 500

# --- Data-loading settings ---------------------------------------------------
ALL_FOLDERS  = ['query_images', 'reference_images', 'training_images']
DS_INPUT_DIR = './all_datasets/dataset'
N_WORKERS    = 7
BATCH_SIZE   = 128

# Mirror the overrides onto `args`, which is what the cells below read from.
args.DS_INPUT_DIR = DS_INPUT_DIR
args.N_WORKERS = N_WORKERS
args.BATCH_SIZE = BATCH_SIZE
args.ALL_FOLDERS = ALL_FOLDERS

In [7]:
# Drop any trailing path separators ('/' or '\') so that the derived directory
# name below does not end up as e.g. 'dataset/_jpg_384x384'.
# The previous loop compared the last character against r'\\' (a two-character
# string, so a backslash never matched), raised IndexError on an empty string,
# and never wrote the stripped value back to `args`.
DS_INPUT_DIR = DS_INPUT_DIR.rstrip('/\\')
args.DS_INPUT_DIR = DS_INPUT_DIR

# Path where the rescaled images will be saved
args.DS_DIR = f'{args.DS_INPUT_DIR}_jpg_{args.DATASET_WH[0]}x{args.DATASET_WH[1]}'

# Data Source

In [8]:
# (Re)build the resized copy of the dataset only when at least one of the
# expected sub-folders is missing from DS_DIR.
if any(not os.path.exists(os.path.join(args.DS_DIR, folder)) for folder in args.ALL_FOLDERS):
    assert os.path.exists(args.DS_INPUT_DIR), f'DS_INPUT_DIR not found: {args.DS_INPUT_DIR}'

    resize_dataset(
        ds_input_dir=args.DS_INPUT_DIR,
        ds_output_dir=args.DS_DIR,
        output_wh=args.DATASET_WH,
        output_ext='jpg',
        num_workers=args.N_WORKERS,
        ALL_FOLDERS=args.ALL_FOLDERS,
        verbose=False,
    )

print('Paths:')
print(' - DS_INPUT_DIR:', args.DS_INPUT_DIR)
print(' - DS_DIR:      ', args.DS_DIR)

assert os.path.exists(args.DS_DIR), f'DS_DIR not found: {args.DS_DIR}'

# Look for the public ground truth next to the resized images first, then fall
# back to the original dataset folder. Only a missing file triggers the
# fallback: any other failure (malformed CSV, permission error, interrupt) is
# surfaced instead of being swallowed by a bare `except:`.
try:
    public_ground_truth_path = os.path.join(args.DS_DIR, 'public_ground_truth.csv')
    public_gt = pd.read_csv(public_ground_truth_path)
except FileNotFoundError:
    public_ground_truth_path = os.path.join(args.DS_INPUT_DIR, 'public_ground_truth.csv')
    public_gt = pd.read_csv(public_ground_truth_path)

Paths:
 - DS_INPUT_DIR: ./all_datasets/dataset
 - DS_DIR:       ./all_datasets/dataset_jpg_384x384


# Datasets

In [9]:
# Keyword arguments shared by the three inference datasets; only the list of
# sample ids differs between them.
shared_ds_kwargs = dict(
    do_augmentation=False,
    ds_dir=args.DS_DIR,
    output_wh=args.OUTPUT_WH,
    channel_first=True,
    norm_type=args.img_norm_type,
    verbose=True,
)

# Query ids: Q00000 .. Q49999
ds_qry_full = FacebookDataset(
    samples_id_v=[f'Q{idx:05d}' for idx in range(50_000)],
    **shared_ds_kwargs,
)

# Reference ids: R000000 .. R999999
ds_ref_full = FacebookDataset(
    samples_id_v=[f'R{idx:06d}' for idx in range(1_000_000)],
    **shared_ds_kwargs,
)

# Training ids: T000000 .. T999999
ds_trn_full = FacebookDataset(
    samples_id_v=[f'T{idx:06d}' for idx in range(1_000_000)],
    **shared_ds_kwargs,
)

# To eyeball a sample, call e.g. ds_qry_full.plot_sample(4) on any of the above.

# All loaders iterate in dataset order (shuffle=False) with the same batch size
# and worker count.
shared_dl_kwargs = dict(
    batch_size=args.BATCH_SIZE,
    num_workers=args.N_WORKERS,
    shuffle=False,
)

dl_qry_full = DataLoader(ds_qry_full, **shared_dl_kwargs)
dl_ref_full = DataLoader(ds_ref_full, **shared_dl_kwargs)
dl_trn_full = DataLoader(ds_trn_full, **shared_dl_kwargs)



### Query embeddings

In [10]:
# Embed every query image with the restored model. The result is reused by both
# the reference run and the training run further down.
embed_qry_d = calc_embed_d(model, dataloader=dl_qry_full, do_simple_augmentation=do_simple_augmentation)

 - Setting Backbone's device: cuda:0
 - ArcfaceMultiGPU, setting devices:
  |-> kernel( 0): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 1): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 2): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 3): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 4): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 5): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 6): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 7): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 8): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 9): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(10): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(11): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(12): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(13): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(14): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(15): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(16): cuda:0   shape: torch.Size([

100%|█████████████████████████████████████████| 391/391 [00:58<00:00,  6.69it/s]


### Reference embeddings

In [11]:
# Output artefacts for the query-vs-reference run. Both are derived from the
# checkpoint filename so results stay next to the weights that produced them.
if do_simple_augmentation:
    aug = '_AUG'
else:
    aug = ''

ref_suffix = '_{}x{}{}_REF.h5'.format(args.OUTPUT_WH[0], args.OUTPUT_WH[1], aug)
submission_path = ckpt_filename.replace('.ckpt', ref_suffix)
scores_path = submission_path.replace('.h5', '_match_d.pickle')

In [12]:
# 1) Embed the reference images.
embed_ref_d = calc_embed_d(model, dataloader=dl_ref_full, do_simple_augmentation=do_simple_augmentation)

# 2) Write query + reference embeddings to the .h5 submission file.
save_submission(embed_qry_d, embed_ref_d, save_path=submission_path)

# 3) Compute the query-vs-reference match scores (k=K) and persist them.
match_d = calc_match_scores(embed_qry_d, embed_ref_d, k=K)
save_obj(match_d, scores_path)

 - Setting Backbone's device: cuda:0
 - ArcfaceMultiGPU, setting devices:
  |-> kernel( 0): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 1): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 2): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 3): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 4): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 5): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 6): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 7): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 8): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 9): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(10): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(11): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(12): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(13): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(14): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(15): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(16): cuda:0   shape: torch.Size([

100%|███████████████████████████████████████| 7813/7813 [18:15<00:00,  7.13it/s]


 - Saved: ./checkpoints/smp_test22/FacebookModel_Eepoch=15_TLtrn_loss_epoch=2.1522_TAtrn_acc_epoch=0.0000_VLval_loss_epoch=nan_VAval_acc_epoch=0.9752_160x160_REF.h5


100%|█████████████████████████████████████████| 100/100 [04:16<00:00,  2.57s/it]

Saved: ./checkpoints/smp_test22/FacebookModel_Eepoch=15_TLtrn_loss_epoch=2.1522_TAtrn_acc_epoch=0.0000_VLval_loss_epoch=nan_VAval_acc_epoch=0.9752_160x160_REF_match_d.pickle





### Public GT validation

In [13]:
# Score the query-vs-reference submission against the public ground truth.
# NOTE(review): `submission_path` is reassigned to the *_TRN.h5 file in the
# "Training embeddings" section, so re-running this cell after that section
# would evaluate the wrong file. Run it right after the reference cells.
eval_d = evaluate(
    submission_path=submission_path,
    gt_path=public_ground_truth_path,
    is_matching=False,
)

./checkpoints/smp_test22/FacebookModel_Eepoch=15_TLtrn_loss_epoch=2.1522_TAtrn_acc_epoch=0.0000_VLval_loss_epoch=nan_VAval_acc_epoch=0.9752_160x160_REF.h5
{
  "average_precision": 0.6306082181957473,
  "recall_p90": 0.5283510318573432
}


### Training embeddings

In [14]:
# Output artefacts for the query-vs-training run. Note that this rebinds
# `submission_path` and `scores_path`, which previously pointed at the REF files.
if do_simple_augmentation:
    aug = '_AUG'
else:
    aug = ''

trn_suffix = '_{}x{}{}_TRN.h5'.format(args.OUTPUT_WH[0], args.OUTPUT_WH[1], aug)
submission_path = ckpt_filename.replace('.ckpt', trn_suffix)
scores_path = submission_path.replace('.h5', '_match_d.pickle')

In [15]:
# Embed the training images with the same model and settings as the queries.
embed_trn_d = calc_embed_d(model, dataloader=dl_trn_full, do_simple_augmentation=do_simple_augmentation)

# Write query + training embeddings to the *_TRN.h5 file.
save_submission(embed_qry_d, embed_trn_d, save_path=submission_path)

 - Setting Backbone's device: cuda:0
 - ArcfaceMultiGPU, setting devices:
  |-> kernel( 0): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 1): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 2): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 3): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 4): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 5): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 6): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 7): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 8): cuda:0   shape: torch.Size([256, 1])
  |-> kernel( 9): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(10): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(11): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(12): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(13): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(14): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(15): cuda:0   shape: torch.Size([256, 1])
  |-> kernel(16): cuda:0   shape: torch.Size([

100%|███████████████████████████████████████| 7813/7813 [18:06<00:00,  7.19it/s]


 - Saved: ./checkpoints/smp_test22/FacebookModel_Eepoch=15_TLtrn_loss_epoch=2.1522_TAtrn_acc_epoch=0.0000_VLval_loss_epoch=nan_VAval_acc_epoch=0.9752_160x160_TRN.h5


In [16]:
# Query-vs-training match scores with k=K. This overwrites the `match_d`
# computed earlier for the reference set; the reference result remains
# available in its own pickle file.
match_d = calc_match_scores(embed_qry_d, embed_trn_d, k=K)
# `scores_path` points at the *_TRN_match_d.pickle file at this point.
save_obj(match_d, scores_path)

100%|█████████████████████████████████████████| 100/100 [04:17<00:00,  2.57s/it]

Saved: ./checkpoints/smp_test22/FacebookModel_Eepoch=15_TLtrn_loss_epoch=2.1522_TAtrn_acc_epoch=0.0000_VLval_loss_epoch=nan_VAval_acc_epoch=0.9752_160x160_TRN_match_d.pickle



