In [1]:
import torch
# Fetch ResNet-18 with pretrained weights from the torchvision hub repo,
# pinned to the v0.10.0 tag.
model = torch.hub.load(
    'pytorch/vision:v0.10.0',
    'resnet18',
    pretrained=True,
)

  from .autonotebook import tqdm as notebook_tqdm
Using cache found in /home/stanislaw/.cache/torch/hub/pytorch_vision_v0.10.0


In [2]:
# `eval()` hands back the module itself, so leaving the call as the cell's last
# expression both switches the network to inference mode and displays it.
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [3]:
from PIL import Image
from torchvision import transforms

# input_image.show()

In [4]:
from retinaface import RetinaFace
import numpy as np
import shutil
from tqdm import tqdm
import glob

#### Filter the MSCOCO images that contain faces and move them to a separate folder (all errors are ignored, e.g. images that are not 3-channel)

In [None]:
# Move every sampled MSCOCO image in which RetinaFace reports at least one face
# scoring above FACE_SCORE_THRESHOLD into the sibling `face/` folder.
FACE_SCORE_THRESHOLD = 0.9

count = 0      # number of images moved
skipped = []   # paths whose detection or move raised; kept for inspection
for i in tqdm(range(13076, 15005, 5)):
    path = f"mscoco_face_data/split_00000/00001/{str(i).zfill(9)}.jpg"
    try:
        resp = RetinaFace.detect_faces(path)
        # Compare each score with the threshold individually. The earlier
        # `np.any(scores) > 0.9` compared the *boolean* returned by np.any with
        # 0.9, so any non-zero score passed no matter how low it was.
        if any(resp[face]['score'] > FACE_SCORE_THRESHOLD for face in resp):
            shutil.move(path, f'{path.split("/")[0]}/face/{path.split("/")[-1]}')
            count += 1
    except Exception:
        # Deliberately best-effort: a failing image is skipped, not fatal.
        # Unlike a bare `except:`, this still lets KeyboardInterrupt stop the loop.
        skipped.append(path)
print(count)
print(f"{len(skipped)} paths skipped because of errors")

  3%|▎         | 11/386 [00:13<05:40,  1.10it/s]

In [12]:
# Scores of every entry in the most recent detection result.
[resp[face_key]['score'] for face_key in resp]

[0.9995817542076111]

In [5]:
# Build one tensor holding every preprocessed image: the MSCOCO face images
# first, then the LAION ones.

# Preprocessing pipeline, built once and shared by both datasets.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


def _load_image_tensors(paths):
    """Preprocess every image in `paths` that can be opened and transformed.

    Returns a pair `(tensors, skipped)`: the list of preprocessed tensors in
    input order, and the list of paths that raised while being opened or
    transformed.
    """
    tensors, skipped = [], []
    for path in paths:
        try:
            # The context manager closes the underlying file once the pixels
            # have been consumed by the transforms.
            with Image.open(path) as input_image:
                tensors.append(preprocess(input_image))
        except Exception:
            # Best effort, as before: unusable images are skipped, but they are
            # now recorded, and KeyboardInterrupt is no longer swallowed.
            skipped.append(path)
    return tensors, skipped


#coco — sorted so the row order is reproducible between runs
coco_tensors, coco_skipped = _load_image_tensors(sorted(glob.iglob('./mscoco_face_data/face/*')))
#laion
laion_paths = [f"laion_face_data/split_00000/00000/{str(i).zfill(9)}.jpg" for i in range(0, 167)]
laion_tensors, laion_skipped = _load_image_tensors(laion_paths)

# Report the per-dataset counts: the boundary between the two datasets inside
# img_batch is len(coco_tensors).
print(f"coco: {len(coco_tensors)} loaded, {len(coco_skipped)} skipped")
print(f"laion: {len(laion_tensors)} loaded, {len(laion_skipped)} skipped")

if not (coco_tensors or laion_tensors):
    raise RuntimeError("No image could be loaded; check the dataset paths.")

img_batch = torch.stack(coco_tensors + laion_tensors)

In [6]:
# Dimensions of the stacked image tensor.
img_batch.size()

torch.Size([286, 3, 224, 224])

In [7]:
# Run the model over img_batch in mini-batches and collect its outputs.
# No device transfer is done here; model and data stay where they already are.
BATCH_SIZE = 10

batch_outputs = []
with torch.no_grad():  # inference only, no autograd bookkeeping needed
    # Derive the range from the data so no image is dropped when the number of
    # images differs from a hand-entered upper bound.
    for start in tqdm(range(0, len(img_batch), BATCH_SIZE)):
        input_batch = img_batch[start:start + BATCH_SIZE]
        batch_outputs.append(model(input_batch))

# Concatenate the per-batch results along the batch dimension.
output = torch.cat(batch_outputs)

100%|██████████| 29/29 [00:05<00:00,  5.33it/s]


In [8]:
# Dimensions of the collected model outputs.
output.size()

torch.Size([286, 1000])

In [9]:
# Dump the model outputs as a TSV without header or index column; the file is
# meant to be loaded into https://projector.tensorflow.org/ for visualisation.
import pandas as pd

t = output
t_np = t.cpu().numpy()        # tensor -> NumPy array (moved to CPU first)
df = pd.DataFrame(data=t_np)  # one row per image
df.to_csv("testfile.tsv", sep="\t", header=False, index=False)

In [10]:
# Number of leading rows of `df` that come from the first dataset.
NUMBER_OF_IMG_IN_1ST_DATASET = 143
# Rows are 0-indexed, so the first dataset occupies indices 0..N-1 and the
# second one starts at index N. Hence `>=`: a plain `>` would label row N as
# belonging to the first dataset.
df['i'] = df.index >= NUMBER_OF_IMG_IN_1ST_DATASET

In [11]:
# Write the dataset flag column "i" on its own, one value per line,
# without header or index.
df['i'].to_csv(
    "labels.tsv",
    header=False,
    index=False,
)

In [12]:
# Display the frame: the numeric output columns plus the boolean flag column "i".
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,991,992,993,994,995,996,997,998,999,i
0,0.042199,1.666004,1.568521,1.650911,1.586255,2.484907,2.846426,-1.661567,-2.239351,-5.042858,...,-3.983123,-2.128247,-5.320333,0.240097,-2.150342,-1.685590,0.121385,0.928128,4.590708,False
1,-3.557405,-0.391183,-3.935380,-3.073274,-1.541572,-1.344232,-1.192342,0.894013,-1.313890,-3.197292,...,-5.279642,-4.122759,-3.515110,-3.208604,-2.717437,-0.677198,-2.210864,-2.066689,6.259179,False
2,-2.732625,-1.538850,-2.989211,-6.986698,-3.214403,-2.543644,-4.121750,1.880281,3.037087,-0.508553,...,-0.579668,-0.724722,-0.664674,-1.556822,-0.281095,1.602777,-0.946723,1.002028,2.886131,False
3,-2.268174,-3.392117,0.447099,-0.462099,-1.508214,-1.567885,-0.886254,0.708687,0.072315,1.511860,...,-3.953819,-2.851128,-1.926602,-1.519792,-2.445457,0.467408,-2.101395,0.254803,1.548754,False
4,-1.263749,-4.440587,-1.158522,-2.686519,-2.138419,-4.611231,-3.895607,0.796442,-0.541820,2.452530,...,-4.414467,-3.708502,-4.860530,-3.136843,-2.958047,-2.633459,-3.996073,0.269483,0.835155,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281,3.742020,-0.824134,-0.026026,0.264265,0.801854,-0.365111,0.658960,-1.408317,-2.503309,-0.678806,...,-2.780777,-3.089402,-2.131645,-0.176030,-2.828650,-0.783943,0.221712,1.490485,2.184805,True
282,1.127660,-1.434879,0.192641,-1.385619,1.268991,-1.124739,-2.175979,0.986479,-0.990430,0.302945,...,-3.015771,-3.237227,-0.279458,-0.251598,-1.055005,-0.795737,-0.733964,2.307082,2.245730,True
283,-1.843122,-0.527434,-1.966538,-2.560517,-1.747687,-2.513512,-4.364317,-0.294093,-3.212142,-3.892242,...,-3.949220,-1.440389,-3.932070,-2.128608,-4.296507,-1.822982,-5.117743,0.183965,3.046900,True
284,-3.965820,-2.187596,1.363854,-2.315587,-0.651931,0.877066,-1.638174,-1.579365,-3.465210,-3.055721,...,-3.856341,-1.468276,-3.639595,-3.832289,-2.670493,-1.253663,-4.749150,-2.498746,2.180613,True


### Hand-derived size of `img_batch` (some image ids are filtered out during preprocessing)

In [17]:
# Load only the LAION images into img_batch, to see how many survive preprocessing.

# Preprocessing pipeline, built once instead of on every iteration.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

laion_tensors = []
laion_skipped = []   # paths that could not be opened or transformed
for i in range(0, 167):
    path = f"laion_face_data/split_00000/00000/{str(i).zfill(9)}.jpg"
    try:
        # The context manager closes the file once the transforms are done.
        with Image.open(path) as input_image:
            laion_tensors.append(preprocess(input_image))
    except Exception:
        # Best effort, as before: unusable images are skipped, but they are now
        # recorded, and KeyboardInterrupt is no longer swallowed.
        laion_skipped.append(path)

if not laion_tensors:
    raise RuntimeError("No LAION image could be loaded; check the dataset path.")

img_batch = torch.stack(laion_tensors)

In [7]:
# Dimensions of the image tensor currently bound to img_batch.
img_batch.size()

torch.Size([286, 3, 224, 224])

In [13]:
# Load only the MSCOCO face images into img_batch, to see how many survive preprocessing.

# Preprocessing pipeline, built once instead of on every iteration.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

coco_tensors = []
coco_skipped = []   # paths that could not be opened or transformed
# Sorted so the row order is reproducible between runs.
for filename in sorted(glob.iglob('./mscoco_face_data/face/*')):
    try:
        # The context manager closes the file once the transforms are done.
        with Image.open(filename) as input_image:
            coco_tensors.append(preprocess(input_image))
    except Exception:
        # Best effort, as before: unusable images are skipped, but they are now
        # recorded, and KeyboardInterrupt is no longer swallowed.
        coco_skipped.append(filename)

if not coco_tensors:
    raise RuntimeError("No MSCOCO image could be loaded; check the dataset path.")

img_batch = torch.stack(coco_tensors)