## Imports and configs

In [1]:
import os
import sys
from time import sleep

sys.path.append('..')

from utils import read_txt_to_mot, write_detection_features, FeatureNormalize
from utils.models import check_encoder_output_if_normalized

In [7]:
# --- Notebook configuration -------------------------------------------------
# Text file with the detections that are loaded by `read_txt_to_mot` below.
DETECTIONS_PATH = '../sample/detections.txt'

# Forwarded as `img_size=` to `write_detection_features`.
# NOTE(review): presumably the side length (px) each crop is resized to — confirm.
IMG_SIZE = 64

# Directory holding the frame images. The default is the original author's
# local dataset location; set the MOT_IMG_PATH environment variable to point
# the notebook at a different machine's copy without editing this cell.
IMG_PATH = os.environ.get(
    'MOT_IMG_PATH',
    '/mnt/c/workspace/datasets/topview/cam3_1hour_1/img1',
)

# Forwarded as `ext=` to `write_detection_features` (frame image extension).
EXT = 'PNG'

# Forwarded as `box_format=` to `write_detection_features`.
# NOTE(review): presumably boxes are (x_min, y_min, x_max, y_max) — confirm.
BOX_FORMAT = 'xyxy'

# Forwarded as `batch_size=` to `write_detection_features`.
BATCH_SIZE = 256

## Create data

In [8]:
# Load the detections file through the project helper. The trailing bare
# expression makes the notebook render the returned object as the cell output.
detections = read_txt_to_mot(
    DETECTIONS_PATH,
    # NOTE(review): hard-coded value; what `length` denotes is not visible in
    # this notebook — confirm against `read_txt_to_mot` before changing it.
    length=14952,
)
detections

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1,-1,817.466919,254.280380,949.700806,383.700623,0.998946,-1,-1,-1
1,1,-1,650.468689,585.898560,758.974670,721.172852,0.997912,-1,-1,-1
2,1,-1,682.504211,435.654877,793.261292,573.850830,0.997562,-1,-1,-1
3,1,-1,959.726624,265.447327,1035.810547,366.196716,0.996594,-1,-1,-1
4,1,-1,526.514038,509.146484,644.655396,655.279907,0.995303,-1,-1,-1
...,...,...,...,...,...,...,...,...,...,...
156,10,-1,1265.525513,918.235107,1279.896606,959.410278,0.176202,-1,-1,-1
157,10,-1,0.000000,210.696503,87.162033,337.874908,0.155744,-1,-1,-1
158,10,-1,1267.864990,884.124695,1279.949219,958.964783,0.069062,-1,-1,-1
159,10,-1,1190.128906,947.570618,1274.269531,960.000000,0.053171,-1,-1,-1


## Write features

In [9]:
import torch
from torch import nn
from torchvision import models

### ResNeXt-50 32x4d — semi-weakly supervised (SWSL) pretrained

In [None]:
# Encoder: ResNeXt-50 32x4d with semi-weakly supervised weights, fetched
# through torch.hub (requires network access on first run).
resnext_swsl = torch.hub.load(
    'facebookresearch/semi-supervised-ImageNet1K-models',
    'resnext50_32x4d_swsl',
)

# Turn the classifier into a feature extractor: the last residual stage is
# replaced by a pass-through and the fully connected layer by the project's
# FeatureNormalize head (presumably p-norm normalization along dim 1 — see
# `utils.FeatureNormalize`).
resnext_swsl.layer4 = nn.Identity()
resnext_swsl.fc = FeatureNormalize(p=2, dim=1)

# Sanity check, matching the other encoder cells in this notebook, which all
# run it before writing features; this cell previously skipped it.
# NOTE(review): the encoder is never switched to eval() here — confirm that
# `write_detection_features` handles that internally.
check_encoder_output_if_normalized(resnext_swsl, device='cpu')

write_detection_features(encoder=resnext_swsl,
                         detections=detections,
                         destination='../sample/feats_resnext50_32x4d_swsl/',
                         img_path=IMG_PATH, batch_size=BATCH_SIZE,
                         img_size=IMG_SIZE, box_format=BOX_FORMAT, ext=EXT)

# Short pause kept from the original cell (purpose not stated in the notebook).
sleep(2)

### ResNeXt-50 32x4d — ImageNet pretrained

In [None]:
# Encoder: torchvision's ResNeXt-50 32x4d with its bundled pretrained weights.
resnext_imgnet = models.resnext50_32x4d(pretrained=True)

# Same surgery as for the SWSL variant: the last residual stage becomes a
# pass-through and the classifier is swapped for the FeatureNormalize head.
resnext_imgnet.layer4 = nn.Identity()
resnext_imgnet.fc = FeatureNormalize(p=2, dim=1)

# Verify the modified network before any features are written.
check_encoder_output_if_normalized(resnext_imgnet, device='cpu')

# Encode every detection and store the result under the destination folder.
write_detection_features(
    encoder=resnext_imgnet,
    detections=detections,
    destination='../sample/feats_resnext50_32x4d_imgnet/',
    img_path=IMG_PATH,
    img_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    box_format=BOX_FORMAT,
    ext=EXT,
)

# Short pause kept from the original cell (purpose not stated in the notebook).
sleep(2)

### MobileNetV3-Large

In [None]:
# Encoder: torchvision's MobileNetV3-Large. The positional `True` is the
# legacy `pretrained` flag (the ResNeXt cell passes it by keyword instead).
mobilenet_v3 = models.mobilenet_v3_large(True)

# Replace the whole classifier with the FeatureNormalize head so the network
# emits normalized features rather than class scores.
mobilenet_v3.classifier = FeatureNormalize(p=2, dim=1)

# Verify the modified network before any features are written.
check_encoder_output_if_normalized(mobilenet_v3, device='cpu')

# Encode every detection and store the result under the destination folder.
write_detection_features(
    encoder=mobilenet_v3,
    detections=detections,
    destination='../sample/feats_mobilenet_v3_large/',
    img_path=IMG_PATH,
    img_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    box_format=BOX_FORMAT,
    ext=EXT,
)

# Short pause kept from the original cell (purpose not stated in the notebook).
sleep(2)

### DenseNet-121

In [10]:
# Encoder: the convolutional trunk of torchvision's DenseNet-121 (legacy
# positional `pretrained=True`) followed by a small pooling/normalization head.
tv_densenet = models.densenet121(True)

# The head is kept as its own nested Sequential: global average pool to 1x1,
# flatten, then the FeatureNormalize layer.
# NOTE(review): torchvision's DenseNet.forward applies a ReLU between
# `features` and the pooling step; this head omits it — confirm that is intended.
densenet = nn.Sequential(
    tv_densenet.features,
    nn.Sequential(
        nn.AdaptiveAvgPool2d(1),
        nn.Flatten(),
        FeatureNormalize(p=2, dim=1),
    ),
)

# Verify the assembled network before any features are written.
check_encoder_output_if_normalized(densenet, device='cpu')

# Encode every detection and store the result under the destination folder.
write_detection_features(
    encoder=densenet,
    detections=detections,
    destination='../sample/feats_densenet121/',
    img_path=IMG_PATH,
    img_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    box_format=BOX_FORMAT,
    ext=EXT,
)

# Short pause kept from the original cell (purpose not stated in the notebook).
sleep(2)

Output is normalized.
Convert


Features is saved at ./feats_densenet121/
Detections file is saved at ./feats_densenet121/detections.csv
