## Extracting local features for one file
The code in the cell below extracts local features for a single image. It is used to test the pipeline on one file before scaling up to the whole database.

In [None]:
%%time
import os
import sys
import timm
import torch
from tqdm import tqdm
from torchinfo import summary
import numpy as np
import torchvision.transforms as ttf
import numpy as np
import csv
# !pip install nbformat
%run model_notebook.ipynb

# Model configuration and run constants
BACKBONE, POOL = "resnext", "GeM"
NORM = None
BATCH_SIZE = 1

# Name (without extension) of the image to process; the matching
# "<name>_query.json" index file is the one that gets loaded.
# Alternative image: 'x3vA7Bk0HNI6rGkDpDZQUQ'
image_file_name = 'SQNDJeXa8UQ9pHht-13PNg'

# Working directory of the single-image check and the query index inside its 'cph' folder
root_dir = 'global_feature_verification_dir/'
q_idx = os.path.join(root_dir, 'cph', f'{image_file_name}_query.json')

# Convert any given npy features file to csv file
def convert_npy_features_to_csv(source_file, target_file):
    """Dump the array stored in a ``.npy`` file to a CSV file.

    Args:
        source_file: Path of the ``.npy`` file, read with ``np.load``.
        target_file: Path of the CSV file to write (overwritten if it exists).

    A 1-D array (a single feature vector) is written as one CSV row; arrays
    with two or more dimensions are written one row per first-axis entry.
    """
    # writerows() needs an iterable of rows, so promote a lone (f,) vector to (1, f).
    feature_rows = np.atleast_2d(np.load(source_file)).tolist()

    # newline="" lets the csv module control line endings itself; without it
    # every row is followed by a blank line on platforms that translate "\n".
    with open(target_file, "w", newline="") as f:
        csv.writer(f).writerows(feature_rows)

def extract_features(model, local_branch, feats_file , f_length=2048):
    """Extract local features for the configured query image(s) and save them.

    The images come from a dataloader built with the notebook-level
    ``create_dataloader`` helper and the globals ``root_dir``, ``q_idx`` and
    ``BATCH_SIZE``. Each batch is passed through ``model`` and then through
    ``local_branch``; the results are stored in a ``(len(dataset), f_length)``
    array that is written to ``feats_file`` with ``np.save``.

    Args:
        model: Backbone network; its output is fed to ``local_branch``.
        local_branch: Module applied to the backbone output. It is moved to
            the device the backbone lives on. Its train/eval mode is left
            untouched, so the caller should set it.
        feats_file: Destination ``.npy`` path. If it already exists nothing is
            computed and the function returns immediately.
        f_length: Length of one feature vector (number of columns saved).
    """
    # Check the cache first so a re-run does not build transforms/dataloader for nothing.
    if os.path.exists(feats_file):
        print(feats_file,"already exists. Skipping.")
        return

    # Make sure the target directory exists *before* the expensive loop,
    # otherwise np.save would only fail after all features were computed.
    os.makedirs(os.path.dirname(feats_file) or ".", exist_ok=True)

    # To run the feature extraction just like nicola we need the following things:
    image_size = [480,640]
    image_t = ttf.Compose([ttf.Resize(size=(image_size[0],image_size[1])),
                        ttf.ToTensor(),
                        ttf.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                        ])
    dl = create_dataloader("test",root_dir, q_idx,None,image_t, BATCH_SIZE)

    # Follow the backbone's device instead of assuming CUDA is present.
    device = next(model.parameters()).device
    local_branch.to(device)

    feats = np.zeros((len(dl.dataset), f_length))
    # Inference only: no_grad avoids building the autograd graph for every batch.
    with torch.no_grad():
        # Wrapping the loader itself (not enumerate) lets tqdm show total/ETA.
        for i, batch in enumerate(tqdm(dl, desc="Extracting features")):
            start = i * dl.batch_size
            local_features = local_branch(model(batch.to(device)))
            # squeeze(0) drops the batch axis when the batch holds a single image
            feats[start:start + dl.batch_size] = local_features.cpu().squeeze(0).numpy()
    np.save(feats_file, feats)
    print(f"{feats_file} has been saved..........")


## Extract local features using the model from the disk
model_file_weights = os.path.join('generalized_contrastive_loss','Models','MSLS','MSLS_resnext_GeM_480_GCL.pth')
model = create_model(BACKBONE,POOL,norm=NORM,mode="single" )
file_name_extension = ''

# Read the checkpoint once. map_location="cpu" keeps loading independent of
# the device the checkpoint was saved from; the model is moved afterwards.
checkpoint = torch.load(model_file_weights, map_location="cpu")
# The weights are stored under one of two keys depending on the checkpoint;
# pick the key explicitly instead of hiding every error behind a bare except.
state_dict_key = "model_state_dict" if "model_state_dict" in checkpoint else "state_dict"
model.load_state_dict(checkpoint[state_dict_key])

# Use the GPU when there is one, for the backbone and the local branch alike
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Force inference mode
model.eval()

# NOTE(review): no checkpoint is loaded into local_branch in this cell, so unless
# LocalBranch restores weights itself its parameters are freshly initialised on
# every run — confirm this is intended (and seed torch if results must be reproducible).
local_branch = LocalBranch(input_dim=1024, out_channel=2048)
local_branch.to(device)
# The branch is only used for inference, so put it in eval mode like the backbone
local_branch.eval()

feats_file= os.path.join(root_dir,'cph',image_file_name+'_local_feature_file'+ file_name_extension+'.npy')
csv_file= os.path.join(root_dir,'cph',image_file_name+'_local_feature_file'+file_name_extension +'.csv')

extract_features(model,local_branch, feats_file)

# We are here, it means the npy is saved, convert to csv file
convert_npy_features_to_csv(feats_file, csv_file)

## Extracting local features for the whole dataset
The code in the cells below is used to extract local features for all query and map images for both CPH (Copenhagen) and SF (San Francisco).

In [1]:
import os
import sys
import torch
from tqdm import tqdm
from torchinfo import summary
import numpy as np
import torchvision.transforms as ttf
import numpy as np
import csv
%run model_notebook.ipynb

In [2]:

# This function takes a ResNextGCL model, loops through all given images and extracts local features.
# The local features are generated as follows: lf generated using GCL --> passed to MultiAttrous --> attention map generated -->
# a 2048 lf vector is generated and saved for each image.
def extract_features(dl, model, feats_file , f_length=2048, local_branch=None):
    """Extract local features for every image served by ``dl`` and save them.

    Each batch is passed through ``model`` and then through a ``LocalBranch``;
    the results are stored in a ``(len(dl.dataset), f_length)`` array that is
    written to ``feats_file`` with ``np.save``.

    Args:
        dl: Dataloader yielding image batches; ``dl.dataset`` and
            ``dl.batch_size`` are used to size and index the output array.
        model: Backbone network; its output is fed to the local branch.
        feats_file: Destination ``.npy`` path. If it already exists nothing is
            computed and the function returns immediately.
        f_length: Length of one feature vector (number of columns saved).
        local_branch: Optional ready-made local branch to use. When omitted a
            new ``LocalBranch(input_dim=1024, out_channel=2048)`` is created
            for this call, as before.
            NOTE(review): no weights are loaded into that new branch here, so
            unless LocalBranch restores them itself, separate calls (e.g. query
            vs. map) use differently initialised branches — pass a shared
            instance if the resulting features must be comparable.
    """
    # Check the cache first so a skipped file costs no model construction.
    if os.path.exists(feats_file):
        print(feats_file,"already exists. Skipping.")
        return

    # Make sure the target directory exists *before* the expensive loop,
    # otherwise np.save would only fail after all features were computed.
    os.makedirs(os.path.dirname(feats_file) or ".", exist_ok=True)

    # Follow the backbone's device instead of assuming CUDA is present.
    device = next(model.parameters()).device
    if local_branch is None:
        local_branch = LocalBranch(input_dim=1024, out_channel=2048)
    # The branch must sit on the same device as the backbone output
    local_branch.to(device)
    # Inference only: put the branch in eval mode like the backbone
    local_branch.eval()

    feats = np.zeros((len(dl.dataset), f_length))
    # no_grad avoids building the autograd graph for every batch.
    with torch.no_grad():
        # Wrapping the loader itself (not enumerate) lets tqdm show total/ETA.
        for i, batch in enumerate(tqdm(dl, desc="Extracting features")):
            start = i * dl.batch_size
            # obtain lf first using ResNext + GCL, then pass them to the local branch
            local_features = local_branch(model(batch.to(device)))
            # squeeze(0) drops the batch axis when the batch holds a single image
            feats[start:start + dl.batch_size] = local_features.cpu().squeeze(0).numpy()
    np.save(feats_file, feats)
    print(f"{feats_file} has been saved..........")


#extract features for the whole dataset
def extract_features_msls(
    model,
    subset='val',
    root_dir = 'generalized_contrastive_loss/msls/',
    weights_file_path = 'generalized_contrastive_loss/Models/MSLS/MSLS_resnext_GeM_480_GCL.pth',
    f_length = 2048,
    results_dir = 'generalized_contrastive_loss/results/MSLS/val/',
    batch_size = 1,
    k = 30,
    cities = ("cph", "sf"),
    ):
    """Extract query and map local features for each city and save them as ``.npy``.

    For every city, the ``query.json`` and ``database.json`` index files under
    ``<root_dir>/<subset dir>/<city>/`` are turned into dataloaders and handed
    to ``extract_features``, which writes
    ``<results_dir>/<savename>_<city>_local_queryfeats.npy`` and
    ``..._local_mapfeats.npy``.

    Args:
        model: Backbone network forwarded to ``extract_features``.
        subset: ``"test"`` reads from the ``test`` folder; any other value
            reads from ``train_val``.
        root_dir: Dataset root directory.
        weights_file_path: Unused here; kept so existing callers keep working.
        f_length: Length of one feature vector.
        results_dir: Directory the feature files are written to.
        batch_size: Batch size of the dataloaders.
        k: Unused here; kept so existing callers keep working.
        cities: City folder names to process (defaults to the two cities this
            notebook was written for).
    """
    savename= 'MSLS_resnext_GCL_multi_attrous_attention_map'

    subset_dir=subset if subset == "test" else "train_val"
    image_size = [480,640]
    image_t = ttf.Compose([ttf.Resize(size=(image_size[0],image_size[1])),
                        ttf.ToTensor(),
                        ttf.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                        ])
    for c in cities:
        print(c)
        # os.path.join works with or without a trailing slash on the directories
        city_dir = os.path.join(root_dir, subset_dir, c)
        # (index file, tag used in the output name): query images first, then map images
        for idx_name, tag in (("query.json", "query"), ("database.json", "map")):
            idx_file = os.path.join(city_dir, idx_name)
            dl = create_dataloader("test", root_dir, idx_file, None, image_t, batch_size)
            feats_file = os.path.join(results_dir, f"{savename}_{c}_local_{tag}feats.npy")
            extract_features(dl, model, feats_file, f_length)

In [3]:
%%time

# Convert any given npy features file to csv file. This is a helper function to inspect the generated npy lf file
def convert_npy_features_to_csv(source_file, target_file):
    """Dump the array stored in a ``.npy`` file to a CSV file.

    Args:
        source_file: Path of the ``.npy`` file, read with ``np.load``.
        target_file: Path of the CSV file to write (overwritten if it exists).

    A 1-D array (a single feature vector) is written as one CSV row; arrays
    with two or more dimensions are written one row per first-axis entry.
    """
    # writerows() needs an iterable of rows, so promote a lone (f,) vector to (1, f).
    feature_rows = np.atleast_2d(np.load(source_file)).tolist()

    # newline="" lets the csv module control line endings itself; without it
    # every row is followed by a blank line on platforms that translate "\n".
    with open(target_file, "w", newline="") as f:
        csv.writer(f).writerows(feature_rows)

# Setup the paths and constants
BACKBONE = "resnext"
POOL = "GeM"
NORM = None

## Extract local features using the model from the disk
model_file_weights = os.path.join('generalized_contrastive_loss','Models','MSLS','MSLS_resnext_GeM_480_GCL.pth')
model = create_model(BACKBONE,POOL,norm=NORM,mode="single" )
file_name_extension = ''

# Read the checkpoint once. map_location="cpu" keeps loading independent of
# the device the checkpoint was saved from; the model is moved afterwards.
checkpoint = torch.load(model_file_weights, map_location="cpu")
# The weights are stored under one of two keys depending on the checkpoint;
# pick the key explicitly instead of hiding every error behind a bare except.
state_dict_key = "model_state_dict" if "model_state_dict" in checkpoint else "state_dict"
model.load_state_dict(checkpoint[state_dict_key])

if torch.cuda.is_available():
    model.cuda()

# Force inference mode
model.eval()

extract_features_msls(model)

# The .npy files are saved at this point. To inspect one of them as CSV, call
# convert_npy_features_to_csv(<path to .npy>, <path to .csv>) with the path of a
# file printed above (no feats_file/csv_file variables are defined in this section).

Using cache found in /home/jovyan/.cache/torch/hub/facebookresearch_WSL-Images_main


 the layers of the resnext101_32x8d_wsl are: odict_keys(['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3', 'layer4', 'avgpool', 'fc'])
 the layers of the resnext101_32x8d_wsl are after removing the last two layers (avgpool and fc): odict_keys(['0', '1', '2', '3', '4', '5', '6', '7'])
Number of layers: 8
0 Conv2d IS TRAINED
1 BatchNorm2d IS TRAINED
2 ReLU IS TRAINED
3 MaxPool2d IS TRAINED
4 Sequential IS TRAINED
5 Sequential IS TRAINED
6 Sequential IS TRAINED
7 Sequential IS TRAINED
cph


Extracting features: 6595it [03:01, 36.38it/s]


generalized_contrastive_loss/results/MSLS/val//MSLS_resnext_GCL_multi_attrous_attention_map_cph_local_queryfeats.npy has been saved..........


Extracting features: 12601it [05:29, 38.19it/s]


generalized_contrastive_loss/results/MSLS/val//MSLS_resnext_GCL_multi_attrous_attention_map_cph_local_mapfeats.npy has been saved..........
sf


Extracting features: 4525it [01:58, 38.29it/s]


generalized_contrastive_loss/results/MSLS/val//MSLS_resnext_GCL_multi_attrous_attention_map_sf_local_queryfeats.npy has been saved..........


Extracting features: 6315it [02:45, 38.17it/s]


generalized_contrastive_loss/results/MSLS/val//MSLS_resnext_GCL_multi_attrous_attention_map_sf_local_mapfeats.npy has been saved..........
CPU times: user 12min 49s, sys: 34.4 s, total: 13min 23s
Wall time: 13min 28s
