<h3>To re-rank</h3>

- Function takes in a dictionary containing the key
- "features" and "configuration"

- "features" dictionary should contain all the path of map and query of both the features with this as the keys 
- "configuration" dictionary should contain all the required variables to run the experiment such as the fusion function, similarity measure function, cph_len, sf_len




In [57]:
import torch
import pandas as pd
import os
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import manhattan_distances
import numpy as np

<h3>Fusion Methods</h3>

In [None]:
def orthogonal_fusion(global_matrix,local_matrix):
    global_norm = torch.norm(global_matrix,p=2,dim=1)
    projection = torch.mm(global_matrix,local_matrix.T)
    projection = projection/(global_norm*global_norm)
    orthogonal_comp = local_matrix-projection
    fusion = torch.cat([global_matrix,orthogonal_comp],dim=1)
    # global_feat.expand(orthogonal_comp.size()), orthogonal_comp
    return fusion

In [None]:
def fusion(global_features,local_features):
    """when you apply just the sequeeze() it removes all the ones
    #so (1,2048,1,1) after squeezing -> (2048), adding that extra dimension on the zero 
    #axis :- unsqueeze(0) (1,2048)"""
    

    if global_features.shape[0]==1:
        
        x = np.multiply(local_features,global_features).squeeze().unsqueeze(0)

    else: #if we have batch size!=1, then we dont have to unsqueeze because it wont squeeze the batch size               
        #hardaman product
        x = np.multiply(local_features,global_features).squeeze()

    return x


<h3>Helpers for Re-ranking</h3>

In [65]:
#load the features given feature dictionary and returns back the path

def load_files(features):
    local_cph_query = torch.from_numpy(np.load(features["cph"]["local_query"]))
    global_cph_query =  torch.from_numpy(np.load(features["cph"]["global_query"]))

    local_sf_query =  torch.from_numpy(np.load(features["sf"]["local_query"]))
    global_sf_query =  torch.from_numpy(np.load(features["sf"]["global_query"]))

    local_cph_database =  torch.from_numpy(np.load(features["cph"]["local_database"]))
    global_cph_database =  torch.from_numpy(np.load(features["cph"]["global_database"]))

    local_sf_database =  torch.from_numpy(np.load(features["sf"]["local_database"]))
    global_sf_database =  torch.from_numpy(np.load(features["sf"]["global_database"]))
    
    return local_cph_query,global_cph_query,\
            local_sf_query,global_sf_query,\
            local_cph_database,global_cph_database,\
            local_sf_database,global_sf_database
    

In [64]:
def write(path,filename,df):
    path = os.path.join(path,filename)
    with open(path, 'w') as f:
        for i in range(df.shape[0]):
            results=df.iloc[i]["query_id"] + " " +  " ".join([str(i) for i in eval(df.iloc[i]["re_ranked_ids"])]) +"\n"
            f.write(results)
    

<h3>Re-ranking</h3>

In [63]:

def rank(features,configuration):
    
    #Load experimental configuration
    
    CPH_LEN = configuration["cph_len"]
    SF_LEN = configuration["sf_len"]
    fusion = configuration["fusion"]
    df = pd.read_csv(configuration["df"])
    similarity_measure = configuration["similarity_measure"]
    
    #Unpacking the features
    
    local_features_cph_query,global_features_cph_query,\
    local_features_sf_query,global_features_sf_query,\
    local_features_cph_database,global_features_cph_database,\
    local_features_sf_database,global_features_sf_database = load_files(features)
    
    for row in range(df.shape[0]):    

        if row<CPH_LEN:
            #unsqueezing because when we access it we result in ([2048]), to make it ([1,2048])
            #same for global
            local_feature_query = local_features_cph_query[row].unsqueeze(0)
            global_feature_query = global_features_cph_query[row].unsqueeze(0)
            
            #applying the fusion (1,4096)
            query_fusion = fusion(global_feature_query,local_feature_query)

            #eval basically converts to the required datatype given the string format

            retrieved_indices = eval(df.iloc[row]["retrieved_indicies"])
            retrieved_ids =  eval(df.iloc[row]["retrieved_ids"])
            
            idx_id = {i:j for i,j in zip(retrieved_indices,retrieved_ids)}
     
            database_feature_list = None
        
            #fetching all the features first and concatenating them
            for database_id in retrieved_indices:
                #same reason as above(to why we unsqueezing)
                global_feature_database = global_features_cph_database[database_id].unsqueeze(0)
                
                local_feature_database = local_features_cph_database[database_id].unsqueeze(0)
                #(1,4096)
                combined_features = fusion(global_feature_database,local_feature_database)

                if database_feature_list is None:
                    database_feature_list = combined_features
                else:
                    database_feature_list = torch.cat([database_feature_list,combined_features])
                    
            #so now we result the size of database_feature_list as -> (top_k,4096)
            #computing the similarity so we get (1,top_k) size. 1 because, 1 query image passed in
            similarity =  similarity_measure(query_fusion,database_feature_list)
            
            #getting the indices of the most similar and mapping to the retrieved list
            ranked_indices = [retrieved_indices[i] for i in np.argsort(similarity)[0]]

            retrieved_file_names = [idx_id[i] for i in ranked_indices]

            df.loc[row,"re_ranked"] = str(ranked_indices)
            df.loc[row,"re_ranked_ids"] = str(retrieved_file_names)

        else: #For SF
            
            row = abs(row-CPH_LEN) 
            
            local_feature_query = local_features_sf_query[row].unsqueeze(0)
            global_feature_query = global_features_sf_query[row].unsqueeze(0)

            query_fusion = fusion(global_feature_query,local_feature_query)
        
            
            retrieved_indices = eval(df.iloc[CPH_LEN+row]["retrieved_indicies"])
            
            retrieved_ids =  eval(df.iloc[CPH_LEN+row]["retrieved_ids"])
            
            idx_id = {i:j for i,j in zip(retrieved_indices,retrieved_ids)}

            
            database_feature_list = None
            
            for database_id in retrieved_indices:
          
                global_feature_database = global_features_sf_database[database_id].unsqueeze(0)
                
                local_feature_database = local_features_sf_database[database_id].unsqueeze(0)
                combined_features = fusion(global_feature_database,local_feature_database)


                if database_feature_list is None:
                    database_feature_list = combined_features
                else:
                    database_feature_list = torch.cat([database_feature_list,combined_features])
            
            similarity = similarity_measure(query_fusion,database_feature_list)
            
            ranked_indices = [retrieved_indices[i] for i in np.argsort(similarity)[0]]
            
            retrieved_file_names = [idx_id[i] for i in ranked_indices]
            
            df.loc[CPH_LEN+row,"re_ranked"] = str(ranked_indices)
            df.loc[CPH_LEN+row,"re_ranked_ids"] = str(retrieved_file_names)
            
    #writing the results
    
    write(configuration["results_path"],configuration["results_filename"],df)

    if configuration["return_df"]==True:
        return df

  


<h3>Sample on how to run</h3>

In [23]:
DATA_ROOT = "FIR"
DIR_ADVANCED_IR = os.path.join(DATA_ROOT, "advanced_ir") #i added the local features into a folder 'weights' in advanced_ir
DATASET_ROOT = os.path.join(DATA_ROOT, "msls")
DATASET_TEST = os.path.join(DATASET_ROOT, "test")
DATASET_VAL = os.path.join(DATASET_ROOT, "train_val")
DATASET_VAL_SF = os.path.join(DATASET_VAL, "sf")
DATASET_VAL_CPH = os.path.join(DATASET_VAL, "cph")

In [72]:
features = {
    "cph": 
        {
            "local_query" :os.path.join(DIR_ADVANCED_IR,"weights/MSLS_resnext_GCL_multi_attrous_attention_map_cph_local_queryfeats.npy") ,
            "global_query" : os.path.join(DATA_ROOT,"results/MSLS/val/MSLS_resnext_GeM_480_GCL_cph_queryfeats.npy"),
            "local_database" : os.path.join(DIR_ADVANCED_IR,"weights/MSLS_resnext_GCL_multi_attrous_attention_map_cph_local_mapfeats.npy"),
            "global_database" : os.path.join(DATA_ROOT,"results/MSLS/val/MSLS_resnext_GeM_480_GCL_cph_mapfeats.npy") 
        },

    "sf":
        {
        "local_query" : os.path.join(DIR_ADVANCED_IR,"weights/MSLS_resnext_GCL_multi_attrous_attention_map_sf_local_queryfeats.npy"),
        "global_query" : os.path.join(DATA_ROOT,"results/MSLS/val/MSLS_resnext_GeM_480_GCL_sf_queryfeats.npy"), 
        "local_database" : os.path.join(DIR_ADVANCED_IR,"weights/MSLS_resnext_GCL_multi_attrous_attention_map_sf_local_mapfeats.npy"), 
        "global_database" : os.path.join(DATA_ROOT,"results/MSLS/val/MSLS_resnext_GeM_480_GCL_sf_mapfeats.npy")

        }
}

configuration = {
    "similarity_measure" : manhattan_distances, #similarity function
    "fusion" : orthogonal_fusion, #fusion function
    "df" :   "FIR/advanced_ir/data.csv",
    "cph_len" : 6595,
    "sf_len" : 4525,
    "results_path": "FIR/advanced_ir",
    "results_filename": "predictions.txt",
    "return_df" : True
}



In [73]:
rank(features,configuration).head()

Unnamed: 0.1,Unnamed: 0,query_id,retrieved_ids,retrieved_indicies,re_ranked,re_ranked_ids
0,0,x3vA7Bk0HNI6rGkDpDZQUQ,"['X9V1oGRaAEFjq5jufrklTQ', 'E7gcrCyitkguCnMzoE...","[3, 5130, 5131, 0, 7912, 5132, 8812, 9186, 1, ...","[3, 0, 5130, 5131, 8812, 1, 7912, 4505, 5132, ...","['X9V1oGRaAEFjq5jufrklTQ', 'm6_LAhWivjGN4O1fkW..."
1,1,U9Vj0IV4q1psciXpj51F_w,"['X9V1oGRaAEFjq5jufrklTQ', '22BOHMokEHyXf9LA8B...","[3, 4504, 1, 1815, 9186, 0, 2, 9181, 5131, 513...","[1, 3, 4504, 9183, 9181, 9182, 9184, 2, 1815, ...","['HU9GEfLAB9pm5RmjW4MLhg', 'X9V1oGRaAEFjq5jufr..."
2,2,Eh1NwQjH4jbKcWqVJ4ZsJg,"['X9V1oGRaAEFjq5jufrklTQ', '_Eq8EgtwLGiMFc7VJd...","[3, 4, 2, 1815, 7604, 1, 0, 8810, 5, 9186, 6, ...","[3, 2, 4, 1, 0, 8810, 5, 6, 7604, 8809, 5133, ...","['X9V1oGRaAEFjq5jufrklTQ', 'qhZA-uC4KY1F38C_Hb..."
3,3,1RKCGBAWsZbi5dj3vR2mlw,"['_Eq8EgtwLGiMFc7VJdb-YQ', 'X9V1oGRaAEFjq5jufr...","[4, 3, 5, 6, 0, 8815, 9186, 9181, 8798, 1, 513...","[4, 3, 5, 6, 9181, 0, 1, 12219, 8815, 8810, 91...","['_Eq8EgtwLGiMFc7VJdb-YQ', 'X9V1oGRaAEFjq5jufr..."
4,4,LdiYwYkqgUfc1IYDu5ov9A,"['Z4MR4AHQufgsCwiBiqQ23A', 'eQ-8kVNfMZiexVcu_V...","[5, 6, 4, 8815, 5133, 7049, 5125, 8816, 8811, ...","[5, 6, 4, 3, 5133, 12219, 8816, 8815, 34, 1192...","['Z4MR4AHQufgsCwiBiqQ23A', 'eQ-8kVNfMZiexVcu_V..."
