## Re-rank

- The function takes in a dictionary containing the keys "features" and "configuration".

- The "features" dictionary should contain the paths of the map and query files for both kinds of features, with these as its keys.
- The "configuration" dictionary should contain all the variables required to run the experiment, such as the fusion function, the similarity measure function, cph_len and sf_len.




In [48]:
import os
import json
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm

from IPython.display import display

### Generate Dataframe
Generate a dataframe that holds, for every query, the database indices of its retrieved results.

In [46]:
def generate_pred_indicies(prediction_path, validation_path, cities, display_df=False):
    """Map every retrieved image id in a prediction file to its database index.

    The prediction file is read with ``pd.read_fwf``: the first column of each
    row is the query image id and all remaining columns are the ids retrieved
    for that query. Rows are consumed city by city, in the order given by
    ``cities``. For each city, the number of rows it owns is the length of
    ``im_paths`` in ``<validation_path>/<city>/query.json``, and ids are looked
    up against ``im_paths`` in ``<validation_path>/<city>/database.json``.

    Args:
        prediction_path: Path of the prediction text file.
        validation_path: Directory that contains one sub-directory per city.
        cities: Iterable of city names, in the same order as their rows appear
            in the prediction file.
        display_df: When true, show the resulting dataframe with ``display``.

    Returns:
        A tuple ``(df, city_lens)``. ``df`` has the columns ``query_id``,
        ``retrieved_ids`` (list of ids per row) and ``retrieved_indicies``
        (list of database positions per row; rows not covered by any city keep
        an empty list). ``city_lens`` maps each city to its number of queries.

    Raises:
        KeyError: If a retrieved id is not present in the city's database.
    """
    # Read the prediction text file
    raw = pd.read_fwf(prediction_path, header=None)
    # Combine all but the first column into one list of predictions per row
    raw['combined'] = raw.drop(0, axis=1).values.tolist()
    # Keep the query id and the combined list. The explicit copy makes this an
    # independent frame, so adding a column below is not a write to a slice.
    df = raw[[0, 'combined']].copy()
    df.columns = ['query_id', 'retrieved_ids']

    n_rows = len(df)
    retrieved_ids = df['retrieved_ids'].tolist()
    # One (initially empty) list of database indices per prediction row
    retrieved_indicies = [[] for _ in range(n_rows)]

    curr_idx = 0  # first prediction row that belongs to the current city
    city_lens = {city: 0 for city in cities}
    for city in cities:
        city_data = os.path.join(validation_path, city)
        # The number of query images decides how many rows this city owns
        with open(os.path.join(city_data, "query.json"), "r", encoding="utf-8") as f:
            n_queries = len(json.load(f)['im_paths'])
        city_lens[city] = n_queries

        # Index the database paths once for constant-time id lookups
        with open(os.path.join(city_data, "database.json"), "r", encoding="utf-8") as f:
            database_paths = json.load(f)['im_paths']
        # The id is taken as the 5th token after splitting the path on '/' and '.'
        # NOTE(review): this hard-codes the directory depth of im_paths - confirm
        database_index = {
            p.replace('.', '/').split('/')[4]: i for i, p in enumerate(database_paths)
        }

        # Explicit row range: a city without queries is simply skipped, and the
        # range never runs past the end of the prediction file.
        for row in range(curr_idx, min(curr_idx + n_queries, n_rows)):
            try:
                retrieved_indicies[row] = [database_index[i] for i in retrieved_ids[row]]
            except KeyError as err:
                raise KeyError(
                    f"retrieved id {err.args[0]!r} (prediction row {row}) "
                    f"not found in database.json of city {city!r}"
                ) from err
        curr_idx += n_queries

    df['retrieved_indicies'] = retrieved_indicies

    # If needed, print the dataframe
    if display_df:
        display(df)

    return df, city_lens

<h3>Helpers for Re-ranking</h3>

In [64]:
def write(path, filename, df):
    """Write one ``"<query_id> <re_ranked_ids>"`` line per dataframe row.

    Args:
        path: Directory in which the file is created.
        filename: Name of the file inside ``path``.
        df: Dataframe with the string columns ``query_id`` and
            ``re_ranked_ids``.

    Raises:
        TypeError: If a cell of either column is not a string, for example a
            row whose ``re_ranked_ids`` was never filled in.
    """
    path = os.path.join(path, filename)
    # Walk the two columns positionally so the result does not depend on the
    # dataframe's index labels. Plain concatenation is deliberate: a non-string
    # cell fails loudly instead of being written out as text such as "nan".
    lines = [
        query_id + " " + re_ranked_ids + "\n"
        for query_id, re_ranked_ids in zip(df["query_id"], df["re_ranked_ids"])
    ]
    # All lines are built before the file is opened, so a bad row cannot leave
    # a half-written file behind.
    with open(path, 'w') as f:
        f.writelines(lines)

<h3>Re-ranking</h3>

In [51]:
def _load_feature_tensor(features_path, name_format, city, db):
    """Load one ``.npy`` feature file and wrap it in a ``torch.Tensor``."""
    file_name = name_format.format(city=city, db=db)
    return torch.Tensor(np.load(os.path.join(features_path, file_name)))


def rank(configuration):
    """Re-rank the retrieved candidates of every query and write the results.

    Keys read from ``configuration``:
        prediction_path, data_path, cities: Forwarded to
            ``generate_pred_indicies``; ``cities`` also drives the loop here.
        features_path: Directory holding the ``.npy`` feature files. The
            results file is written to this directory as well.
        local_feat_format, global_feat_format: File-name templates formatted
            with ``city=<city>`` and ``db="query"`` or ``db="map"``.
        fusion: When truthy, local and global features are combined with
            ``fusion_method.forward(local, global)``; otherwise only the local
            features are used.
        fusion_method: Object providing ``forward``; read even if ``fusion``
            is falsy.
        similarity_measure: Callable invoked as
            ``similarity_measure(query_row, candidate_rows)``.
        results_file_format: File name of the results file.

    Returns:
        A tuple ``(retrieved_df, results_path)``. ``retrieved_df`` is the
        dataframe from ``generate_pred_indicies`` with an added
        ``re_ranked_ids`` column (space-joined ids in their new order).
    """
    # Load the configuration and data
    retrieved_df, data_lengths = generate_pred_indicies(
        configuration["prediction_path"],
        configuration["data_path"],
        configuration["cities"],
        False
    )
    similarity_measure = configuration["similarity_measure"]
    fusion_method = configuration["fusion_method"]
    features_path = configuration["features_path"]
    local_feat_format = configuration["local_feat_format"]
    global_feat_format = configuration["global_feat_format"]
    fusion = configuration["fusion"]
    # Built once; used for the log message, the write and the return value
    results_path = os.path.join(features_path, configuration["results_file_format"])

    curr_idx = 0  # first dataframe row that belongs to the current city
    # Only the sort order of the scores is kept, so gradients are never needed;
    # disabling autograd avoids recording graphs for fusion and similarity.
    with torch.no_grad():
        for city in configuration["cities"]:
            # Load local features
            query_local = _load_feature_tensor(features_path, local_feat_format, city, "query")
            map_local = _load_feature_tensor(features_path, local_feat_format, city, "map")

            # Number of queries (dataframe rows) of this city
            end_idx = data_lengths[city]

            if fusion:
                # Load global features and fuse them with the local ones
                query_global = _load_feature_tensor(features_path, global_feat_format, city, "query")
                map_global = _load_feature_tensor(features_path, global_feat_format, city, "map")
                query_fusion = fusion_method.forward(query_local, query_global)
                map_fusion = fusion_method.forward(map_local, map_global)
            else:
                # Otherwise, just keep the local features
                query_fusion = query_local
                map_fusion = map_local

            # Candidate database indices and ids of this city's queries
            city_rows = retrieved_df.loc[curr_idx:curr_idx+end_idx-1]
            retrieved_indices = torch.tensor(city_rows["retrieved_indicies"].tolist())
            retrieved_ids = city_rows["retrieved_ids"].tolist()

            for idx in tqdm(range(retrieved_indices.shape[0]), desc=f"Ranking for {city}"):
                # Feature rows of the candidates retrieved for this query
                database_feature_list = map_fusion.index_select(0, retrieved_indices[idx])
                # assumes the measure returns one row of scores, shape
                # (1, n_candidates) - TODO confirm; the [0] below takes that row
                row_similarity = similarity_measure(query_fusion[idx].unsqueeze(0), database_feature_list)
                # torch.argsort sorts ascending, so the lowest scores come first.
                # NOTE(review): that suits a distance-like measure - confirm
                ranked_indicies = torch.argsort(row_similarity)[0].tolist()
                retrieved_df.loc[curr_idx+idx, "re_ranked_ids"] = " ".join(
                    [retrieved_ids[idx][i] for i in ranked_indicies]
                )

            curr_idx += end_idx

    # Writing the results to the file
    print(f"Writing the results to {results_path}.")
    write(configuration["features_path"], configuration["results_file_format"], retrieved_df)

    return retrieved_df, results_path