In [1]:
import os
import sys
import json
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from src import constants
import pandas as pd
import numpy as np

In [2]:
def article_images(id, data):
    subset = data[['id', 'article_idx']]
    images = subset[subset['article_idx'] == id]['id'].values
    return images

def clean_up(y):
    y = y.replace('[', '').replace(']', '').replace('"', '').strip().split(',')
    y = [b.replace('"', '').strip()[1:-1] for b in y]
    return y

In [3]:
num_pts = 10 # less than 60
sample = pd.read_csv("../src/ui/data.csv")
time_sample = pd.read_csv("../src/ui/time.csv")
sample['true_imgs'] = [clean_up(a) for a in sample['true_imgs'].values]
models = ['knn', 't2t', 'softcos', 'emb', 'use']
for model in models:
    sample[model] = [clean_up(a)[0:num_pts] for a in sample[model].values]

In [4]:
def intersection(lst1, lst2): 
    return list(set(lst1) & set(lst2)) 

def recall(y_true, y_pred):
    num_true_pos = len(intersection(y_true, y_pred)) # number of true positive
    num_true_image = len(y_true) # number of true images used in the article
    score = num_true_pos/num_true_image
    return score

def precision(y_true, y_pred):
    num_true_pos = len(intersection(y_true, y_pred)) # number of true positive
    num_pred = len(y_pred) # number of predicted images
    score = num_true_pos/num_pred 
    return score

def r_precision(y_true, y_pred):
    num_true_image = len(y_true) # number of true images used in the article
    num_true_pos = len(intersection(y_true, y_pred[:num_true_image])) # number of true positive
    
    score = num_true_pos/num_true_image
    return score

def dcg_at_k(r, k, method=0):
    """
    Score is discounted cumulative gain (dcg) Relevance is positive real values.  
    Can use binary as the previous methods.
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    """
    r = np.asfarray(r)[:k]
    if r.size:
        if method == 0:
            return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
        elif method == 1:
            return np.sum(r / np.log2(np.arange(2, r.size + 2)))
        else:
            raise ValueError('method must be 0 or 1.')
    return 0.


def ndcg_at_k(y_true, y_pred, k, method=0):
    """
    Score is normalized discounted cumulative gain (ndcg) Relevance is positive real values.  
    Can use binary as the previous methods.
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Normalized discounted cumulative gain
    """
    r = [1* (true_img in y_pred[:len(y_true)]) for true_img in y_true] # 1 if in true images
    dcg_max = dcg_at_k(np.ones(len(y_true)), k = k) # IDCG with ground truth 
    if not dcg_max:
        return 0.
    return dcg_at_k(r, k, method) / dcg_max

## Model Comparison

In [5]:
recall_score = [[],[],[],[],[]]
precision_score = [[],[],[],[],[]]
r_precision_score = [[],[],[],[],[]]
ndcg_k_score = [[],[],[],[],[]]
times = []

for i,model in enumerate(models):
    times.append(time_sample[model].median()) 
    for ix in range(sample.shape[0]):
        y_true = sample['true_imgs'].values[ix]
        y_pred = sample[model][ix]
        recall_score[i].append(recall(y_true, y_pred))
        precision_score[i].append(precision(y_true, y_pred))
        r_precision_score[i].append(r_precision(y_true, y_pred))
        ndcg_k_score[i].append(ndcg_at_k(y_true, y_pred, k=len(y_true)))

score_all_df = pd.DataFrame({'models':['knn', 't2t', 'softcos', 'emb', 'use'], 
                            'recall': np.mean(recall_score, axis=1),
                           'precision': np.mean(precision_score, axis=1),
                           'r_precision': np.mean(r_precision_score, axis=1),
                           'ndcg_at_k': np.mean(ndcg_k_score, axis=1),
                           'time (s)': times})
score_all_df

Unnamed: 0,models,recall,precision,r_precision,ndcg_at_k,time (s)
0,knn,0.244731,0.071,0.150098,0.150238,1.838248
1,t2t,0.115193,0.0374,0.071471,0.072825,1.335792
2,softcos,0.168261,0.0466,0.094335,0.094656,3.876387
3,emb,0.203972,0.0586,0.110821,0.11111,0.0528
4,use,0.359414,0.1102,0.24049,0.239711,0.216308


In [6]:
true = sample['true_imgs'].values
score_df = pd.DataFrame({'article_id': sample['ids'].values, 'num_true': [len(a) for a in true], 
                         'knn': recall_score[0], 't2t': recall_score[1], 'softcos': recall_score[2],
                         'emb': recall_score[3], 'use': recall_score[4]})
score_df.head(10)

Unnamed: 0,article_id,num_true,knn,t2t,softcos,emb,use
0,83cd6eac371d4f628377af393fb9152c,2,1.0,0.0,1.0,1.0,1.0
1,2d1f7c446aa14fa19ab6055eaec10ee5,1,0.0,0.0,0.0,1.0,0.0
2,4156960b89f842e28acd3d824bc11cf9,2,1.0,0.5,1.0,0.0,0.0
3,e54786c6fb5049feb69051a1f58df722,1,1.0,0.0,1.0,0.0,0.0
4,8fd956088433457b8fbf8b7edff10df2,6,0.166667,0.0,0.166667,1.0,0.0
5,23d1fce7f5cd4b8785a788c4b5eddf60,1,0.0,0.0,1.0,0.0,0.0
6,4c8057fc785b4e89b8990a201d2f784c,3,0.0,0.0,0.333333,0.0,0.666667
7,91d9711a9e5549109dd04d264e02b720,9,0.0,0.0,0.0,0.0,0.0
8,ec89d17eae8b4ed88c3fccdbcb57f567,1,1.0,0.0,1.0,1.0,1.0
9,4d972513519a4b3f9ba5190c13626aba,1,0.0,0.0,0.0,0.0,1.0


---