In [3]:
import numpy as np
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
import pandas as pd
from collections import Counter
from ast import literal_eval

In [4]:
# Locations of the pre-split CSV files (train / test / validation),
# given relative to the notebook's working directory.
PATH_TO_TRAIN, PATH_TO_TEST, PATH_TO_VAL = (
    "../../data/train_splitted.csv",
    "../../data/test_splitted.csv",
    "../../data/val_splitted.csv",
)

In [181]:
""" Reference from https://gist.github.com/bwhite/3726239
"""

import numpy as np

def dcg_at_k(r, k, method=0):
    """Compute the discounted cumulative gain (DCG) of a ranked list.

    Relevance is positive real values; binary relevance works as well.

    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> float(dcg_at_k(r, 1))
    3.0
    >>> float(dcg_at_k(r, 1, method=1))
    3.0
    >>> float(dcg_at_k(r, 2))
    5.0
    >>> round(float(dcg_at_k(r, 2, method=1)), 4)
    4.2619
    >>> round(float(dcg_at_k(r, 10)), 4)
    9.6051
    >>> round(float(dcg_at_k(r, 11)), 4)
    9.6051

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider; only ``r[:k]`` is scored
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Discounted cumulative gain; ``0.`` when ``r[:k]`` is empty
    Raises:
        ValueError: if ``method`` is neither 0 nor 1 (only checked when
            there is at least one score to weigh)
    """
    # np.asfarray was removed in NumPy 2.0; asarray with an explicit float
    # dtype performs the same conversion on every NumPy version.
    r = np.asarray(r, dtype=float)[:k]
    if not r.size:
        return 0.
    if method == 0:
        # First item is undiscounted; item at rank i >= 2 is divided by log2(i).
        return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
    if method == 1:
        # Item at rank i >= 1 is divided by log2(i + 1).
        return np.sum(r / np.log2(np.arange(2, r.size + 2)))
    raise ValueError('method must be 0 or 1.')


def ndcg_at_k(r, correct_rankings, k, method=0):
    """Compute the normalized discounted cumulative gain (NDCG) at rank k.

    The DCG of ``r`` is divided by the DCG of ``correct_rankings``.
    ``correct_rankings`` is used exactly as given (it is not re-sorted here),
    so pass it in ideal, best-first order, e.g. the true relevance values
    sorted in descending order.

    Relevance is positive real values; binary relevance works as well.

    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> float(ndcg_at_k(r, sorted(r, reverse=True), 1))
    1.0
    >>> r = [2, 1, 2, 0]
    >>> round(float(ndcg_at_k(r, sorted(r, reverse=True), 4)), 4)
    0.9203
    >>> round(float(ndcg_at_k(r, sorted(r, reverse=True), 4, method=1)), 4)
    0.9652
    >>> ndcg_at_k([0], [0], 1)
    0.0
    >>> float(ndcg_at_k([1], [1], 2))
    1.0

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        correct_rankings: Relevance scores (list or numpy) of the reference
            ordering whose DCG is used as the normalizer
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Normalized discounted cumulative gain; ``0.`` when the reference
        DCG is zero (which avoids a division by zero)
    """
    ideal_dcg = dcg_at_k(correct_rankings, k, method)
    if not ideal_dcg:
        return 0.
    return dcg_at_k(r, k, method) / ideal_dcg


# LOAD DATASETS

In [54]:
# Read the train / test / validation CSV files into DataFrames, in that order.
train, test, val = (
    pd.read_csv(csv_path)
    for csv_path in (PATH_TO_TRAIN, PATH_TO_TEST, PATH_TO_VAL)
)

In [56]:
# Display the first training row to inspect the available columns.
train.head(1)

Unnamed: 0,context_id,context_2,context_1,context_0,reply_id,reply,label,confidence,merged_contexts,contexts_and_reply,weighted_label,initial_labels,num_label
0,22579918886,"[654394, 1561605, 16734, 1554817, 306154, 1561...","[1830003, 1463197, 334254, 676100, 917832]","[1293580, 1463197, 1501890, 1000204]",0,"[743031, 1101989]","[0, 0, 1]",0.875352,"[654394, 1561605, 16734, 1554817, 306154, 1561...","[654394, 1561605, 16734, 1554817, 306154, 1561...","[0.0, 0.0, 0.87535161750000001]",good,2


In [57]:
# Display the first test row to inspect the available columns.
test.head(1)

Unnamed: 0,context_id,context_2,context_1,context_0,reply_id,reply,label,confidence,merged_contexts,contexts_and_reply,weighted_label,initial_labels,num_label
0,127768564286,"[1061960, 397658, 1247951, 1134945, 1147066, 1...","[334254, 1017975, 613707, 200685, 1463197, 613...","[786497, 1504830, 980317, 938763]",0,"[397658, 786497, 1489432, 1376676, 1463197, 90...","[0, 0, 1]",0.867679,"[1061960, 397658, 1247951, 1134945, 1147066, 1...","[1061960, 397658, 1247951, 1134945, 1147066, 1...","[0.0, 0.0, 0.86767855680000006]",good,2


In [58]:
# Display the first validation row to inspect the available columns.
val.head(1)

Unnamed: 0,context_id,context_2,context_1,context_0,reply_id,reply,label,confidence,merged_contexts,contexts_and_reply,weighted_label,initial_labels,num_label
0,521831731666,[1304112],"[1663321, 488315, 1862038]","[1748603, 1098780, 1120957, 166293, 307305]",0,"[642764, 1704903, 743031, 1066137]","[0, 1, 0]",0.936427,"[1304112, 1663321, 488315, 1862038, 1748603, 1...","[1304112, 1663321, 488315, 1862038, 1748603, 1...","[0.0, 0.93642739580000001, 0.0]",neutral,1


## Analyzing test

In [59]:
# List the distinct context ids that occur in the test split.
test.context_id.unique()

array([   127768564286,    548670776224,    727582522209, ...,
       281244955640299, 281315804039325, 281421690458811])

In [61]:
# Select every test row that belongs to one sample context id.
# NOTE(review): despite the name, the selection is not limited to one row;
# it holds one row per candidate reply of that context.
one_row_test = test.loc[test['context_id'] == 127768564286]

In [62]:
# Display the first rows selected for the sample context.
one_row_test.head()

Unnamed: 0,context_id,context_2,context_1,context_0,reply_id,reply,label,confidence,merged_contexts,contexts_and_reply,weighted_label,initial_labels,num_label
0,127768564286,"[1061960, 397658, 1247951, 1134945, 1147066, 1...","[334254, 1017975, 613707, 200685, 1463197, 613...","[786497, 1504830, 980317, 938763]",0,"[397658, 786497, 1489432, 1376676, 1463197, 90...","[0, 0, 1]",0.867679,"[1061960, 397658, 1247951, 1134945, 1147066, 1...","[1061960, 397658, 1247951, 1134945, 1147066, 1...","[0.0, 0.0, 0.86767855680000006]",good,2
1,127768564286,"[1061960, 397658, 1247951, 1134945, 1147066, 1...","[334254, 1017975, 613707, 200685, 1463197, 613...","[786497, 1504830, 980317, 938763]",1,"[786497, 1489432]","[0, 1, 0]",0.653608,"[1061960, 397658, 1247951, 1134945, 1147066, 1...","[1061960, 397658, 1247951, 1134945, 1147066, 1...","[0.0, 0.65360824549999996, 0.0]",neutral,1
2,127768564286,"[1061960, 397658, 1247951, 1134945, 1147066, 1...","[334254, 1017975, 613707, 200685, 1463197, 613...","[786497, 1504830, 980317, 938763]",2,"[1554817, 334254, 1017975, 1134945, 989147, 11...","[0, 0, 1]",0.903552,"[1061960, 397658, 1247951, 1134945, 1147066, 1...","[1061960, 397658, 1247951, 1134945, 1147066, 1...","[0.0, 0.0, 0.90355219860000002]",good,2
3,127768564286,"[1061960, 397658, 1247951, 1134945, 1147066, 1...","[334254, 1017975, 613707, 200685, 1463197, 613...","[786497, 1504830, 980317, 938763]",3,"[1702518, 786497, 1489432, 884730, 980317]","[1, 0, 0]",0.94458,"[1061960, 397658, 1247951, 1134945, 1147066, 1...","[1061960, 397658, 1247951, 1134945, 1147066, 1...","[0.94457976109999997, 0.0, 0.0]",bad,0
4,127768564286,"[1061960, 397658, 1247951, 1134945, 1147066, 1...","[334254, 1017975, 613707, 200685, 1463197, 613...","[786497, 1504830, 980317, 938763]",4,"[109082, 1463197, 1702518, 786497, 1489432]","[0, 0, 1]",0.87135,"[1061960, 397658, 1247951, 1134945, 1147066, 1...","[1061960, 397658, 1247951, 1134945, 1147066, 1...","[0.0, 0.0, 0.87134977730000007]",good,2


In [63]:
# weighted_label column of the sample context.
# NOTE(review): values are presumably still string-encoded lists as read from
# the CSV (hence the `_str` suffix and the literal_eval calls further down) — confirm.
weighted_labels_str = one_row_test.weighted_label

In [202]:
def get_predictions(weighted_labels):
    """Keep only the maximum entry of every row of label weights.

    Args:
        weighted_labels: 2-D array-like with one row of per-class weights
            per sample.

    Returns:
        Tuple ``(one_hot_labels, labels)`` of arrays shaped like the input.
        ``one_hot_labels`` is an int array holding 1 wherever an entry equals
        the maximum of its row (tied maxima give several 1s) and 0 elsewhere;
        ``labels`` is ``one_hot_labels`` multiplied by that row maximum.
    """
    scores = np.array(weighted_labels)
    # keepdims leaves the reduced axis in place so the maxima broadcast per row.
    row_max = scores.max(axis=1, keepdims=True)
    one_hot_labels = np.equal(scores, row_max).astype(int)
    return one_hot_labels, one_hot_labels * row_max



def get_scores_and_ids(dataframe, context_id):
    """Rank the replies of one context by the class of their weighted label.

    Args:
        dataframe: DataFrame with ``context_id``, ``reply_id``,
            ``weighted_label`` and ``num_label`` columns. ``weighted_label``
            entries may be string-encoded lists or already-parsed sequences.
        context_id: Value of the ``context_id`` column selecting the replies.

    Returns:
        Tuple ``(predicted_ids, predicted_scores, correct_scores)``:
        ``predicted_ids`` are the reply ids ordered by descending class index,
        ``predicted_scores`` are those class indices (argmax of each row's
        masked weights) in the same order, and ``correct_scores`` is the
        ``num_label`` column sorted in descending order. All three lists are
        empty when no row matches ``context_id``.
    """
    partition = dataframe.loc[dataframe['context_id'] == context_id]
    if partition.empty:
        # Nothing to rank; the row-wise max in get_predictions cannot run on
        # an empty array, so return early instead of crashing there.
        return [], [], []

    try:
        weighted_labels = [literal_eval(l) for l in partition.weighted_label]
    except ValueError:
        # literal_eval raises ValueError for entries that are not literal
        # strings, e.g. lists that are already parsed. Materialise the column
        # as a list so np.array builds a 2-D array rather than a 1-D object
        # array of lists (on which max(axis=1) fails).
        weighted_labels = list(partition.weighted_label)

    _, preprocessed_labels = get_predictions(weighted_labels)
    class_ids = np.argmax(preprocessed_labels, axis=1)

    # Stable sort: replies sharing a class keep their original relative order.
    ranked = sorted(zip(partition.reply_id, class_ids), key=lambda pair: -pair[1])

    predicted_ids = [pair[0] for pair in ranked]
    predicted_scores = [pair[1] for pair in ranked]
    correct_scores = sorted(partition.num_label, reverse=True)

    return predicted_ids, predicted_scores, correct_scores

In [207]:
# Rank the replies of the sample test context and fetch its sorted true labels.
pred_ids, pred_scores, scores = get_scores_and_ids(test, 127768564286)

In [208]:
# num_label values of the sample context, sorted in descending order.
scores

[2, 2, 2, 2, 1, 0]

In [209]:
# Class indices derived from weighted_label, sorted in descending order.
pred_scores

[2, 2, 2, 2, 1, 0]

In [210]:
# NDCG over the full reply list of the sample context.
# NOTE(review): pred_scores is derived from the dataset's own weighted_label
# column rather than from a model, so this looks like a sanity check of the
# metric (expected to be 1.0) — confirm.
ndcg_at_k(pred_scores, scores, len(scores))

1.0

# Debug

In [201]:
# Inline re-run of the ranking steps of get_scores_and_ids on one test context,
# without the get_predictions masking, so intermediate values can be inspected.
dataframe = test
context_id = 127768564286
partition = dataframe.loc[dataframe['context_id'] == context_id]
preprocessed_labels = [literal_eval(raw) for raw in partition.weighted_label]

# Pair each reply id with the index of its largest weight, then order the
# pairs by descending index (stable, so ties keep their original order).
ids_and_labels = list(
    zip(partition.reply_id, np.argmax(preprocessed_labels, axis=1))
)
ids_and_labels.sort(key=lambda pair: -pair[-1])

predicted_ids = [pair[0] for pair in ids_and_labels]
predicted_scores = [pair[1] for pair in ids_and_labels]
correct_scores = sorted(partition.num_label, reverse=True)

print(predicted_scores, correct_scores)

[2, 2, 2, 2, 1, 0] [2, 2, 2, 2, 1, 0]
