### Evaluation Metric

#### Kendall tau correlation (Maximize):
* https://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.stats.kendalltau.html
* https://onlinecourses.science.psu.edu/stat509/node/158
* https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient

#### Normalized Kendall Distance (Minimize):
* https://en.wikipedia.org/wiki/Kendall_tau_distance
* https://stats.stackexchange.com/questions/168602/whats-the-kendall-taus-distance-between-these-2-rankings - implementation

#### Other methods which were investigated:
* https://en.wikipedia.org/wiki/Learning_to_rank
* https://blog.godatadriven.com/kendall-tau-recommendations - in case i'll want to rank n out of k reviews
* https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kendalltau.html

In [9]:
import numpy as np
import pandas as pd

In [16]:
import itertools

from scipy.special import comb
from scipy.stats import kendalltau, weightedtau

def kendallTauDistNorm(A, B):
    """
    Calculate the normalized Kendall tau distance
    
    Args:
        A(list): Ranked list, 1
        B(list): Ranked list, 2
        
    Returns:
        float. The normalized Kendall distance 
    """
    pairs = itertools.combinations(range(0, len(A)), 2)

    distance = 0

    for x, y in pairs:
        a = A[x] - A[y]
        b = B[x] - B[y]

        # if discordant (different signs)
        if (a * b < 0):
            distance += 1

    return distance / comb(len(A), 2)


def evaluate_ranking(products_data, method):
    """
    Evaluate the predictions ranking with respect to the groung trouth rank
    
    Args:
        product_data(pandas.core.groupby.DataFrameGroupBy): The reviews grouped by ProductId
        method(fuction): The Evaluation method either
    
    Returns:
        float. The score for evaluation of ranking
    """
    # Order by Helpfullness score and index and get ranked indices:
    prediction = products_data.groupby('Predicted_Helpfullness_Score').apply(pd.DataFrame.sortlevel, level=0, ascending=True)
    prediction.sort_index(ascending=False, level=0, inplace=True)
    prediction_rank = prediction.index.labels[1]
    
    ground_truth = products_data.groupby('Groundtruth_Helpfullness_Score').apply(pd.DataFrame.sortlevel, level=0, ascending=True)
    ground_truth.sort_index(ascending=False, level=0, inplace=True)
    ground_truth_rank = ground_truth.index.labels[1]
    
    return method(prediction_rank, ground_truth_rank)


def get_products_ranking_evaluation(X_val, y_val, y_pred, method):
    """
    Calculates the ranking evaluation for all products
    
    Args:
        X_val(pd.DataFrame): The Data set for evaluation
        y_val(pd.Series): The Data set labels (ground truth)
        y_pred(np.array): The label's prediction
        method(func): The method for ranking evaluation
    Returns:
        pd.DataFrame: With the ranking evaluation score of all the products with more than 1 review
    """
    # Add colums of the predicted and ground truth helpfullness score:
    X_val['Predicted_Helpfullness_Score'] = pd.Series(y_pred, index=X_val.index)
    X_val['Groundtruth_Helpfullness_Score'] = pd.Series(y_val, index=X_val.index)
    
    # Group by ProductId and Evaluate ranking:
    rank_evaluation = X_val.groupby('ProductId').apply(evaluate_ranking, method=method)
    rank_evaluation = rank_evaluation.apply(pd.Series)
    rank_evaluation = rank_evaluation.rename(columns={0: 'kendall_tau', 1:'p_value'})
    
    # Drop all products who had only one review:
    rank_evaluation.dropna(how='all', inplace= True)
    
    return rank_evaluation, rank_evaluation.mean()

#### Load Relevant Data

In [11]:
xgb_fe_val_pred = np.load('data/pred/xgb_fe_val_pred.npy')
svm_fe_val_pred = np.load('data/pred/svm_fe_val_pred.npy')
rfc_fe_val_pred = np.load('data/pred/rfc_fe_val_pred.npy')
rfc_bow_val_pred = np.load('data/pred/rfc_bow_val_pred.npy')
xgb_bow_val_pred = np.load('data/pred/xgb_bow_val_pred.npy')
rfc_tfidf_val_pred = np.load('data/pred/rfc_tfidf_val_pred.npy')
xgb_tfidf_val_pred = np.load('data/pred/xgb_tfidf_val_pred.npy')

# Ensemble
val_eq_ensemble_pred = np.load('data/pred/val_eq_ensemble_pred.npy')
val_w_ensemble_pred = np.load('data/pred/val_w_ensemble_pred.npy')

# Stacking
val_stack_rfc_pred = np.load('data/pred/val_stack_rfc_pred.npy')
val_stack_logreg_pred = np.load('data/pred/val_stack_logreg_pred.npy')

y_test = np.load('data/y_test.npy')
y_train = np.load('data/y_train.npy')
y_val = np.load('data/y_val.npy')

X_val = np.load('data/X_val.npy')
df_cols = np.load('data/df_cols.npy')

X_val = pd.DataFrame(X_val, columns=df_cols)

In [25]:
ranking_dict = {'rfc_fe_val_rank': rfc_fe_val_pred,
                'svm_fe_val_rank': svm_fe_val_pred, 
                'xgb_fe_val_rank': xgb_fe_val_pred,
                'rfc_tfidf_val_rank': rfc_tfidf_val_pred,
                'xgb_tfidf_val_rank': xgb_tfidf_val_pred,
                'rfc_bow_val_rank': rfc_bow_val_pred,
                'xgb_bow_val_rank': xgb_bow_val_pred,
                'val_eq_ensemble_rank': val_eq_ensemble_pred,
                'val_w_ensemble_rank': val_w_ensemble_pred,
                'val_stack_rfc_rank': val_stack_rfc_pred,
                'val_stack_logreg_rank': val_stack_logreg_pred,
               }

for key, value in ranking_dict.items():
    _, rank_score = get_products_ranking_evaluation(X_val, y_val, value ,kendallTauDistNorm)
    print(key, rank_score)

  return func(g, *args, **kwargs)


rfc_fe_val_rank kendall_tau    0.263983
dtype: float64
svm_fe_val_rank kendall_tau    0.297241
dtype: float64
xgb_fe_val_rank kendall_tau    0.27915
dtype: float64
rfc_tfidf_val_rank kendall_tau    0.308127
dtype: float64
xgb_tfidf_val_rank kendall_tau    0.298437
dtype: float64
rfc_bow_val_rank kendall_tau    0.327637
dtype: float64
xgb_bow_val_rank kendall_tau    0.303252
dtype: float64
val_eq_ensemble_rank kendall_tau    0.267451
dtype: float64
val_w_ensemble_rank kendall_tau    0.267398
dtype: float64
val_stack_rfc_rank kendall_tau    0.265746
dtype: float64
val_stack_logreg_rank kendall_tau    0.265746
dtype: float64


In [28]:
# save ranking score for demo:
ranking_rfc_fe, _ = get_products_ranking_evaluation(X_val, y_val, value ,kendallTauDistNorm)
ranking_rfc_fe.to_csv('ranking_rfc_fe.csv')

  return func(g, *args, **kwargs)


Unnamed: 0_level_0,kendall_tau
ProductId,Unnamed: 1_level_1
0006641040,0.000000
7310172001,0.000000
7310172101,0.000000
B00004CI84,0.327485
B00004CXX9,0.285714
B00004RAMY,0.400000
B00004RBDU,0.333333
B00004RYGX,0.439560
B00004S1C6,0.333333
B00005C2M2,1.000000
