In [1]:
import os
import logging
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
logging.getLogger("tensorflow").setLevel(logging.ERROR)

import tensorflow as tf
import tensorflow_ranking as tfr
import pandas as pd
import numpy as np
import time
import copy
from scipy.stats import rankdata
from tensorflow_serving.apis import input_pb2
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_squared_log_error
from sklearn.linear_model import RidgeClassifier 

In [2]:
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    details = tf.config.experimental.get_device_details(gpu)
    print(details)

{'compute_capability': (8, 6), 'device_name': 'NVIDIA GeForce RTX 3050 Laptop GPU'}


In [3]:
loaded_model = tf.saved_model.load("output_mslr_2000/export/best_model_by_loss/1684615624")

## Load test data and model

In [4]:
df_test = pd.read_csv('datasets/MSLR-WEB10K/test_mslr.csv')

In [5]:
display(df_test.head())

Unnamed: 0,relevance_label,qid,covered_query_term_number_body,covered_query_term_number_anchor,covered_query_term_number_title,covered_query_term_number_url,covered_query_term_number_whole_document,covered_query_term_ratio_body,covered_query_term_ratio_anchor,covered_query_term_ratio_title,...,length_url,inlink_number,outlink_number,pagerank,siterank,qualityscore,qualityscore2,query_url_click_count,url_click_count,url_dwell_time
0,2.0,13,0.026667,0.0,0.133333,0.066667,0.026667,1.0,0.0,1.0,...,0.022595,4.798976e-10,0.0,0.004059,0.382544,0.110236,0.027559,0.0,0.0,0.0
1,1.0,13,0.026667,0.0,0.0,0.0,0.026667,1.0,0.0,0.0,...,0.010975,4.463047e-08,0.0,0.002335,0.196231,0.255906,0.622047,0.0,0.0,0.0
2,3.0,13,0.026667,0.0,0.066667,0.0,0.026667,1.0,0.0,0.5,...,0.012266,0.0,0.0,0.002335,0.017258,0.440945,0.555118,0.0,0.0,0.0
3,1.0,13,0.026667,0.0,0.133333,0.066667,0.026667,1.0,0.0,1.0,...,0.032279,3.924362e-05,0.0,0.008545,0.934218,0.003937,0.055118,0.0,0.0,0.0
4,0.0,13,0.013333,0.0,0.0,0.0,0.013333,0.5,0.0,0.0,...,0.015494,0.0,0.0,0.88432,0.238041,0.059055,0.047244,0.0,0.0,0.0


In [6]:
def _float_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

In [7]:
def serialize_all(examples):
    
    list_examples = []
    for idx, row in examples.iterrows():     

        example_dict = {
                       f'{feat_name}':_float_feature(feat_val) for 
                        feat_name, feat_val in zip(df_test.columns.tolist()[2:], row.iloc[2:].tolist())
                    }    
        
        example_dict['relevance_label'] = _int64_feature(int(row['relevance_label']))

        example_proto = tf.train.Example(features=tf.train.Features(feature=example_dict))
        list_examples.append(example_proto.SerializeToString())
    return list_examples

## LIME

In [8]:
class Explanations:
    def __init__(self, data, sample_size=20, visible_features=20, name_features = []):
        self.data = data 
        self.sample_size = sample_size 
        self.kernel_width = np.sqrt(data.shape[1]) * .75
        self.visible_features = visible_features
        self.name_features = name_features
        
    def subscores_GAM(self, instances, idx):
        acum_subscores = []
        tensors = tf.convert_to_tensor(instances)
        for fea in self.name_features:
            tf_predictor = loaded_model.signatures[fea + '_subscore']
            subscores = tf_predictor(tensors)
            acum_subscores.append(subscores['outputs'][idx])
        return tf.stack(acum_subscores)
        
    def predict_GAM(self, instances): 
        tf_example_predictor = loaded_model.signatures['predict']
        scores = tf_example_predictor(tf.convert_to_tensor(instances))['output']
        return scores

    def kernel(self, d):
    #similarity or weight based on the Gaussian kernel function
        return np.sqrt(np.exp(-(d ** 2) / self.kernel_width ** 2))
    
    def empirical_sampling(self, instance_explained):
        generated_docs = []

        for t in range(0, self.sample_size):
            instance_explained = copy.copy(instance_explained)
            num_features = instance_explained.shape[0]
            total_feature_selected = np.random.randint(0, num_features - 2) 
            selected_features = np.random.randint(2, num_features, total_feature_selected)

            for sel_feat in selected_features:
                instance_explained[sel_feat] = np.random.choice(self.data.iloc[:, sel_feat], 1)[0]

            generated_docs.append(instance_explained)
        return generated_docs
    
    def lime_inverse_zscore(self, instance_explained):
        generated_docs = []

        for t in range(0, self.sample_size):
            instance_explained = copy.copy(instance_explained)
            num_features = instance_explained.shape[0] 
            total_feature_selected = np.random.randint(0, num_features - 2)
            selected_features = np.random.randint(2, num_features, total_feature_selected)

            for sel_feat in selected_features:
                mu = np.mean(self.data.iloc[:, sel_feat].values)
                sigma = np.std(self.data.iloc[:, sel_feat].values)
                z = np.random.normal(0, 1)
                instance_explained[sel_feat] = z * sigma + mu

            generated_docs.append(instance_explained)
        return generated_docs
    
    def gaussian_sampling(self, instance_explained):
        generated_docs = []
        
        unique_vals = {}
        exclude_features = []

        #PROBLEM: it never excludes any feature
        for i in range(2, self.data.shape[1]):
            dist = np.abs(self.data.iloc[:, i] - instance_explained[i])
            unique_val = dist[ dist < np.std(self.data.iloc[:, i])].unique()
            
            if len(unique_val) > 0: 
                unique_vals[i] = unique_val
            else: 
                exclude_features.append(i)

        for t in range(0, self.sample_size):
            instance_explained = copy.copy(instance_explained)
            num_features = self.data.shape[1] 
            total_feature_selected = np.random.randint(0, num_features - 2 - len(exclude_features))
            available_features = np.setxor1d(np.arange(2, num_features), exclude_features).astype(int)
            selected_features = np.random.choice(available_features, total_feature_selected)
            for c_feat in selected_features:
                instance_explained[c_feat] = np.random.choice(unique_vals[c_feat], 1)[0]
            generated_docs.append(instance_explained)
        return generated_docs
        
    def get_explanations(self, qid_data, doc_idx, sampling): 

        start = time.time()

        docs = serialize_all(qid_data) #list of tensor examples

        original_scores = self.predict_GAM(docs) #prediction of docs given query
        GAM_explanations = self.subscores_GAM(docs, doc_idx) #explanations ranking doc_idx
        base_rank = rankdata([-1 * i for i in original_scores]).astype(int) - 1

        instance_explained = copy.copy(qid_data.iloc[doc_idx])
        
        #returns list of new documents samples
        if sampling == 'empirical':
            generated_docs = self.empirical_sampling(instance_explained)
        elif sampling == 'gaussian':
            generated_docs = self.gaussian_sampling(instance_explained)
        elif sampling == 'lime':
            generated_docs = self.lime_inverse_zscore(instance_explained)
        
        generated_predictions = []

        for t in range(0, self.sample_size):
            temp_docs = copy.copy(docs) #copy tensors
            temp_docs[doc_idx] = serialize_all(generated_docs[t].to_frame().T)[0] #the selected instance is replaced
            genere_pred = self.predict_GAM(temp_docs)
            generated_predictions.append(genere_pred) #return tensor predict
        
        ranked_all = []
        
        for gen_pred in generated_predictions:
            ranked_all.append(rankdata([-1 * i for i in gen_pred]).astype(int) - 1)
        ranked_all = np.array(ranked_all)
        
        labels = []

        for ranked in ranked_all: 
            if ranked[doc_idx] <= base_rank[doc_idx]: 
                labels.append(1)
            else:
                labels.append(0)
                
        gen_docs = []
        
        for i in range(0, self.sample_size):
            gen_docs.append(generated_docs[i].values[2:]) #all features of generated_docs
        gen_docs = np.array(gen_docs).astype(np.float32)
        
        i_explained = instance_explained.values[2:].astype(np.float32)
        distances = np.linalg.norm(gen_docs - i_explained, axis=1) #euclidean distance
        k_weights = self.kernel(distances).astype(np.float32)
        
        clf = RidgeClassifier(alpha = 0.5).fit(gen_docs, labels, sample_weight=k_weights)
        end = time.time()
        
        print('Time took for explanations: {} '.format(end - start))
        
        return GAM_explanations, tf.transpose(clf.coef_)

In [9]:
def calculating_metrics(GAM_explanations, LIME_explanations):
    
    #RMSE
    rmse = np.sqrt(mean_squared_error(GAM_explanations, LIME_explanations))
    
    #MAE
    mae = mean_absolute_error(GAM_explanations, LIME_explanations)
    
    #MSLE
    msle = mean_squared_log_error(GAM_explanations, LIME_explanations)
    
    return rmse, mae, msle

In [10]:
name_features = ['covered_query_term_number_body','covered_query_term_number_anchor','covered_query_term_number_title',
                 'covered_query_term_number_url','covered_query_term_number_whole_document','covered_query_term_ratio_body',
                'covered_query_term_ratio_anchor','covered_query_term_ratio_title','covered_query_term_ratio_url',
                 'covered_query_term_ratio_whole_document', 'stream_length_body', 'stream_length_anchor',
                'stream_length_title','stream_length_url','stream_length_whole_document','sum_term_freq_body','sum_term_freq_anchor','sum_term_freq_title',
                 'sum_term_freq_url','sum_term_freq_whole_document','min_term_freq_body','min_term_freq_anchor','min_term_freq_title',
                 'min_term_freq_url','min_term_freq_whole_document','max_term_freq_body','max_term_freq_anchor','max_term_freq_title',
                 'max_term_freq_url','max_term_freq_whole_document','mean_term_freq_body','mean_term_freq_anchor','mean_term_freq_title',
                 'mean_term_freq_url','mean_term_freq_whole_document','sum_stream_length_normalized_term_freq_body','sum_stream_length_normalized_term_freq_anchor',
                 'sum_stream_length_normalized_term_freq_title','sum_stream_length_normalized_term_freq_url','sum_stream_length_normalized_term_whole_document',
                 'min_stream_length_normalized_term_freq_body','min_stream_length_normalized_term_freq_anchor','min_stream_length_normalized_term_freq_title',
                 'min_stream_length_normalized_term_freq_url','min_stream_length_normalized_term_freq_whole_document','max_stream_length_normalized_term_freq_body',
                 'max_stream_length_normalized_term_freq_anchor','max_stream_length_normalized_term_freq_title','max_stream_length_normalized_term_freq_url',
                 'max_stream_length_normalized_term_freq_whole_document','mean_stream_length_normalized_term_freq_body','mean_stream_length_normalized_term_freq_anchor',
                 'mean_stream_length_normalized_term_freq_title','mean_stream_length_normalized_term_freq_url','mean_stream_length_normalized_term_freq_whole_document','boolean_model_body',
                 'boolean_model_anchor','boolean_model_title','boolean_model_url','boolean_model_whole_document','vector_space_model_body',
                 'vector_space_model_anchor','vector_space_model_title','vector_space_model_url','vector_space_model_whole_document','BM25_body',
                 'BM25_anchor','BM25_title','BM25_url','BM25_whole_document','LMIR.ABS_body','LMIR.ABS_anchor','LMIR.ABS_title','LMIR.ABS_url',
                 'LMIR.ABS_whole_document','LMIR.DIR_body','LMIR.DIR_anchor','LMIR.DIR_title','LMIR.DIR_url','LMIR.DIR_whole_document','LMIR.JM_body',
                 'LMIR.JM_anchor','LMIR.JM_title','LMIR.JM_url','LMIR.JM_whole_document','num_slash_url','length_url','inlink_number','outlink_number',
                 'pagerank','siterank','qualityscore','qualityscore2','query_url_click_count','url_click_count','url_dwell_time']


In [12]:
grouped_qid = df_test.groupby('qid')
group_data = grouped_qid.get_group(13)

idx = 3

lime = Explanations(df_test, name_features = name_features)
GAM_explanations, LIME_explanations = lime.get_explanations(group_data, idx, "gaussian")

print(LIME_explanations)

minmax_scaler = MinMaxScaler()
normalized_explanations_GAM = minmax_scaler.fit_transform(GAM_explanations)
normalized_explanations_LIME = minmax_scaler.fit_transform(LIME_explanations)

rmse, mae, msle = calculating_metrics(normalized_explanations_GAM, normalized_explanations_LIME)

print("RMSE:", rmse)
print("MAE:", mae)
print("MSLE:", msle)

Time took for explanations: 2.0460870265960693 
tf.Tensor(
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]], shape=(96, 1), dtype=float32)
RMSE: 0.38795579777322287
MAE: 0.3627529723755689
MSLE: 0.10214952239783125


In [57]:
grouped_qid = df_test.groupby('qid')

#Calculate 300 queries-doc, takes 10 minutes
list_rmse = []
list_mae = []
list_msle = []

for qid, group in grouped_qid:
    
    group_data = grouped_qid.get_group(qid)
    for idx in range(0,len(group_data)):
        
        lime = Explanations(df_test, name_features = name_features)
        GAM_explanations, LIME_explanations = lime.get_explanations(group_data, idx, "empirical")

        minmax_scaler = MinMaxScaler()
        normalized_explanations_GAM = minmax_scaler.fit_transform(GAM_explanations)
        normalized_explanations_LIME = minmax_scaler.fit_transform(LIME_explanations)

        rmse, mae, msle = calculating_metrics(normalized_explanations_GAM, normalized_explanations_LIME)
        list_rmse.append(rmse)
        list_mae.append(mae)
        list_msle.append(msle)

Time took for explanations: 1.6276917457580566 
Time took for explanations: 1.646200180053711 
Time took for explanations: 1.6224274635314941 
Time took for explanations: 1.634089469909668 
Time took for explanations: 1.6872954368591309 
Time took for explanations: 1.6048815250396729 
Time took for explanations: 1.6185219287872314 
Time took for explanations: 1.6734187602996826 


KeyboardInterrupt: 