In [2]:
import sys
sys.path

['/Users/litos/opt/anaconda3/envs/tensorflow_cpu/lib/python36.zip',
 '/Users/litos/opt/anaconda3/envs/tensorflow_cpu/lib/python3.6',
 '/Users/litos/opt/anaconda3/envs/tensorflow_cpu/lib/python3.6/lib-dynload',
 '',
 '/Users/litos/opt/anaconda3/envs/tensorflow_cpu/lib/python3.6/site-packages',
 '/Users/litos/opt/anaconda3/envs/tensorflow_cpu/lib/python3.6/site-packages/IPython/extensions',
 '/Users/litos/.ipython']

In [3]:
from gurobipy import *

from scipy.sparse import csr_matrix, load_npz, save_npz
from tqdm import tqdm
from sklearn.preprocessing import normalize
from collections import *
import datetime
import json
import pandas as pd
import time
# import yaml
import scipy.sparse as sparse
from ast import literal_eval

import numpy as np
import matplotlib.pyplot as plt
import random

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import normalize
from sklearn.linear_model import Ridge
from sklearn.utils.extmath import randomized_svd

from scipy.optimize import minimize

# Utils

In [4]:
def save_dataframe_csv(df, path, name):
    df.to_csv(path+name, index=False)


def load_dataframe_csv(path, name, index_col=None):
    return pd.read_csv(path+name, index_col=index_col)


# Load Data

In [5]:
# Load the original training review table (read as latin-1).
# The validation/test tables are left commented out and are not loaded in this cell.
df_train = pd.read_csv('../../data/yelp/Train.csv',encoding='latin-1')
# df_valid = pd.read_csv('../../data/yelp/Valid.csv',encoding='latin-1')
# df_test = pd.read_csv('../../data/yelp/Test.csv',encoding='latin-1')

In [6]:
# Keyphrase names, taken from the 'Phrases' column, as a plain Python list.
keyphrases = pd.read_csv('../../data/yelp/KeyPhrases.csv')['Phrases'].tolist()
# NOTE(review): this path starts with '../data/yelp/' while every other load in
# this notebook uses '../../data/yelp/' -- confirm which directory is intended.
keyphrase_popularity = np.loadtxt('../data/yelp/'+'keyphrase_popularity.txt', dtype=int)

# Load the train / validation / test matrices (saved in scipy .npz sparse format).
# Presumably user-by-item interaction matrices -- TODO confirm.
rtrain = load_npz("../../data/yelp/Rtrain.npz")
rvalid = load_npz("../../data/yelp/Rvalid.npz")
rtest = load_npz("../../data/yelp/Rtest.npz")

# Load the keyphrase matrices (U_K: rows presumably users, I_K: rows presumably items;
# columns presumably keyphrases -- TODO confirm).
# Both are captured as default arguments by several functions defined below,
# so they must exist before those cells are run.
U_K = load_npz("../../data/yelp/U_K.npz")
I_K = load_npz("../../data/yelp/I_K.npz")

# Models

In [7]:
def get_I_K(df, row_name = 'ItemIndex', shape = (3668,75)):
    """
    Build a sparse (row entity x keyphrase) count matrix from a review dataframe.

    :param df: dataframe with a 'keyVector' column holding the string form of a
               list of keyphrase indices, and a ``row_name`` column holding the
               row index to attribute them to
    :param row_name: column used as the matrix row index
    :param shape: shape of the returned matrix
    :return: csr_matrix in which every (row, keyphrase) occurrence contributes 1;
             repeated pairs are accumulated by the sparse constructor
    """
    row_ids, col_ids, data = [], [], []
    for i in tqdm(range(df.shape[0])):
        keyphrase_ids = literal_eval(df['keyVector'][i])
        n_hits = len(keyphrase_ids)
        # One (row, keyphrase, 1) triple per keyphrase mentioned in this record.
        row_ids.extend([df[row_name][i]] * n_hits)
        col_ids.extend(keyphrase_ids)
        data.extend(np.array([1] * n_hits))
    return csr_matrix((data, (row_ids, col_ids)), shape=shape)


In [8]:
# PLREC 
def inhour(elapsed):
    """Format a duration given in seconds as an 'HH:MM:SS' string."""
    broken_down = time.gmtime(elapsed)
    return time.strftime('%H:%M:%S', broken_down)

def plrec(matrix_train, iteration=4, lamb=80, rank=200, seed=1):
    """
    Function used to achieve generalized projected lrec w/o item-attribute embedding.

    A randomized SVD of ``matrix_train`` gives a rank-``rank`` projection; the
    training matrix is projected through it and a ridge-regularised linear map
    back to ``matrix_train`` is solved in closed form.

    :param matrix_train: sparse user-item matrix with shape m*n
    :param iteration: number of power iterations in randomized svd
    :param lamb: parameter of penalty (ridge term added to the Gram matrix)
    :param rank: latent dimension size
    :param seed: random_state passed to randomized_svd
    :return: tuple ``(RQ, Y, None)`` of dense numpy arrays, in that order:
             ``RQ = matrix_train . (Qt.T * sqrt(sigma))`` (one row per row of
             ``matrix_train``) and ``Y = (RQ^T RQ + lamb*I)^-1 RQ^T matrix_train``
             (one column per column of ``matrix_train``). The third element is
             always None. Note this is NOT a prediction matrix; callers multiply
             the two factors themselves.
    """
    print ("Randomized SVD")
    start_time = time.time()
    P, sigma, Qt = randomized_svd(matrix_train,
                                  n_components=rank,
                                  n_iter=iteration,
                                  random_state=seed)

    # Project the training matrix onto the right singular vectors scaled by sqrt(sigma).
    RQ = matrix_train.dot(sparse.csc_matrix(Qt.T*np.sqrt(sigma)))

    print("Elapsed: {}".format(inhour(time.time() - start_time)))

    print ("Closed-Form Linear Optimization")
    start_time = time.time()
    # Ridge normal equations: (RQ^T RQ + lamb * I) Y = RQ^T matrix_train
    pre_inv = RQ.T.dot(RQ) + lamb * sparse.identity(rank, dtype=np.float32)
    inverse = sparse.linalg.inv(pre_inv.tocsc())
    Y = inverse.dot(RQ.T).dot(matrix_train)
    print("Elapsed: {}".format(inhour(time.time() - start_time)))

    return np.array(RQ.todense()), np.array(Y.todense()), None

# def predict_vector(rating_vector, train_vector, remove_train=True):
#     dim = len(rating_vector)
#     candidate_index = np.argpartition(-rating_vector, dim-1)[:dim]
#     prediction_items = candidate_index[rating_vector[candidate_index].argsort()[::-1]]
    
#     if remove_train:
#         return np.delete(prediction_items, np.isin(prediction_items, train_vector.nonzero()[1]).nonzero()[0])
#     else:
#         return prediction_items

    
# NOTE: an identical definition of predict_scores appears again in a later cell
# ("initial Prediction"); once that cell runs it replaces this one.
def predict_scores(matrix_U, matrix_V, bias=None,
                   penalize = False,
                   keyphrase_freq = I_K, 
                   critiqued_keyphrase = 0, 
                   matrix_Train = rtrain,
                   alpha = 0):
    """
    Compute ``matrix_U . matrix_V^T`` and optionally overwrite the entries of
    items that lack a critiqued keyphrase.

    :param matrix_U: left factor
    :param matrix_V: right factor (transposed before the product)
    :param bias: accepted but not used by this function
    :param penalize: when equal to True, entries selected by the indices of items
                     without ``critiqued_keyphrase`` are set to ``alpha``
    :param keyphrase_freq: item-by-keyphrase matrix (default bound to the global
                           I_K at definition time)
    :param critiqued_keyphrase: keyphrase index used for the penalty
    :param matrix_Train: only its ``shape[1]`` (number of items) is read
    :param alpha: value written into penalized entries
    :return: the (possibly penalized) product
    """
    prediction = matrix_U.dot(matrix_V.T)
    # Penalize
    if penalize == True:
        items_with_keyphrase = np.ravel(keyphrase_freq.T[critiqued_keyphrase].nonzero()[1])
        items_without_keyphrase = np.setdiff1d(np.arange(matrix_Train.shape[1]), items_with_keyphrase)
        # Indexes the FIRST axis of `prediction` with item indices.
        # NOTE(review): that is only item-wise if the first axis enumerates items -- confirm at call sites.
        prediction[items_without_keyphrase] = alpha # penalize
    
    return prediction

def predict_vector(rating_vector, train_vector, remove_train=True):
    """
    Rank all items for one user by descending score.

    :param rating_vector: 1-D numpy array of scores, one entry per item
    :param train_vector: 2-D single-row matrix of the user's training
                         interactions; its nonzero column indices are the
                         already-seen items
    :param remove_train: drop already-seen items from the ranking when True
    :return: 1-D array of item indices, best first
    """
    n_items = len(rating_vector)
    candidates = np.argpartition(-rating_vector, n_items - 1)[:n_items]
    descending = rating_vector[candidates].argsort()[::-1]
    ranked_items = candidates[descending]

    if not remove_train:
        return ranked_items

    seen_items = train_vector.nonzero()[1]
    seen_positions = np.isin(ranked_items, seen_items).nonzero()[0]
    return np.delete(ranked_items, seen_positions)


In [9]:
# initial Prediction
def predict_scores(matrix_U, matrix_V, bias=None,
                   penalize = False,
                   keyphrase_freq = I_K, 
                   critiqued_keyphrase = 0, 
                   matrix_Train = rtrain,
                   alpha = 0):
    """
    Score matrix ``matrix_U . matrix_V^T`` with an optional keyphrase penalty.

    :param matrix_U: left factor
    :param matrix_V: right factor (transposed before the product)
    :param bias: accepted for interface compatibility; not used
    :param penalize: when equal to True, overwrite with ``alpha`` the entries
                     indexed (along the first axis) by items that do not carry
                     ``critiqued_keyphrase``
    :param keyphrase_freq: item-by-keyphrase matrix
    :param critiqued_keyphrase: keyphrase index used for the penalty
    :param matrix_Train: only ``shape[1]`` (the number of items) is read
    :param alpha: value written into penalized entries
    :return: the score array
    """
    scores = matrix_U.dot(matrix_V.T)
    if not (penalize == True):
        return scores

    all_items = np.arange(matrix_Train.shape[1])
    carriers = np.ravel(keyphrase_freq.T[critiqued_keyphrase].nonzero()[1])
    non_carriers = np.setdiff1d(all_items, carriers)
    scores[non_carriers] = alpha
    return scores


In [10]:
# Keyphrase Selection Helpers
def get_valid_keyphrases(keyphrase_freq,top_recommendations,item = None,threshold=50,mutiple_keyphrases_en = False, top_items = None):
    """
    Collect candidate keyphrases for a single item or for several items.

    With ``mutiple_keyphrases_en`` false, this simply delegates to
    ``get_valid_keyphrases_for_one_item`` for ``item``. Otherwise the candidates
    of every entry of ``top_items`` are pooled, de-duplicated through a set and
    returned as a flat numpy array (order follows set iteration).
    """
    if not mutiple_keyphrases_en:
        return get_valid_keyphrases_for_one_item(keyphrase_freq,top_recommendations,item,threshold=threshold)

    pooled = []
    for candidate_item in top_items:
        pooled.extend(get_valid_keyphrases_for_one_item(keyphrase_freq,top_recommendations,candidate_item,threshold=threshold))
    unique_keyphrases = list(set(pooled))
    return np.ravel(unique_keyphrases)

def get_valid_keyphrases_for_one_item(keyphrase_freq,top_recommendations, item,threshold=50):
    """
    Get the keyphrases of ``item`` that are usable for critiquing.

    If the item has fewer than ``threshold`` keyphrases, all of them are
    returned; otherwise the ``threshold`` keyphrases with the highest counts
    for that same item are returned, highest first.

    :param keyphrase_freq: sparse item-by-keyphrase count matrix
    :param top_recommendations: kept for interface compatibility; not used.
        The previous implementation ranked the keyphrases of
        ``top_recommendations[0]`` in the large-item branch, even though the
        size check was made on ``item``, so any ``item`` other than the top
        recommendation received another item's keyphrases.
    :param item: row index of the item in ``keyphrase_freq``
    :param threshold: maximum number of keyphrases returned
    :return: 1-D array of keyphrase indices
    """
    item_row = keyphrase_freq[item]
    present_keyphrases = item_row.nonzero()[1]
    if len(present_keyphrases) < threshold:
        return present_keyphrases

    # Rank by this item's own counts (descending) and keep the top `threshold`.
    counts = np.ravel(item_row.todense())
    return np.argsort(counts)[::-1][:threshold]
    
# For keyphrase selecting method # 3 "diff" 
def get_item_keyphrase_freq(keyphrase_freq,item):
    """
    Normalised keyphrase distribution of one item.

    The item's keyphrase counts are divided by their sum plus 0.001 (the small
    constant avoids a division by zero for items without keyphrases) and
    returned as a flat array.
    """
    item_counts = keyphrase_freq[item].todense()
    denominator = np.sum(item_counts) + 0.001
    return np.ravel(item_counts / denominator)

def get_all_item_keyphrase_freq(item_keyphrase_freq = I_K):
    """
    Stack the normalised keyphrase distribution of every item.

    :param item_keyphrase_freq: item-by-keyphrase count matrix
    :return: dense array with one row per item, each row produced by
             ``get_item_keyphrase_freq``
    """
    return np.array([
        np.ravel(get_item_keyphrase_freq(item_keyphrase_freq, item_index))
        for item_index in range(item_keyphrase_freq.shape[0])
    ])

def get_keyphrase_popularity(df,keyphrases):
    """
    Count, for every keyphrase, the number of rows of ``df`` that mention it.

    :param df: dataframe whose 'keyVector' column holds the string form of a
               list of keyphrase indices
    :param keyphrases: sequence of keyphrases; only its length is used
    :return: float array of length ``len(keyphrases)``; a keyphrase listed
             several times in the same row is still counted once for that row
    """
    counts = np.zeros(len(keyphrases))
    num_rows = len(df)
    for i in range(num_rows):
        mentioned = literal_eval(df['keyVector'][i])
        counts[mentioned] += 1
    return counts

In [11]:
# Dense per-item normalised keyphrase distributions, computed once from the default (global) I_K.
all_item_keyphrase_freq = get_all_item_keyphrase_freq()

In [12]:
# One hot encoding of critiquing
def get_critiqued_UK(user_keyphrase_frequency,user_index,critiqued_keyphrase):
    """
    Return a copy of the U_K matrix in which one user's row is a one-hot critique.

    user_keyphrase_frequency is the U_K matrix (csr sparse matrix). In the copy,
    row ``user_index`` is zeroed and then set to 1 at ``critiqued_keyphrase``;
    every other row, and the input matrix itself, are left untouched.
    """
    critiqued = user_keyphrase_frequency.copy()
    critiqued[user_index] = 0
    critiqued[user_index, critiqued_keyphrase] = 1
    return critiqued

def project_one_hot_encoding(reg, user_keyphrase_frequency,user_index = 0,critiqued_keyphrase = 0, normalize_en = True):
    """
    Project the one-hot critique of a user through the fitted regressor.

    The U_K matrix is copied with ``user_index``'s row replaced by the one-hot
    encoding of ``critiqued_keyphrase``, then passed to ``reg.predict``.
    Row ``user_index`` of the result is the embedding of the critique.

    :param reg: fitted model exposing ``predict``
    :param normalize_en: apply ``normalize`` to the prediction when true
    :return: the (optionally normalised) prediction for all rows
    """
    one_hot_UK = get_critiqued_UK(user_keyphrase_frequency, user_index, critiqued_keyphrase)
    projected = reg.predict(one_hot_UK)
    return normalize((projected)) if normalize_en else projected

In [13]:
# Upper bound method 
def get_all_affected_items(wanted_keyphrases,keyphrase_freq):
    """
    Items that carry at least one of the wanted keyphrases.

    :param wanted_keyphrases: iterable of keyphrase (column) indices
    :param keyphrase_freq: sparse item-by-keyphrase matrix
    :return: array of unique item indices (order follows set iteration)
    """
    collected = []
    keyphrase_by_item = None
    for keyphrase in wanted_keyphrases:
        keyphrase_by_item = keyphrase_freq.T
        carriers = np.ravel(keyphrase_by_item[keyphrase].nonzero()[1])
        collected.extend(carriers)
    unique_items = set(collected)
    return np.array(list(unique_items))
    
def select_only_wanted_keyphrase(top_recommendations, wanted_keyphrases, keyphrase_freq, matrix_Train = rtrain):
    """
    Keep only the recommendations that carry at least one wanted keyphrase.

    The previous implementation built the filtered array but discarded it and
    returned ``top_recommendations`` unchanged; the filtered array is now
    returned.

    :param top_recommendations: ranked array of item indices
    :param wanted_keyphrases: iterable of keyphrase indices
    :param keyphrase_freq: sparse item-by-keyphrase matrix
    :param matrix_Train: only ``shape[1]`` (the number of items) is read
    :return: new array with the entries of ``top_recommendations`` that are not
             among the items lacking every wanted keyphrase, in original order
    """
    items_with_wanted = get_all_affected_items(wanted_keyphrases,keyphrase_freq)
    # Every item index in range that has none of the wanted keyphrases.
    items_without_wanted = np.setdiff1d(np.arange(matrix_Train.shape[1]), items_with_wanted)
    top_recommendations = np.asarray(top_recommendations)
    return top_recommendations[~np.isin(top_recommendations, items_without_wanted)]

def pruning(prediction_score, 
           wanted_keyphrases_random, 
           top_recommendations, 
           keyphrase_freq, 
           matrix_Train = rtrain,
           alpha = 0):
    """
    Overwrite the score of every item lacking all wanted keyphrases.

    :param prediction_score: 1-D score array indexed by item (not modified)
    :param wanted_keyphrases_random: iterable of wanted keyphrase indices
    :param top_recommendations: accepted but not used
    :param keyphrase_freq: sparse item-by-keyphrase matrix
    :param matrix_Train: only ``shape[1]`` (the number of items) is read
    :param alpha: value assigned to pruned items
    :return: copy of ``prediction_score`` with pruned entries set to ``alpha``.
             The summed original score of the pruned items is printed as a
             side effect.
    """
    all_items = np.arange(matrix_Train.shape[1])
    kept_items = get_all_affected_items(wanted_keyphrases_random, keyphrase_freq)
    # setdiff1d: unique values of the first array that are not in the second.
    pruned_items = np.setdiff1d(all_items, kept_items)
    print (sum(prediction_score[pruned_items]))
    pruned_score = np.copy(prediction_score)
    pruned_score[pruned_items] = alpha
    return pruned_score

# Utils

In [14]:
# Utility function for getting restaurant info from ItemIndex
def get_business_df(path = "../../data/yelp/business.json" ):
    """
    Load a JSON-lines file (one JSON object per line) into a dataframe.

    :param path: location of the file, read as UTF-8
    :return: dataframe with one row per line of the file
    """
    with open(path,encoding="utf8") as json_file:
        records = [json.loads(line) for line in json_file]
    return pd.DataFrame(records)

def get_restaurant_info(business_df, business_id, name = True, review_count = True, stars = True ):
    """
    Look up selected fields of a business by its ``business_id``.

    :param business_df: dataframe with 'business_id', 'name', 'review_count'
                        and 'stars' columns
    :param business_id: id to look up; the first matching row is used
                        (IndexError if there is none)
    :param name: include 'name' (UTF-8 encoded bytes, stripped)
    :param review_count: include 'review_count'
    :param stars: include 'stars'
    :return: dict holding the requested fields
    """
    matching_labels = business_df.index[business_df['business_id'] == business_id].tolist()
    row_idx = int(matching_labels[0])

    info = {}
    if name == True:
        raw_name = business_df['name'][row_idx]
        info['name'] = raw_name.encode('utf-8').strip()
    if review_count == True:
        info['review_count'] = business_df['review_count'][row_idx]
    if stars == True:
        info['stars'] = business_df['stars'][row_idx]
    return info

# def get_businessid_from_Itemindex(ItemIndex_list, itemindex):
#     return ItemIndex_list['business_id'].tolist()[itemindex]

def get_restaurant_name(df_train, business_df, ItemIndex):
    """
    Resolve an ItemIndex to the restaurant name.

    :param df_train: dataframe with 'ItemIndex' and 'business_id' columns
    :param business_df: business dataframe passed on to ``get_restaurant_info``
    :param ItemIndex: item index to resolve
    :return: the 'name' entry returned by ``get_restaurant_info`` for the first
             row of ``df_train`` with that ItemIndex, or "NOT_FOUND" when no
             row matches
    """
    # np.where returns a 1-tuple; the match positions are its first element.
    # (Checking len() of the tuple itself is always 1, which previously made the
    # "NOT_FOUND" branch unreachable and raised IndexError for unknown items.)
    positions = np.where(df_train['ItemIndex'] == ItemIndex)[0]
    if len(positions) == 0:
        return "NOT_FOUND"

    # Positions are positional, so use iloc rather than label-based loc.
    business_id = df_train['business_id'].iloc[positions[0]]
    item_info = get_restaurant_info(business_df, business_id)
    return item_info['name']

# Evaluation 

In [15]:
# Evaluation metrics
def recallk(vector_true_dense, hits, **unused):
    """Recall: number of hits divided by the number of relevant items."""
    num_hits = len(hits.nonzero()[0])
    num_relevant = len(vector_true_dense)
    return float(num_hits) / num_relevant

def precisionk(vector_predict, hits, **unused):
    """Precision: number of hits divided by the number of predicted items."""
    num_hits = len(hits.nonzero()[0])
    num_predicted = len(vector_predict)
    return float(num_hits) / num_predicted


def average_precisionk(vector_predict, hits, **unused):
    """
    Mean of precision@i over every rank i = 1..len(vector_predict).

    Precision@i is the cumulative number of hits up to rank i divided by i;
    the average is taken over all ranks, not only over the ranks that are hits.
    """
    ranks = range(1, len(vector_predict)+1)
    hits_so_far = np.cumsum(hits, dtype=np.float32)
    precision_at_rank = hits_so_far / ranks
    return np.mean(precision_at_rank)


def r_precision(vector_true_dense, vector_predict, **unused):
    """
    R-precision: with R relevant items, the share of the top-R predictions
    that are relevant.
    """
    num_relevant = len(vector_true_dense)
    top_r = vector_predict[:num_relevant]
    matched = np.isin(top_r, vector_true_dense)
    return float(len(matched.nonzero()[0])) / num_relevant


def _dcg_support(size):
    arr = np.arange(1, size+1)+1
    return 1./np.log2(arr)


def ndcg(vector_true_dense, vector_predict, hits):
    """
    Normalised discounted cumulative gain with binary relevance.

    The ideal DCG sums the discounts of the first ``len(vector_true_dense)``
    ranks; the actual DCG sums the discounts of the ranks flagged in ``hits``.
    """
    ideal_dcg = np.sum(_dcg_support(len(vector_true_dense)))
    discounts = _dcg_support(len(vector_predict))
    misses = np.logical_not(hits)
    discounts[misses] = 0  # only hit positions contribute gain
    return np.sum(discounts) / ideal_dcg


def click(hits, **unused):
    """
    Position of the first hit divided by 10, or 5 when there is no hit.
    """
    for position, is_hit in enumerate(hits):
        if is_hit:
            return position/10
    return 5


def evaluate(matrix_Predict, matrix_Test, metric_names =['R-Precision', 'NDCG', 'Precision', 'Recall', 'MAP'], atK = [5, 10, 15, 20, 50], analytical=False):
    """
    Compute ranking metrics for every user and summarise them.

    :param matrix_Predict: 2-D array; row u holds the ranked item indices
                           predicted for user u (best first)
    :param matrix_Test: rating matrix for evaluation, true labels; the nonzero
                        column indices of row u are the relevant items of user u
                        (presumably a scipy sparse matrix -- rows are read with
                        ``.nonzero()[1]``)
    :param metric_names: metrics to compute. 'Precision', 'Recall' and 'MAP'
                         are evaluated at every cutoff in ``atK``;
                         'R-Precision', 'NDCG' and 'Clicks' are evaluated once
                         on the full prediction row
    :param atK: cutoffs used for the @k metrics
    :param analytical: when true, return the raw per-user value lists instead
                       of summary statistics
    :return: dict mapping '<metric>@<k>' (cutoff metrics) or '<metric>'
             (full-row metrics) to either the per-user list (analytical) or a
             tuple ``(mean, 1.96 * std / sqrt(num_users))``.

    Users are skipped when their prediction slice has no nonzero entry or when
    they have no test items. The interval half-width divides by the total
    number of prediction rows, not by the number of users actually evaluated.
    """
    # Metrics evaluated once on the full ranking.
    global_metrics = {
        "R-Precision": r_precision,
        "NDCG": ndcg,
        "Clicks": click
    }

    # Metrics evaluated at each cutoff k.
    local_metrics = {
        "Precision": precisionk,
        "Recall": recallk,
        "MAP": average_precisionk
    }

    output = dict()

    num_users = matrix_Predict.shape[0]

    for k in atK:

        local_metric_names = list(set(metric_names).intersection(local_metrics.keys()))
        results = {name: [] for name in local_metric_names}
        topK_Predict = matrix_Predict[:, :k]

        for user_index in tqdm(range(topK_Predict.shape[0])):
            vector_predict = topK_Predict[user_index]
            # Skip rows whose top-k slice contains no nonzero item index.
            if len(vector_predict.nonzero()[0]) > 0:
                vector_true = matrix_Test[user_index]
                vector_true_dense = vector_true.nonzero()[1]
                hits = np.isin(vector_predict, vector_true_dense)

                # Only users with at least one test item contribute.
                if vector_true_dense.size > 0:
                    for name in local_metric_names:
                        results[name].append(local_metrics[name](vector_true_dense=vector_true_dense,
                                                                 vector_predict=vector_predict,
                                                                 hits=hits))

        results_summary = dict()
        if analytical:
            for name in local_metric_names:
                results_summary['{0}@{1}'.format(name, k)] = results[name]
        else:
            for name in local_metric_names:
                results_summary['{0}@{1}'.format(name, k)] = (np.average(results[name]),
                                                              1.96*np.std(results[name])/np.sqrt(num_users))
        output.update(results_summary)

    global_metric_names = list(set(metric_names).intersection(global_metrics.keys()))
    results = {name: [] for name in global_metric_names}

    # Full-length rankings for the cutoff-free metrics.
    topK_Predict = matrix_Predict[:]

    for user_index in tqdm(range(topK_Predict.shape[0])):
        vector_predict = topK_Predict[user_index]

        if len(vector_predict.nonzero()[0]) > 0:
            vector_true = matrix_Test[user_index]
            vector_true_dense = vector_true.nonzero()[1]
            hits = np.isin(vector_predict, vector_true_dense)

            if vector_true_dense.size > 0:
                for name in global_metric_names:
                    results[name].append(global_metrics[name](vector_true_dense=vector_true_dense,
                                                              vector_predict=vector_predict,
                                                              hits=hits))

    results_summary = dict()
    if analytical:
        for name in global_metric_names:
            results_summary[name] = results[name]
    else:
        for name in global_metric_names:
            results_summary[name] = (np.average(results[name]), 1.96*np.std(results[name])/np.sqrt(num_users))
    output.update(results_summary)

    return output



# Critiquing Pipeline

In [16]:
# Business metadata loaded from the default JSON-lines path of get_business_df.
business_df = get_business_df()

In [17]:
# Re-reads the same file that the data-loading cell already loaded into this name.
# NOTE(review): the path starts with '../data/yelp/' while the other loads use '../../data/yelp/' -- confirm.
keyphrase_popularity = np.loadtxt('../data/yelp/'+'keyphrase_popularity.txt', dtype=int)

In [18]:
# plrec returns (RQ, Y, None) in that order, so the names are swapped here:
# `Y` receives plrec's first output (one latent row per row of rtrain),
# `RQt` receives plrec's second output (one column per column of rtrain),
# and `Bias` is always None.
Y, RQt, Bias = plrec(rtrain,
                    iteration = 10,
                    lamb = 200,
                    rank = 200)
# Transposed so that RQ has one latent row per column of rtrain.
RQ = RQt.T
# Linear map from the normalised U_K rows to the latent rows in Y.
reg = LinearRegression().fit(normalize(U_K), Y)

Randomized SVD
Elapsed: 00:00:00
Closed-Form Linear Optimization
Elapsed: 00:00:00


In [19]:
# Set up the results dataframe: an empty frame with one column per recorded quantity.

# The post_rank_* columns hold the post-critique rank with different lambda
# ratios for combining the pre/post user similarity matrices.

columns = ['user_id', 'target_item', 'item_name', 'iter', 'pre_rank', 
           'top_prediction_item_name',
           'post_rank_random_all',
           'post_rank_random_upper',
           'random_scores',
           'post_rank_pop_all',
           'post_rank_pop_upper',
           'pop_scores',
           'post_rank_diff_all',
           'post_rank_diff_upper',
           'diff_scores',
           'critiqued_keyphrase_random',
           'keyphrase_name_random',
           'critiqued_keyphrase_pop',
           'keyphrase_name_pop',
           'critiqued_keyphrase_diff',
           'keyphrase_name_diff',
           'num_existing_keyphrases',
           'pure_pruning_rank'] 
df = pd.DataFrame(columns=columns)
# Scratch dict for the row currently being assembled.
row = {}

## Average

In [48]:
def Average(initial_prediction_u, keyphrase_freq, affected_items, unaffected_items, num_keyphrases, query, test_user, item_latent, reg, all_equal = True):
    """
    Combine the initial prediction with all critiques using equal weights.

    Every critiqued keyphrase ``q`` in ``query`` gets the value
    ``max(keyphrase_freq[test_user, q], 1)`` in an otherwise zero keyphrase
    vector. That vector is mapped through ``reg.predict``, scored against
    ``item_latent`` with ``predict_scores`` and added to the initial prediction.

    The earlier version also computed ``item_latent.dot(reg.coef_)`` on every
    call without using the result, and multiplied each critiqued entry by a
    constant 1; both were removed and the output is unchanged.

    :param initial_prediction_u: 1-D score array of the user before critiquing
    :param keyphrase_freq: user-by-keyphrase matrix indexed as [user, keyphrase]
    :param affected_items: unused; kept for a uniform solver signature
    :param unaffected_items: unused; kept for a uniform solver signature
    :param num_keyphrases: unused
    :param query: sequence of critiqued keyphrase indices
    :param test_user: row index of the user in ``keyphrase_freq``
    :param item_latent: item factor passed to ``predict_scores`` as ``matrix_V``
    :param reg: fitted regressor exposing ``predict``
    :param all_equal: unused
    :return: ``(new_prediction, lambdas)`` where ``lambdas`` is a list of ones,
             one per critique
    """
    critiqued_vector = np.zeros(keyphrase_freq.shape[1])
    for q in query:
        critiqued_vector[q] = max(keyphrase_freq[test_user , q],1)

    # Every critique is weighted equally with weight 1.
    lambdas = [1] * len(query)

    # Get rating score
    critique_score = predict_scores(matrix_U=reg.predict(critiqued_vector.reshape(1, -1)),
                                    matrix_V=item_latent)
    new_prediction = initial_prediction_u + critique_score.flatten()
    return new_prediction, lambdas


## Rating Objective

In [103]:
def LP1SimplifiedOptimize(initial_prediction_u, keyphrase_freq, affected_items, unaffected_items, num_keyphrases, 
                          query, test_user, item_latent, reg):
    """
    Choose one weight per critique with a linear program, then re-score items.

    One continuous variable lambda_k in [-1, 1] is created per critiqued
    keyphrase. With ``W = item_latent.dot(reg.coef_)`` and
    ``c_k = max(keyphrase_freq[test_user, query[k]], 1)``, the per-item score is
    ``initial_prediction_u[item] + sum_k lambda_k * c_k * W[item][query[k]]``.
    The LP maximises (sum of scores over affected items) * |unaffected| minus
    (sum of scores over unaffected items) * |affected|.

    The optimal lambdas scale the critique vector, which is mapped through
    ``reg.predict`` and ``predict_scores`` and added to the initial prediction.

    :param initial_prediction_u: 1-D score array of the user before critiquing
    :param keyphrase_freq: user-by-keyphrase matrix indexed as [user, keyphrase]
    :param affected_items: item indices on the positive side of the objective
    :param unaffected_items: item indices on the negative side of the objective
    :param num_keyphrases: unused
    :param query: sequence of critiqued keyphrase indices
    :param test_user: row index of the user in ``keyphrase_freq``
    :param item_latent: item latent matrix (rows indexed by item)
    :param reg: fitted regressor exposing ``coef_`` and ``predict``
                (assumes ``coef_`` is latent-dim x keyphrases -- TODO confirm)
    :return: ``(new_prediction, lambdas)``
    """

    critiqued_vector = np.zeros(keyphrase_freq.shape[1])

    for q in query:
        critiqued_vector[q] = max(keyphrase_freq[test_user , q],1)

    num_critiques = len(query)
    
    # Item-by-keyphrase weights: effect of each keyphrase on each item's score.
    W2 = reg.coef_
    W = item_latent.dot(W2)

    num_affected_items = len(affected_items)
    num_unaffected_items = len(unaffected_items)

    start_time = time.time()

    # Model
    m = Model("LP1Simplified")
    m.setParam('OutputFlag', 0)
    # Assignment variables
    lambs = []

    for k in range(num_critiques):
        lambs.append(m.addVar(lb=-1,
                              ub=1,
                              vtype=GRB.CONTINUOUS,
                              name="lamb%d" % query[k]))
        
    # Cross-weighting each side by the size of the other makes the objective
    # proportional to (mean affected score - mean unaffected score).
    m.setObjective(quicksum(initial_prediction_u[affected_item] * num_unaffected_items + quicksum(lambs[k] * critiqued_vector[query[k]] * W[affected_item][query[k]] * num_unaffected_items for k in range(num_critiques)) for affected_item in affected_items) - quicksum(initial_prediction_u[unaffected_item] * num_affected_items + quicksum(lambs[k] * critiqued_vector[query[k]] * W[unaffected_item][query[k]] * num_affected_items for k in range(num_critiques)) for unaffected_item in unaffected_items), GRB.MAXIMIZE)

    # Optimize
    m.optimize()

    # The lambda variables are the only variables in the model, so the k-th
    # model variable is the weight of the k-th critique.
    lambdas = []
    for k in range(num_critiques):
        optimal_lambda = m.getVars()[k].X
        lambdas.append(optimal_lambda)
        critiqued_vector[query[k]] *= optimal_lambda

    critique_score = predict_scores(matrix_U=reg.predict(critiqued_vector.reshape(1, -1)),
                                    matrix_V=item_latent)

    new_prediction = initial_prediction_u + critique_score.flatten()

    return new_prediction, lambdas


## RankSVM Objective

In [33]:
#### See https://www.overleaf.com/read/wwftdhpcmxnx
#### For the RankSVM math

def rankSVM(initial_prediction_u, keyphrase_freq, affected_items, unaffected_items, num_keyphrases, 
            query, test_user, item_latent, reg, user_latent_embedding, item_keyphrase_freq = I_K):
    """
    Learn critique weights with a pairwise RankSVM-style linear program.

    Variables: ``theta_0 .. theta_K`` in [-1, 1] (K = number of critiques;
    theta_0 weights the initial prediction), ``u_k >= |theta_k|`` and one slack
    ``xi_{j,j'} >= 0`` per (affected, unaffected) item pair.

    For every pair the program asks that
    ``theta . (E . RQ[affected_j]) >= theta . (E . RQ[unaffected_j']) + 1 - xi_{j,j'}``
    with ``E = user_latent_embedding``, and minimises ``sum(u) + 5 * sum(xi)``.

    Fix relative to the earlier version: the slack of pair (j, j') is stored at
    ``j * num_unaffected_items + j'`` (the order in which the slacks are
    created), but the constraint used ``j * num_affected_items + j'``, which
    attached wrong slacks to pairs and could index past the end of the list.
    Unused locals (including a read of the global ``Y``) were dropped.

    Relies on the module-level ``RQ`` (item embeddings indexed by item) and on
    ``predict_scores``.

    :param initial_prediction_u: 1-D score array of the user before critiquing
    :param keyphrase_freq: user-by-keyphrase matrix indexed as [user, keyphrase]
    :param affected_items: item indices that should be ranked higher
    :param unaffected_items: item indices that should be ranked lower
    :param num_keyphrases: unused
    :param query: sequence of critiqued keyphrase indices
    :param test_user: row index of the user in ``keyphrase_freq``
    :param item_latent: item factor passed to ``predict_scores``
    :param reg: fitted regressor exposing ``predict``
    :param user_latent_embedding: array-like whose product with an item
        embedding must have length K + 1 (presumably one row for the user
        followed by one row per critique -- TODO confirm)
    :param item_keyphrase_freq: unused
    :return: ``(new_prediction, thetas)`` with ``thetas`` a list of K + 1 floats
    """
    critiques = query # fix this variable name later

    num_critiques = len(critiques)
    num_affected_items = len(affected_items)
    num_unaffected_items = len(unaffected_items)

    # Model
    m = Model("LP2RankSVM")
    m.setParam('OutputFlag', 0)
    
    # Assignment variables
    thetas = []
    us = []
    xis = []
    # weight thetas
    for k in range(num_critiques + 1):
        thetas.append(m.addVar(lb=-1,
                              ub=1,
                              vtype=GRB.CONTINUOUS,
                              name="theta%d" % k))
    thetas = np.array(thetas)
    # dummy variable u for absolute theta
    for k in range(num_critiques + 1):
        us.append(m.addVar(vtype=GRB.CONTINUOUS,
                          name="u%d" % k))
        
    # slack variables xi, laid out row-major: pair (i, j) lives at i * num_unaffected_items + j
    for i in range(num_affected_items):
        for j in range(num_unaffected_items):
            xis.append(m.addVar(lb = 0, 
                                vtype = GRB.CONTINUOUS,
                                name = "xi_%d_%d" % (i,j) ))
      
    ## constraints
    # u_k >= |theta_k|
    for k in range(num_critiques+1):
        m.addConstr(us[k] >= thetas[k])
        m.addConstr(us[k] >= -thetas[k])

    user_latent_embedding = np.array(user_latent_embedding)

    # Pairwise ranking constraints: every affected item should outscore every
    # unaffected item by a margin of 1, up to the pair's slack.
    for j in range(num_affected_items):
        affected_projection = user_latent_embedding.dot(RQ[affected_items[j]])
        for j_ in range(num_unaffected_items):
            unaffected_projection = user_latent_embedding.dot(RQ[unaffected_items[j_]])
            slack = xis[j*num_unaffected_items + j_]
            m.addConstr( thetas.dot(affected_projection) >= thetas.dot(unaffected_projection) + 1 - slack, name = "constraints%d_%d" % (j,j_))

    lamb = 5 #regularization parameter (trading-off margin size against training error
    m.setObjective(quicksum(us) + lamb * quicksum(xis), GRB.MINIMIZE)
                
    # Optimize
    m.optimize()

    thetas = []
    for k in range(num_critiques+1):
        optimal_theta = m.getVarByName("theta%d" % k).X
        thetas.append(optimal_theta)

    critiqued_vector = np.zeros(keyphrase_freq.shape[1])
    
    # Combine weights to critiqued vector
    for c in critiques:
        critiqued_vector[c] = max(keyphrase_freq[test_user , c],1)
    # thetas[0] belongs to the initial prediction; critique k uses thetas[k+1].
    for k in range(num_critiques):
        critiqued_vector[critiques[k]] *= thetas[k+1]
    
    # Get rating score
    critique_score = predict_scores(matrix_U=reg.predict(critiqued_vector.reshape(1, -1)),
                                    matrix_V=item_latent)
    new_prediction = thetas[0]*initial_prediction_u + critique_score.flatten()
    
    return new_prediction, thetas

In [41]:
#### See https://www.overleaf.com/read/wwftdhpcmxnx
#### For the RankSVM math

def rankSVM2(initial_prediction_u, keyphrase_freq, affected_items, unaffected_items, num_keyphrases, 
            query, test_user, item_latent, reg, user_latent_embedding, item_keyphrase_freq = I_K, lamb = [5,5]):
    """
    Learn critique weights with a per-item (rather than pairwise) margin LP.

    Variables: ``theta_0 .. theta_K`` in [-1, 1] (K = number of critiques),
    ``u_k >= |theta_k|``, one slack per affected item and one per unaffected item.

    Constraints, with ``s(item) = theta . (user_latent_embedding . RQ[item])``:
      * affected:   ``s(item) >= initial_prediction_u[item] + 1 - xi_pos``
      * unaffected: ``initial_prediction_u[item] - s(item) >= 1 - xi_neg``

    Objective (minimised):
    ``sum(u) + lamb[0] * (sum(xi_pos) + sum(xi_neg)) + lamb[1] * sum(1 - theta_k)``.

    Relies on the module-level ``RQ`` and on ``predict_scores``.

    :param initial_prediction_u: 1-D score array of the user before critiquing
    :param keyphrase_freq: user-by-keyphrase matrix indexed as [user, keyphrase]
    :param affected_items: item indices that should be scored higher
    :param unaffected_items: item indices that should be scored lower
    :param num_keyphrases: unused
    :param query: sequence of critiqued keyphrase indices
    :param test_user: row index of the user in ``keyphrase_freq``
    :param item_latent: item factor passed to ``predict_scores``
    :param reg: fitted regressor exposing ``predict``
    :param user_latent_embedding: array-like whose product with an item
        embedding has length K + 1
    :param item_keyphrase_freq: unused
    :param lamb: two weights ``[slack weight, (1 - theta) weight]``; read only
    :return: ``(new_prediction, thetas)``; the initial prediction is scaled by
             ``thetas[0] / num_critiques`` before the critique score is added
    """
    critiques = query # fix this variable name later

    # pre calculate some value
    num_critiques = len(critiques)

    num_affected_items = len(affected_items)
    num_unaffected_items = len(unaffected_items)

    # Model
    m = Model("LP2RankSVM2")
    m.setParam('OutputFlag', 0) # set to 1 for outputting details
    
    # Assignment variables
    thetas = []
    us = []
    xi_pos = []
    xi_neg = []
    # weight thetas
    for k in range(num_critiques + 1):
        thetas.append(m.addVar(lb=-1,
                              ub=1,
                              vtype=GRB.CONTINUOUS,
                              name="theta%d" % k))
    thetas = np.array(thetas)
    
    # dummy variable u for absolute theta
    for k in range(num_critiques + 1):
        us.append(m.addVar(vtype=GRB.CONTINUOUS,
                          name="u%d" % k))
        
    # slack variables xi: one per affected item, one per unaffected item
    for i in range(num_affected_items):
        xi_pos.append(m.addVar(lb = 0, 
                                vtype = GRB.CONTINUOUS,
                                name = "xi_pos%d" % i ))
    for i in range(num_unaffected_items):
        xi_neg.append(m.addVar(lb = 0, 
                                vtype = GRB.CONTINUOUS,
                                name = "xi_neg%d" % i ))
        
    ## constraints
    # constraints for dummy variable u's: u_k >= |theta_k|
    for k in range(num_critiques+1):
        m.addConstr(us[k] >= thetas[k])
        m.addConstr(us[k] >= -thetas[k])
 
    user_latent_embedding = np.array(user_latent_embedding)
    
    # Affected items rank higher
    for j in range(num_affected_items):
        m.addConstr( thetas.dot(user_latent_embedding.dot(RQ[affected_items[j]])) >= initial_prediction_u[affected_items[j]] + 1 - xi_pos[j], name = "pos_constraint%d" % j )
    
    # Unaffected items rank lower
    for j in range(num_unaffected_items):
        m.addConstr( initial_prediction_u[unaffected_items[j]] - thetas.dot(user_latent_embedding.dot(RQ[unaffected_items[j]])) >=  1 - xi_neg[j], name = "neg_constraint%d" % j )
            
    # objective
    lamb1 = lamb[0] # weight of the slack terms (training error)
    lamb2 = lamb[1] # weight of sum(1 - theta): pulls each theta towards 1, i.e. towards plain averaging

    m.setObjective(quicksum(us) + lamb1 * (quicksum(xi_pos)+quicksum(xi_neg)) + lamb2 * quicksum( [( 1- theta) for theta in thetas]), GRB.MINIMIZE) 
    

                
    # Optimize
    m.optimize()

    # Save optimal thetas
    thetas = []
    for k in range(num_critiques+1):
        optimal_theta = m.getVarByName("theta%d" % k).X
        thetas.append(optimal_theta)
        
    critiqued_vector = np.zeros(keyphrase_freq.shape[1])
    
    # Combine weights to critiqued vector
    for c in critiques:
        critiqued_vector[c] = max(keyphrase_freq[test_user , c],1)
    
    # thetas[0] belongs to the initial prediction; critique k uses thetas[k+1].
    for k in range(num_critiques):
        critiqued_vector[critiques[k]] *= thetas[k+1]
    
    # Get rating score
    critique_score = predict_scores(matrix_U=reg.predict(critiqued_vector.reshape(1, -1)),
                                    matrix_V=item_latent)
    # NOTE(review): divides by num_critiques, so an empty query divides by zero -- confirm callers never pass one.
    new_prediction = thetas[0]*initial_prediction_u/num_critiques + critique_score.flatten()
    
    return new_prediction, thetas

In [120]:
#### Changes the minimization objective compared to rankSVM2

def rankSVM3(initial_prediction_u, keyphrase_freq, affected_items, unaffected_items, num_keyphrases, 
            query, test_user, item_latent, reg, user_latent_embedding, item_keyphrase_freq = I_K, lamb = 5):
    """
    Variant of rankSVM2 that penalises the deviation of each theta from 1.

    Variables: ``theta_0 .. theta_K`` in [-2, 2] (K = number of critiques),
    ``u_k >= |theta_k - 1|``, one slack per affected item and one per
    unaffected item.

    Constraints, with ``s(item) = theta . (user_latent_embedding . RQ[item])``:
      * affected:   ``s(item) >= initial_prediction_u[item] + 1 - xi_pos``
      * unaffected: ``initial_prediction_u[item] - s(item) >= 1 - xi_neg``

    Objective (minimised):
      * scalar ``lamb``: ``sum(u) + lamb * (sum(xi_pos) + sum(xi_neg))``
      * list ``lamb``:   ``lamb[0] * sum(u) + lamb[1] * (sum(xi_pos) + sum(xi_neg))``

    Relies on the module-level ``RQ`` and on ``predict_scores``. The Gurobi
    model is named "LP2RankSVM2", the same name rankSVM2 uses.

    :param initial_prediction_u: 1-D score array of the user before critiquing
    :param keyphrase_freq: user-by-keyphrase matrix indexed as [user, keyphrase]
    :param affected_items: item indices that should be scored higher
    :param unaffected_items: item indices that should be scored lower
    :param num_keyphrases: unused
    :param query: sequence of critiqued keyphrase indices
    :param test_user: row index of the user in ``keyphrase_freq``
    :param item_latent: item factor passed to ``predict_scores``
    :param reg: fitted regressor exposing ``predict``
    :param user_latent_embedding: array-like whose product with an item
        embedding has length K + 1
    :param item_keyphrase_freq: unused
    :param lamb: scalar slack weight, or a list ``[u weight, slack weight]``
        (only an exact ``list`` selects the two-weight form)
    :return: ``(new_prediction, thetas)``; the initial prediction is scaled by
             ``thetas[0] / num_critiques`` before the critique score is added
    """
    critiques = query # fix this variable name later

    # pre calculate some value
    num_critiques = len(critiques)

    num_affected_items = len(affected_items)
    num_unaffected_items = len(unaffected_items)

    # Model
    m = Model("LP2RankSVM2")
    m.setParam('OutputFlag', 0) # set to 1 for outputting details
    
    # Assignment variables
    thetas = []
    us = []
    xi_pos = []
    xi_neg = []
    # weight thetas
    for k in range(num_critiques + 1):
        thetas.append(m.addVar(lb=-2,
                              ub=2,
                              vtype=GRB.CONTINUOUS,
                              name="theta%d" % k))
    thetas = np.array(thetas)
    
    # dummy variable u for the absolute deviation |theta - 1|
    for k in range(num_critiques + 1):
        us.append(m.addVar(vtype=GRB.CONTINUOUS,
                          name="u%d" % k))
        
    # slack variables xi: one per affected item, one per unaffected item
    for i in range(num_affected_items):
        xi_pos.append(m.addVar(lb = 0, 
                                vtype = GRB.CONTINUOUS,
                                name = "xi_pos%d" % i ))
    for i in range(num_unaffected_items):
        xi_neg.append(m.addVar(lb = 0, 
                                vtype = GRB.CONTINUOUS,
                                name = "xi_neg%d" % i ))
        
    ## constraints
    # constraints for dummy variable u's: u_k >= |theta_k - 1|
    for k in range(num_critiques+1):
        m.addConstr(us[k] >= thetas[k] - 1)
        m.addConstr(us[k] >= 1 - thetas[k])
 
    user_latent_embedding = np.array(user_latent_embedding)
    
    # Affected items rank higher
    for j in range(num_affected_items):
        m.addConstr( thetas.dot(user_latent_embedding.dot(RQ[affected_items[j]])) >= initial_prediction_u[affected_items[j]] + 1 - xi_pos[j], name = "pos_constraint%d" % j )
    
    # Unaffected items rank lower
    for j in range(num_unaffected_items):
        m.addConstr( initial_prediction_u[unaffected_items[j]] - thetas.dot(user_latent_embedding.dot(RQ[unaffected_items[j]])) >=  1 - xi_neg[j], name = "neg_constraint%d" % j )
            
    # objective
    if type(lamb) != list:
        m.setObjective(quicksum(us) + lamb * (quicksum(xi_pos)+quicksum(xi_neg)), GRB.MINIMIZE)  # Single regularization
    else:
        lamb1 = lamb[0] # weight of sum(u): deviation of the thetas from 1 (i.e. from plain averaging)
        lamb2 = lamb[1] # weight of the slack terms (training error)
        m.setObjective(lamb1* quicksum(us) + lamb2 * (quicksum(xi_pos)+quicksum(xi_neg)), GRB.MINIMIZE) # double regularization
    
                
    # Optimize
    m.optimize()

    # Save optimal thetas
    thetas = []
    for k in range(num_critiques+1):
        optimal_theta = m.getVarByName("theta%d" % k).X
        thetas.append(optimal_theta)
        
    critiqued_vector = np.zeros(keyphrase_freq.shape[1])
    
    # Combine weights to critiqued vector
    for c in critiques:
        critiqued_vector[c] = max(keyphrase_freq[test_user , c],1)
    
    # thetas[0] belongs to the initial prediction; critique k uses thetas[k+1].
    for k in range(num_critiques):
        critiqued_vector[critiques[k]] *= thetas[k+1]
    
    # Get rating score
    critique_score = predict_scores(matrix_U=reg.predict(critiqued_vector.reshape(1, -1)),
                                    matrix_V=item_latent)
    # NOTE(review): divides by num_critiques, so an empty query divides by zero -- confirm callers never pass one.
    new_prediction = thetas[0]*initial_prediction_u/num_critiques + critique_score.flatten()
    
    return new_prediction, thetas

# Main

In [111]:
class LP1Simplified(object):
    """Multi-step critiquing simulation that re-ranks items with ``rankSVM3``.

    For every test user, every target rank and every held-out ("wanted") item
    the simulation repeatedly critiques one keyphrase, asks ``rankSVM3`` for
    new item scores and logs one row per step into ``self.df`` until the
    wanted item reaches the target rank or no keyphrase/iteration is left.

    NOTE(review): ``start_critiquing`` depends on notebook-level state that
    must exist before it is called: the variables ``RQ``, ``Y``, ``reg``,
    ``Bias``, ``all_item_keyphrase_freq``, ``df_train``, ``business_df`` and
    the helpers ``predict_scores``, ``predict_vector``,
    ``get_item_keyphrase_freq``, ``get_restaurant_name``, ``rankSVM3`` and
    ``inhour``.
    """

    # Number of top-ranked affected / unaffected items handed to the solver
    # as ranking constraints at every critiquing step.
    CONSTRAINT_TOP_K = 20

    def __init__(self, keyphrase_freq, item_keyphrase_freq, row, matrix_Train, matrix_Test, test_users,
                 target_ranks, num_items_sampled, num_keyphrases, df,
                 max_iteration_threshold, keyphrase_popularity, dataset_name,
                 model, parameters_row, keyphrases_names, keyphrase_selection_method, max_wanted_keyphrase, lamb, **unused):
        """Store the experiment configuration; no computation happens here.

        Args:
            keyphrase_freq: user x keyphrase matrix (indexed ``[user, keyphrase]``).
            item_keyphrase_freq: item x keyphrase matrix (indexed ``[item]`` and
                ``[:, keyphrase]``).
            row: dict reused as the template for every logged row; it is
                mutated in place during the simulation.
            matrix_Train: training interaction matrix; its shape defines
                ``num_users`` and ``num_items``.
            matrix_Test: held-out interaction matrix used to pick wanted items.
            test_users: iterable of user indices to simulate.
            target_ranks: iterable of rank thresholds that count as success.
            num_items_sampled: stored only; not read by this class.
            num_keyphrases: forwarded to ``rankSVM3``.
            df: DataFrame that logged rows are appended to.
            max_iteration_threshold: maximum critiquing steps per wanted item.
            keyphrase_popularity: array indexed by keyphrase (used by ``"pop"``).
            dataset_name: stored only; not read by this class.
            model: stored only; not read by this class.
            parameters_row: mapping with ``'iter'``, ``'lambda'`` and ``'rank'``
                used by ``get_initial_predictions``.
            keyphrases_names: sequence mapping keyphrase index -> display name.
            keyphrase_selection_method: one of ``"random"``, ``"pop"``, ``"diff"``.
            max_wanted_keyphrase: number of candidate keyphrases kept by ``"diff"``.
            lamb: regularization value(s) forwarded to ``rankSVM3``.
            **unused: extra keyword arguments are accepted and ignored.
        """
        self.keyphrase_freq = keyphrase_freq
        self.item_keyphrase_freq = item_keyphrase_freq
        self.row = row
        self.matrix_Train = matrix_Train
        self.num_users, self.num_items = matrix_Train.shape
        self.matrix_Test = matrix_Test
        self.test_users = test_users
        self.target_ranks = target_ranks
        self.num_items_sampled = num_items_sampled
        self.num_keyphrases = num_keyphrases
        self.df = df
        self.max_iteration_threshold = max_iteration_threshold
        self.keyphrase_popularity = keyphrase_popularity
        self.dataset_name = dataset_name
        self.model = model
        self.parameters_row = parameters_row
        self.keyphrase_selection_method = keyphrase_selection_method
        self.max_wanted_keyphrase = max_wanted_keyphrase

        self.lamb = lamb
        self.keyphrases_names = keyphrases_names

    def _log_row(self):
        """Append a snapshot of ``self.row`` to ``self.df``.

        ``DataFrame.append`` no longer exists in pandas 2.x, so the row is
        added with ``pd.concat``. ``self.row`` keeps being mutated afterwards,
        hence the explicit copy.
        """
        snapshot = pd.DataFrame([dict(self.row)])
        if len(self.df) == 0 and len(self.df.columns) == 0:
            # Nothing logged yet and no predefined columns: the snapshot is the log.
            self.df = snapshot
        else:
            self.df = pd.concat([self.df, snapshot], ignore_index=True)

    def _candidate_keyphrases(self, item, top_recommended_keyphrase_freq):
        """Return the keyphrases that may be critiqued for ``item``.

        ``"random"``/``"pop"`` use the keyphrases present on the item;
        ``"diff"`` ranks keyphrases by (wanted item frequency - top recommended
        item frequency), largest first, and keeps ``max_wanted_keyphrase``.
        """
        method = self.keyphrase_selection_method
        if method == "random" or method == "pop":
            return self.item_keyphrase_freq[item].nonzero()[1]
        if method == "diff":
            target_keyphrase_freq = get_item_keyphrase_freq(self.item_keyphrase_freq, item=item)
            diff_keyphrase_freq = target_keyphrase_freq - top_recommended_keyphrase_freq
            return np.argsort(np.ravel(diff_keyphrase_freq))[::-1][:self.max_wanted_keyphrase]
        raise ValueError("Unknown keyphrase_selection_method: {!r}".format(method))

    def _pick_keyphrase(self, remaining_keyphrases):
        """Choose the next keyphrase to critique from ``remaining_keyphrases``."""
        method = self.keyphrase_selection_method
        if method == "pop":
            # Always critique the most popular remaining keyphrase.
            return remaining_keyphrases[np.argmax(self.keyphrase_popularity[remaining_keyphrases])]
        if method == "random":
            return np.random.choice(remaining_keyphrases, size=1, replace=False)[0]
        if method == "diff":
            # Candidates are kept in descending-difference order, so the first
            # one is always the best remaining candidate.
            return remaining_keyphrases[0]
        raise ValueError("Unknown keyphrase_selection_method: {!r}".format(method))

    @staticmethod
    def _without_keyphrase(remaining_keyphrases, critiqued_keyphrase):
        """Drop ``critiqued_keyphrase`` while preserving the candidates' order.

        ``np.setdiff1d`` must not be used here: it returns a sorted array,
        which destroys the ranking that the ``"diff"`` strategy relies on.
        """
        remaining_keyphrases = np.asarray(remaining_keyphrases)
        return remaining_keyphrases[remaining_keyphrases != critiqued_keyphrase]

    def start_critiquing(self):
        """Run the simulation for all test users and return the log.

        Returns:
            ``self.df`` with one row per wanted item at iteration 0 and one
            row per critiquing step. The ``result`` column is ``None`` while
            the session continues, ``'successful'`` when the wanted item
            reaches ``target_rank`` and ``'fail'`` when keyphrases or
            iterations are exhausted.

        Raises:
            ValueError: if ``keyphrase_selection_method`` is not one of
                ``"random"``, ``"pop"`` or ``"diff"``.
        """
        import copy  # only needed here; kept local so the cell stays self-contained

        # Reuse the embeddings / regressor already computed at notebook level
        # instead of calling get_initial_predictions() for every instance.
        self.RQ = RQ
        self.Y = Y
        self.reg = reg

        self.prediction_scores = predict_scores(matrix_U=self.RQ,
                                                matrix_V=self.Y,
                                                bias=Bias).T

        for user in tqdm(self.test_users):
            start_time = time.time()
            # User id starts from 0
            self.row['user_id'] = user

            # Computed once per user; also the ranking used in the first step.
            initial_prediction_items = predict_vector(rating_vector=self.prediction_scores[user],
                                                      train_vector=self.matrix_Train[user],
                                                      remove_train=True)
            # Keyphrases of the currently top-recommended item (for 'diff').
            top_recommended_keyphrase_freq = get_item_keyphrase_freq(self.item_keyphrase_freq,
                                                                     item=initial_prediction_items[0])

            # Wanted items: in the test set but not in the training set.
            # They do not depend on target_rank, so compute them once per user.
            candidate_items = self.matrix_Test[user].nonzero()[1]
            train_items = self.matrix_Train[user].nonzero()[1]
            wanted_items = np.setdiff1d(candidate_items, train_items)

            # The session stops as soon as the wanted item is within target_rank.
            for target_rank in self.target_ranks:
                self.row['target_rank'] = target_rank

                for item in wanted_items:
                    # Item id starts from 0
                    self.row['item_id'] = item
                    try:
                        self.row['item_name'] = get_restaurant_name(df_train, business_df, item)
                    except Exception:
                        # Best effort: a missing name must not abort the run.
                        self.row['item_name'] = 'NOT_FOUND'
                    # No rank / score is known before the first critique.
                    self.row['item_rank'] = None
                    self.row['item_score'] = None

                    remaining_keyphrases = self._candidate_keyphrases(item, top_recommended_keyphrase_freq)
                    self.row['num_existing_keyphrases'] = len(remaining_keyphrases)
                    if len(remaining_keyphrases) == 0:
                        # Nothing to critique for this item: skip it, but keep
                        # evaluating the user's other wanted items.
                        continue

                    self.row['iteration'] = 0
                    self.row['critiqued_keyphrase'] = None
                    self.row['result'] = None
                    # Clear per-step fields so the iteration-0 row does not
                    # carry values left over from the previous item.
                    self.row['critiqued_keyphrase_name'] = None
                    self.row['lambda'] = None
                    self._log_row()

                    query = []
                    affected_items = np.array([])

                    # Rows of the embedding: the user's own latent vector
                    # followed by one vector per critique made so far.
                    user_latent_embedding = [self.Y[user]]

                    for iteration in range(self.max_iteration_threshold):
                        self.row['iteration'] = iteration + 1

                        critiqued_keyphrase = self._pick_keyphrase(remaining_keyphrases)
                        self.row['critiqued_keyphrase'] = critiqued_keyphrase
                        self.row['critiqued_keyphrase_name'] = self.keyphrases_names[critiqued_keyphrase]
                        query.append(critiqued_keyphrase)

                        # Affected items: items carrying any keyphrase critiqued so far.
                        current_affected_items = self.item_keyphrase_freq[:, critiqued_keyphrase].nonzero()[0]
                        affected_items = np.unique(np.concatenate((affected_items, current_affected_items))).astype(int)
                        unaffected_items = np.setdiff1d(range(self.num_items), affected_items)

                        if iteration == 0:
                            prediction_items = initial_prediction_items

                        # Positions (in the current ranking) of affected / unaffected items.
                        affected_items_mask = np.in1d(prediction_items, affected_items)
                        affected_items_index_rank = np.where(affected_items_mask)
                        unaffected_items_index_rank = np.where(~affected_items_mask)

                        # Map the critique into the latent space and add it to
                        # the user's embedding stack.
                        critiqued_vector = np.zeros(self.keyphrase_freq.shape[1])
                        critiqued_vector[critiqued_keyphrase] = max(self.keyphrase_freq[user, critiqued_keyphrase], 1)
                        k_ci = self.reg.predict(critiqued_vector.reshape(1, -1)).flatten()
                        user_latent_embedding.append(k_ci)

                        # Earlier experiments called Average, LP1SimplifiedOptimize,
                        # rankSVM or rankSVM2 here with the same core arguments.
                        top_k = self.CONSTRAINT_TOP_K
                        prediction_scores_u, lambdas = rankSVM3(
                            initial_prediction_u=self.prediction_scores[user],
                            keyphrase_freq=copy.deepcopy(self.keyphrase_freq),
                            affected_items=np.intersect1d(affected_items,
                                                          prediction_items[affected_items_index_rank[0][:top_k]]),
                            unaffected_items=np.intersect1d(unaffected_items,
                                                            prediction_items[unaffected_items_index_rank[0][:top_k]]),
                            num_keyphrases=self.num_keyphrases,
                            query=query,
                            test_user=user,
                            item_latent=self.RQ,
                            reg=self.reg,
                            user_latent_embedding=user_latent_embedding,
                            # NOTE(review): notebook-level matrix, not
                            # self.item_keyphrase_freq -- confirm this is intended.
                            item_keyphrase_freq=all_item_keyphrase_freq,
                            lamb=self.lamb)

                        self.row['lambda'] = lambdas
                        prediction_items = predict_vector(rating_vector=prediction_scores_u,
                                                          train_vector=self.matrix_Train[user],
                                                          remove_train=False)

                        # Current (0-based) rank of the wanted item.
                        item_rank = np.where(prediction_items == item)[0][0]
                        self.row['item_rank'] = item_rank
                        self.row['item_score'] = prediction_scores_u[item]

                        if item_rank + 1 <= target_rank:
                            # The item is ranked within the target rank.
                            self.row['result'] = 'successful'
                            self._log_row()
                            break

                        remaining_keyphrases = self._without_keyphrase(remaining_keyphrases, critiqued_keyphrase)
                        if len(remaining_keyphrases) > 0 and self.row['iteration'] < self.max_iteration_threshold:
                            # More keyphrases and iterations remain: keep going.
                            self.row['result'] = None
                            self._log_row()
                        else:
                            # Otherwise, mark fail.
                            self.row['result'] = 'fail'
                            self._log_row()
                            break

            print("User ", user ,"Elapsed: {}".format(inhour(time.time() - start_time)))
        return self.df


    def get_initial_predictions(self):
        """Fit the base recommender and the keyphrase -> latent regressor.

        Calls ``plrec`` on the training matrix with the ``'iter'``,
        ``'lambda'`` and ``'rank'`` entries of ``parameters_row``, fits a
        ``LinearRegression`` from ``keyphrase_freq`` to ``self.RQ`` and stores
        the resulting prediction scores.

        NOTE(review): ``start_critiquing`` does not call this method; it
        overwrites ``self.RQ``, ``self.Y``, ``self.reg`` and
        ``self.prediction_scores`` from notebook globals and transposes the
        scores, which this method does not -- confirm before mixing the two.
        """
        self.RQ, Yt, Bias = plrec(self.matrix_Train,
                                       iteration=self.parameters_row['iter'],
                                       lamb=self.parameters_row['lambda'],
                                       rank=self.parameters_row['rank'])
        self.Y = Yt.T

        self.reg = LinearRegression().fit(self.keyphrase_freq, self.RQ)

        self.prediction_scores = predict_scores(matrix_U=self.RQ,
                                                matrix_V=self.Y,
                                                bias=Bias)



In [117]:
# Single critiquing run on one user (quick smoke test of the rankSVM3 setup).
row = {}
matrix_Train = rtrain
matrix_Test = rtest
test_users = [1]  # debug subset; use np.arange(25) for the first 25 users
# Must be defined in this cell: it was previously commented out, so the cell
# only ran when `target_ranks` happened to be left over from another cell.
target_ranks = [20, 50]
num_items_sampled = 5
num_keyphrases = 235
df = pd.DataFrame(row)
max_iteration_threshold = 20
dataset_name = "yelp"
model = "plrec"
parameters_row = {'iter': 10,
                  'lambda':200,
                  'rank':200}
keyphrases_names = keyphrases
keyphrase_selection_method = 'diff'
max_wanted_keyphrase = 20
# lamb = [1000,1]  # two-element list -> separate weights for the two objective terms
lamb = 1
critiquing_model = LP1Simplified(keyphrase_freq=U_K,
                                item_keyphrase_freq=I_K,
                                row=row,
                                matrix_Train=matrix_Train,
                                matrix_Test=matrix_Test,
                                test_users=test_users,
                                target_ranks=target_ranks,
                                num_items_sampled=num_items_sampled,
                                num_keyphrases=num_keyphrases,
                                df=df,
                                max_iteration_threshold=max_iteration_threshold,
                                keyphrase_popularity=keyphrase_popularity,
                                dataset_name=dataset_name,
                                model=model,
                                parameters_row=parameters_row,
                                keyphrases_names = keyphrases_names,
                                keyphrase_selection_method = keyphrase_selection_method,
                                max_wanted_keyphrase = max_wanted_keyphrase,
                                lamb = lamb)
df = critiquing_model.start_critiquing()

table_path = '../tables/critiquing/multi_step_critiquing/yelp/ranksvm/'
name = 'rank_svm_test.csv'
save_dataframe_csv(df, table_path, name)



  0%|          | 0/1 [00:00<?, ?it/s][A[A

100%|██████████| 1/1 [00:33<00:00, 33.68s/it][A[A

User  1 Elapsed: 00:00:33





# Tune lambda

In [123]:
# Single Regularization
# Sweep the single weight `lamb` (it multiplies the slack terms in the
# objective) and write one result table per value. Everything that does not
# change between runs is set up once, before the loop.
lambs = [0.01,0.1,1,10,30,50,70,90,100,1000]

matrix_Train = rtrain
matrix_Test = rtest
test_users = np.arange(25)
target_ranks = [20, 50]
num_items_sampled = 5
num_keyphrases = 235
max_iteration_threshold = 20
dataset_name = "yelp"
model = "plrec"
parameters_row = {'iter': 10,
                  'lambda':200,
                  'rank':200}
keyphrases_names = keyphrases
keyphrase_selection_method = 'diff'
max_wanted_keyphrase = 20
table_path = '../tables/critiquing/tuning_ranksvm3_diff/'

for lamb in lambs:
    # The simulation mutates the row template and grows the log, so both
    # must start fresh for every value of lamb.
    row = {}
    df = pd.DataFrame(row)

    critiquing_model = LP1Simplified(
        keyphrase_freq=U_K,
        item_keyphrase_freq=I_K,
        row=row,
        matrix_Train=matrix_Train,
        matrix_Test=matrix_Test,
        test_users=test_users,
        target_ranks=target_ranks,
        num_items_sampled=num_items_sampled,
        num_keyphrases=num_keyphrases,
        df=df,
        max_iteration_threshold=max_iteration_threshold,
        keyphrase_popularity=keyphrase_popularity,
        dataset_name=dataset_name,
        model=model,
        parameters_row=parameters_row,
        keyphrases_names=keyphrases_names,
        keyphrase_selection_method=keyphrase_selection_method,
        max_wanted_keyphrase=max_wanted_keyphrase,
        lamb=lamb,
    )
    df = critiquing_model.start_critiquing()

    name = 'lamb_'+ str(lamb) + '_test.csv'
    save_dataframe_csv(df, table_path, name)





  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A



  4%|▍         | 1/25 [00:07<02:53,  7.22s/it][A[A[A[A

User  0 Elapsed: 00:00:07






  8%|▊         | 2/25 [00:39<05:37, 14.68s/it][A[A[A[A

User  1 Elapsed: 00:00:32






 12%|█▏        | 3/25 [00:46<04:34, 12.48s/it][A[A[A[A

User  2 Elapsed: 00:00:07






 16%|█▌        | 4/25 [00:55<03:59, 11.38s/it][A[A[A[A

User  3 Elapsed: 00:00:08






 20%|██        | 5/25 [00:58<02:57,  8.89s/it][A[A[A[A

User  4 Elapsed: 00:00:03






 24%|██▍       | 6/25 [01:03<02:25,  7.68s/it][A[A[A[A

User  5 Elapsed: 00:00:04






 28%|██▊       | 7/25 [01:07<02:00,  6.67s/it][A[A[A[A

User  6 Elapsed: 00:00:04






 32%|███▏      | 8/25 [01:12<01:44,  6.14s/it][A[A[A[A

User  7 Elapsed: 00:00:04






 36%|███▌      | 9/25 [01:18<01:38,  6.13s/it][A[A[A[A

User  8 Elapsed: 00:00:06






 40%|████      | 10/25 [01:30<01:59,  7.96s/it][A[A[A[A

User  9 Elapsed: 00:00:12






 44%|████▍     | 11/25 [01:40<01:58,  8.43s/it][A[A[A[A

User  10 Elapsed: 00:00:09






 48%|████▊     | 12/25 [01:49<01:50,  8.50s/it][A[A[A[A

User  11 Elapsed: 00:00:08






 52%|█████▏    | 13/25 [01:55<01:32,  7.74s/it][A[A[A[A

User  12 Elapsed: 00:00:05






 56%|█████▌    | 14/25 [02:02<01:24,  7.70s/it][A[A[A[A

User  13 Elapsed: 00:00:07






 60%|██████    | 15/25 [02:09<01:15,  7.51s/it][A[A[A[A

User  14 Elapsed: 00:00:07






 64%|██████▍   | 16/25 [02:16<01:04,  7.12s/it][A[A[A[A

User  15 Elapsed: 00:00:06






 68%|██████▊   | 17/25 [02:31<01:17,  9.74s/it][A[A[A[A

User  16 Elapsed: 00:00:15






 72%|███████▏  | 18/25 [02:49<01:24, 12.02s/it][A[A[A[A

User  17 Elapsed: 00:00:17






 76%|███████▌  | 19/25 [02:57<01:04, 10.82s/it][A[A[A[A

User  18 Elapsed: 00:00:07






 80%|████████  | 20/25 [03:13<01:03, 12.60s/it][A[A[A[A

User  19 Elapsed: 00:00:16






 84%|████████▍ | 21/25 [03:25<00:48, 12.17s/it][A[A[A[A

User  20 Elapsed: 00:00:11






 88%|████████▊ | 22/25 [03:33<00:33, 11.14s/it][A[A[A[A

User  21 Elapsed: 00:00:08






 92%|█████████▏| 23/25 [03:37<00:17,  8.79s/it][A[A[A[A

User  22 Elapsed: 00:00:03
User  23 Elapsed: 00:00:00






100%|██████████| 25/25 [03:42<00:00,  8.90s/it][A[A[A[A




  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A

User  24 Elapsed: 00:00:05






  4%|▍         | 1/25 [00:07<02:56,  7.36s/it][A[A[A[A

User  0 Elapsed: 00:00:07






  8%|▊         | 2/25 [00:41<05:55, 15.44s/it][A[A[A[A

User  1 Elapsed: 00:00:34






 12%|█▏        | 3/25 [00:49<04:47, 13.09s/it][A[A[A[A

User  2 Elapsed: 00:00:07






 16%|█▌        | 4/25 [00:57<04:06, 11.74s/it][A[A[A[A

User  3 Elapsed: 00:00:08






 20%|██        | 5/25 [01:01<03:04,  9.21s/it][A[A[A[A

User  4 Elapsed: 00:00:03






 24%|██▍       | 6/25 [01:06<02:30,  7.94s/it][A[A[A[A

User  5 Elapsed: 00:00:04






 28%|██▊       | 7/25 [01:10<02:04,  6.94s/it][A[A[A[A

User  6 Elapsed: 00:00:04






 32%|███▏      | 8/25 [01:15<01:48,  6.40s/it][A[A[A[A

User  7 Elapsed: 00:00:05






 36%|███▌      | 9/25 [01:22<01:42,  6.39s/it][A[A[A[A

User  8 Elapsed: 00:00:06






 40%|████      | 10/25 [01:34<02:01,  8.11s/it][A[A[A[A

User  9 Elapsed: 00:00:12






 44%|████▍     | 11/25 [01:42<01:54,  8.20s/it][A[A[A[A

User  10 Elapsed: 00:00:08






 48%|████▊     | 12/25 [01:51<01:49,  8.42s/it][A[A[A[A

User  11 Elapsed: 00:00:08






 52%|█████▏    | 13/25 [01:57<01:31,  7.65s/it][A[A[A[A

User  12 Elapsed: 00:00:05






 56%|█████▌    | 14/25 [02:05<01:25,  7.76s/it][A[A[A[A

User  13 Elapsed: 00:00:08






 60%|██████    | 15/25 [02:12<01:16,  7.63s/it][A[A[A[A

User  14 Elapsed: 00:00:07






 64%|██████▍   | 16/25 [02:19<01:05,  7.23s/it][A[A[A[A

User  15 Elapsed: 00:00:06






 68%|██████▊   | 17/25 [02:34<01:17,  9.72s/it][A[A[A[A

User  16 Elapsed: 00:00:15






 72%|███████▏  | 18/25 [02:52<01:23, 11.99s/it][A[A[A[A

User  17 Elapsed: 00:00:17






 76%|███████▌  | 19/25 [03:00<01:06, 11.01s/it][A[A[A[A

User  18 Elapsed: 00:00:08






 80%|████████  | 20/25 [03:17<01:03, 12.77s/it][A[A[A[A

User  19 Elapsed: 00:00:16






 84%|████████▍ | 21/25 [03:28<00:49, 12.29s/it][A[A[A[A

User  20 Elapsed: 00:00:11






 88%|████████▊ | 22/25 [03:37<00:33, 11.23s/it][A[A[A[A

User  21 Elapsed: 00:00:08






 92%|█████████▏| 23/25 [03:40<00:17,  8.87s/it][A[A[A[A

User  22 Elapsed: 00:00:03
User  23 Elapsed: 00:00:00






100%|██████████| 25/25 [03:46<00:00,  9.07s/it][A[A[A[A




  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A

User  24 Elapsed: 00:00:05






  4%|▍         | 1/25 [00:06<02:46,  6.95s/it][A[A[A[A

User  0 Elapsed: 00:00:06






  8%|▊         | 2/25 [00:43<06:03, 15.81s/it][A[A[A[A

User  1 Elapsed: 00:00:36






 12%|█▏        | 3/25 [00:51<04:58, 13.56s/it][A[A[A[A

User  2 Elapsed: 00:00:08






 16%|█▌        | 4/25 [01:00<04:14, 12.14s/it][A[A[A[A

User  3 Elapsed: 00:00:08






 20%|██        | 5/25 [01:03<03:10,  9.51s/it][A[A[A[A

User  4 Elapsed: 00:00:03






 24%|██▍       | 6/25 [01:09<02:38,  8.35s/it][A[A[A[A

User  5 Elapsed: 00:00:05






 28%|██▊       | 7/25 [01:14<02:10,  7.23s/it][A[A[A[A

User  6 Elapsed: 00:00:04






 32%|███▏      | 8/25 [01:19<01:53,  6.70s/it][A[A[A[A

User  7 Elapsed: 00:00:05






 36%|███▌      | 9/25 [01:26<01:46,  6.65s/it][A[A[A[A

User  8 Elapsed: 00:00:06






 40%|████      | 10/25 [01:39<02:07,  8.53s/it][A[A[A[A

User  9 Elapsed: 00:00:12






 44%|████▍     | 11/25 [01:48<02:03,  8.83s/it][A[A[A[A

User  10 Elapsed: 00:00:09






 48%|████▊     | 12/25 [01:56<01:51,  8.56s/it][A[A[A[A

User  11 Elapsed: 00:00:07






 52%|█████▏    | 13/25 [02:02<01:32,  7.68s/it][A[A[A[A

User  12 Elapsed: 00:00:05






 56%|█████▌    | 14/25 [02:10<01:25,  7.79s/it][A[A[A[A

User  13 Elapsed: 00:00:08






 60%|██████    | 15/25 [02:17<01:16,  7.70s/it][A[A[A[A

User  14 Elapsed: 00:00:07






 64%|██████▍   | 16/25 [02:24<01:06,  7.39s/it][A[A[A[A

User  15 Elapsed: 00:00:06






 68%|██████▊   | 17/25 [02:39<01:18,  9.79s/it][A[A[A[A

User  16 Elapsed: 00:00:15






 72%|███████▏  | 18/25 [02:58<01:26, 12.32s/it][A[A[A[A

User  17 Elapsed: 00:00:18






 76%|███████▌  | 19/25 [03:05<01:04, 10.83s/it][A[A[A[A

User  18 Elapsed: 00:00:07






 80%|████████  | 20/25 [03:23<01:05, 13.17s/it][A[A[A[A

User  19 Elapsed: 00:00:18






 84%|████████▍ | 21/25 [03:35<00:51, 12.79s/it][A[A[A[A

User  20 Elapsed: 00:00:11






 88%|████████▊ | 22/25 [03:45<00:35, 11.76s/it][A[A[A[A

User  21 Elapsed: 00:00:09






 92%|█████████▏| 23/25 [03:49<00:19,  9.51s/it][A[A[A[A

User  22 Elapsed: 00:00:04
User  23 Elapsed: 00:00:00






100%|██████████| 25/25 [03:55<00:00,  9.41s/it][A[A[A[A

User  24 Elapsed: 00:00:05







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A



  4%|▍         | 1/25 [00:06<02:35,  6.48s/it][A[A[A[A

User  0 Elapsed: 00:00:06






  8%|▊         | 2/25 [00:42<05:53, 15.37s/it][A[A[A[A

User  1 Elapsed: 00:00:36






 12%|█▏        | 3/25 [00:51<04:54, 13.37s/it][A[A[A[A

User  2 Elapsed: 00:00:08






 16%|█▌        | 4/25 [00:59<04:08, 11.83s/it][A[A[A[A

User  3 Elapsed: 00:00:08






 20%|██        | 5/25 [01:03<03:06,  9.35s/it][A[A[A[A

User  4 Elapsed: 00:00:03






 24%|██▍       | 6/25 [01:09<02:40,  8.45s/it][A[A[A[A

User  5 Elapsed: 00:00:06






 28%|██▊       | 7/25 [01:15<02:17,  7.65s/it][A[A[A[A

User  6 Elapsed: 00:00:05






 32%|███▏      | 8/25 [01:21<02:00,  7.11s/it][A[A[A[A

User  7 Elapsed: 00:00:05






 36%|███▌      | 9/25 [01:28<01:54,  7.14s/it][A[A[A[A

User  8 Elapsed: 00:00:07






 40%|████      | 10/25 [01:41<02:16,  9.09s/it][A[A[A[A

User  9 Elapsed: 00:00:13






 44%|████▍     | 11/25 [01:51<02:10,  9.32s/it][A[A[A[A

User  10 Elapsed: 00:00:09






 48%|████▊     | 12/25 [01:59<01:56,  8.98s/it][A[A[A[A

User  11 Elapsed: 00:00:08






 52%|█████▏    | 13/25 [02:06<01:37,  8.12s/it][A[A[A[A

User  12 Elapsed: 00:00:06






 56%|█████▌    | 14/25 [02:14<01:30,  8.21s/it][A[A[A[A

User  13 Elapsed: 00:00:08






 60%|██████    | 15/25 [02:22<01:19,  7.99s/it][A[A[A[A

User  14 Elapsed: 00:00:07






 64%|██████▍   | 16/25 [02:28<01:08,  7.61s/it][A[A[A[A

User  15 Elapsed: 00:00:06






 68%|██████▊   | 17/25 [02:44<01:20, 10.09s/it][A[A[A[A

User  16 Elapsed: 00:00:15






 72%|███████▏  | 18/25 [03:03<01:28, 12.69s/it][A[A[A[A

User  17 Elapsed: 00:00:18






 76%|███████▌  | 19/25 [03:11<01:08, 11.42s/it][A[A[A[A

User  18 Elapsed: 00:00:08






 80%|████████  | 20/25 [03:30<01:07, 13.57s/it][A[A[A[A

User  19 Elapsed: 00:00:18






 84%|████████▍ | 21/25 [03:42<00:52, 13.01s/it][A[A[A[A

User  20 Elapsed: 00:00:11






 88%|████████▊ | 22/25 [03:51<00:36, 12.07s/it][A[A[A[A

User  21 Elapsed: 00:00:09






 92%|█████████▏| 23/25 [03:56<00:19,  9.76s/it][A[A[A[A

User  22 Elapsed: 00:00:04
User  23 Elapsed: 00:00:00






100%|██████████| 25/25 [04:02<00:00,  9.69s/it][A[A[A[A

User  24 Elapsed: 00:00:05







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A



  4%|▍         | 1/25 [00:06<02:47,  7.00s/it][A[A[A[A

User  0 Elapsed: 00:00:06






  8%|▊         | 2/25 [00:43<06:02, 15.76s/it][A[A[A[A

User  1 Elapsed: 00:00:36






 12%|█▏        | 3/25 [00:51<04:57, 13.54s/it][A[A[A[A

User  2 Elapsed: 00:00:08






 16%|█▌        | 4/25 [00:59<04:10, 11.95s/it][A[A[A[A

User  3 Elapsed: 00:00:08






 20%|██        | 5/25 [01:03<03:08,  9.44s/it][A[A[A[A

User  4 Elapsed: 00:00:03






 24%|██▍       | 6/25 [01:08<02:37,  8.28s/it][A[A[A[A

User  5 Elapsed: 00:00:05






 28%|██▊       | 7/25 [01:14<02:14,  7.48s/it][A[A[A[A

User  6 Elapsed: 00:00:05






 32%|███▏      | 8/25 [01:20<01:58,  6.94s/it][A[A[A[A

User  7 Elapsed: 00:00:05






 36%|███▌      | 9/25 [01:27<01:51,  6.98s/it][A[A[A[A

User  8 Elapsed: 00:00:07






 40%|████      | 10/25 [01:41<02:15,  9.02s/it][A[A[A[A

User  9 Elapsed: 00:00:13






 44%|████▍     | 11/25 [01:50<02:08,  9.19s/it][A[A[A[A

User  10 Elapsed: 00:00:09






 48%|████▊     | 12/25 [01:59<01:56,  8.95s/it][A[A[A[A

User  11 Elapsed: 00:00:08






 52%|█████▏    | 13/25 [02:05<01:37,  8.15s/it][A[A[A[A

User  12 Elapsed: 00:00:06






 56%|█████▌    | 14/25 [02:13<01:30,  8.26s/it][A[A[A[A

User  13 Elapsed: 00:00:08






 60%|██████    | 15/25 [02:21<01:20,  8.04s/it][A[A[A[A

User  14 Elapsed: 00:00:07






 64%|██████▍   | 16/25 [02:28<01:08,  7.65s/it][A[A[A[A

User  15 Elapsed: 00:00:06






 68%|██████▊   | 17/25 [02:43<01:20, 10.06s/it][A[A[A[A

User  16 Elapsed: 00:00:15






 72%|███████▏  | 18/25 [03:02<01:28, 12.59s/it][A[A[A[A

User  17 Elapsed: 00:00:18






 76%|███████▌  | 19/25 [03:10<01:07, 11.32s/it][A[A[A[A

User  18 Elapsed: 00:00:08






 80%|████████  | 20/25 [03:29<01:07, 13.53s/it][A[A[A[A

User  19 Elapsed: 00:00:18






 84%|████████▍ | 21/25 [03:41<00:51, 12.99s/it][A[A[A[A

User  20 Elapsed: 00:00:11






 88%|████████▊ | 22/25 [03:50<00:35, 11.95s/it][A[A[A[A

User  21 Elapsed: 00:00:09






 92%|█████████▏| 23/25 [03:55<00:19,  9.69s/it][A[A[A[A

User  22 Elapsed: 00:00:04
User  23 Elapsed: 00:00:00






100%|██████████| 25/25 [04:01<00:00,  9.64s/it][A[A[A[A

User  24 Elapsed: 00:00:05







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A



  4%|▍         | 1/25 [00:07<02:50,  7.10s/it][A[A[A[A

User  0 Elapsed: 00:00:07






  8%|▊         | 2/25 [00:44<06:09, 16.05s/it][A[A[A[A

User  1 Elapsed: 00:00:36






 12%|█▏        | 3/25 [00:52<05:00, 13.67s/it][A[A[A[A

User  2 Elapsed: 00:00:08






 16%|█▌        | 4/25 [01:00<04:12, 12.03s/it][A[A[A[A

User  3 Elapsed: 00:00:08






 20%|██        | 5/25 [01:03<03:09,  9.46s/it][A[A[A[A

User  4 Elapsed: 00:00:03






 24%|██▍       | 6/25 [01:09<02:37,  8.29s/it][A[A[A[A

User  5 Elapsed: 00:00:05






 28%|██▊       | 7/25 [01:14<02:14,  7.49s/it][A[A[A[A

User  6 Elapsed: 00:00:05






 32%|███▏      | 8/25 [01:20<01:58,  6.96s/it][A[A[A[A

User  7 Elapsed: 00:00:05






 36%|███▌      | 9/25 [01:28<01:53,  7.09s/it][A[A[A[A

User  8 Elapsed: 00:00:07






 40%|████      | 10/25 [01:42<02:18,  9.22s/it][A[A[A[A

User  9 Elapsed: 00:00:14






 44%|████▍     | 11/25 [01:52<02:12,  9.44s/it][A[A[A[A

User  10 Elapsed: 00:00:09






 48%|████▊     | 12/25 [02:00<01:58,  9.09s/it][A[A[A[A

User  11 Elapsed: 00:00:08






 52%|█████▏    | 13/25 [02:06<01:38,  8.21s/it][A[A[A[A

User  12 Elapsed: 00:00:06






 56%|█████▌    | 14/25 [02:14<01:30,  8.20s/it][A[A[A[A

User  13 Elapsed: 00:00:08






 60%|██████    | 15/25 [02:22<01:20,  8.01s/it][A[A[A[A

User  14 Elapsed: 00:00:07






 64%|██████▍   | 16/25 [02:29<01:08,  7.61s/it][A[A[A[A

User  15 Elapsed: 00:00:06






 68%|██████▊   | 17/25 [02:45<01:22, 10.30s/it][A[A[A[A

User  16 Elapsed: 00:00:16






 72%|███████▏  | 18/25 [03:04<01:29, 12.77s/it][A[A[A[A

User  17 Elapsed: 00:00:18






 76%|███████▌  | 19/25 [03:12<01:07, 11.32s/it][A[A[A[A

User  18 Elapsed: 00:00:07






 80%|████████  | 20/25 [03:30<01:07, 13.43s/it][A[A[A[A

User  19 Elapsed: 00:00:18






 84%|████████▍ | 21/25 [03:42<00:51, 12.90s/it][A[A[A[A

User  20 Elapsed: 00:00:11






 88%|████████▊ | 22/25 [03:51<00:35, 11.98s/it][A[A[A[A

User  21 Elapsed: 00:00:09






 92%|█████████▏| 23/25 [03:56<00:19,  9.70s/it][A[A[A[A

User  22 Elapsed: 00:00:04
User  23 Elapsed: 00:00:00






100%|██████████| 25/25 [04:02<00:00,  9.69s/it][A[A[A[A

User  24 Elapsed: 00:00:05







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A



  4%|▍         | 1/25 [00:07<02:51,  7.14s/it][A[A[A[A

User  0 Elapsed: 00:00:07






  8%|▊         | 2/25 [00:43<06:07, 15.98s/it][A[A[A[A

User  1 Elapsed: 00:00:36






 12%|█▏        | 3/25 [00:51<05:00, 13.65s/it][A[A[A[A

User  2 Elapsed: 00:00:08






 16%|█▌        | 4/25 [01:00<04:11, 11.97s/it][A[A[A[A

User  3 Elapsed: 00:00:08






 20%|██        | 5/25 [01:03<03:08,  9.44s/it][A[A[A[A

User  4 Elapsed: 00:00:03






 24%|██▍       | 6/25 [01:09<02:37,  8.27s/it][A[A[A[A

User  5 Elapsed: 00:00:05






 28%|██▊       | 7/25 [01:14<02:14,  7.48s/it][A[A[A[A

User  6 Elapsed: 00:00:05






 32%|███▏      | 8/25 [01:20<01:57,  6.92s/it][A[A[A[A

User  7 Elapsed: 00:00:05






 36%|███▌      | 9/25 [01:27<01:51,  6.96s/it][A[A[A[A

User  8 Elapsed: 00:00:07






 40%|████      | 10/25 [01:41<02:15,  9.02s/it][A[A[A[A

User  9 Elapsed: 00:00:13






 44%|████▍     | 11/25 [01:51<02:12,  9.50s/it][A[A[A[A

User  10 Elapsed: 00:00:10






 48%|████▊     | 12/25 [01:59<01:58,  9.09s/it][A[A[A[A

User  11 Elapsed: 00:00:08






 52%|█████▏    | 13/25 [02:06<01:38,  8.19s/it][A[A[A[A

User  12 Elapsed: 00:00:06






 56%|█████▌    | 14/25 [02:14<01:30,  8.27s/it][A[A[A[A

User  13 Elapsed: 00:00:08






 60%|██████    | 15/25 [02:22<01:20,  8.03s/it][A[A[A[A

User  14 Elapsed: 00:00:07






 64%|██████▍   | 16/25 [02:28<01:08,  7.63s/it][A[A[A[A

User  15 Elapsed: 00:00:06






 68%|██████▊   | 17/25 [02:45<01:22, 10.36s/it][A[A[A[A

User  16 Elapsed: 00:00:16






 72%|███████▏  | 18/25 [03:04<01:30, 12.88s/it][A[A[A[A

User  17 Elapsed: 00:00:18






 76%|███████▌  | 19/25 [03:12<01:08, 11.46s/it][A[A[A[A

User  18 Elapsed: 00:00:08






 80%|████████  | 20/25 [03:29<01:06, 13.30s/it][A[A[A[A

User  19 Elapsed: 00:00:17






 84%|████████▍ | 21/25 [03:40<00:50, 12.56s/it][A[A[A[A

User  20 Elapsed: 00:00:10






 88%|████████▊ | 22/25 [03:50<00:34, 11.58s/it][A[A[A[A

User  21 Elapsed: 00:00:09






 92%|█████████▏| 23/25 [03:54<00:18,  9.35s/it][A[A[A[A

User  22 Elapsed: 00:00:04
User  23 Elapsed: 00:00:00






100%|██████████| 25/25 [03:59<00:00,  9.58s/it][A[A[A[A

User  24 Elapsed: 00:00:05







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A



  4%|▍         | 1/25 [00:06<02:40,  6.70s/it][A[A[A[A

User  0 Elapsed: 00:00:06






  8%|▊         | 2/25 [00:40<05:41, 14.87s/it][A[A[A[A

User  1 Elapsed: 00:00:33






 12%|█▏        | 3/25 [00:48<04:40, 12.76s/it][A[A[A[A

User  2 Elapsed: 00:00:07






 16%|█▌        | 4/25 [00:56<03:55, 11.23s/it][A[A[A[A

User  3 Elapsed: 00:00:07






 20%|██        | 5/25 [00:59<02:56,  8.85s/it][A[A[A[A

User  4 Elapsed: 00:00:03






 24%|██▍       | 6/25 [01:04<02:27,  7.75s/it][A[A[A[A

User  5 Elapsed: 00:00:05






 28%|██▊       | 7/25 [01:09<02:04,  6.94s/it][A[A[A[A

User  6 Elapsed: 00:00:05






 32%|███▏      | 8/25 [01:14<01:49,  6.44s/it][A[A[A[A

User  7 Elapsed: 00:00:05






 36%|███▌      | 9/25 [01:21<01:43,  6.47s/it][A[A[A[A

User  8 Elapsed: 00:00:06






 40%|████      | 10/25 [01:34<02:06,  8.41s/it][A[A[A[A

User  9 Elapsed: 00:00:12






 44%|████▍     | 11/25 [01:44<02:03,  8.85s/it][A[A[A[A

User  10 Elapsed: 00:00:09






 48%|████▊     | 12/25 [01:52<01:50,  8.53s/it][A[A[A[A

User  11 Elapsed: 00:00:07






 52%|█████▏    | 13/25 [01:57<01:32,  7.71s/it][A[A[A[A

User  12 Elapsed: 00:00:05






 56%|█████▌    | 14/25 [02:05<01:25,  7.77s/it][A[A[A[A

User  13 Elapsed: 00:00:07






 60%|██████    | 15/25 [02:12<01:15,  7.59s/it][A[A[A[A

User  14 Elapsed: 00:00:07






 64%|██████▍   | 16/25 [02:19<01:05,  7.25s/it][A[A[A[A

User  15 Elapsed: 00:00:06






 68%|██████▊   | 17/25 [02:34<01:16,  9.59s/it][A[A[A[A

User  16 Elapsed: 00:00:15






 72%|███████▏  | 18/25 [02:51<01:22, 11.82s/it][A[A[A[A

User  17 Elapsed: 00:00:17






 76%|███████▌  | 19/25 [02:58<01:02, 10.47s/it][A[A[A[A

User  18 Elapsed: 00:00:07






 80%|████████  | 20/25 [03:16<01:02, 12.52s/it][A[A[A[A

User  19 Elapsed: 00:00:17






 84%|████████▍ | 21/25 [03:27<00:48, 12.07s/it][A[A[A[A

User  20 Elapsed: 00:00:11






 88%|████████▊ | 22/25 [03:36<00:33, 11.17s/it][A[A[A[A

User  21 Elapsed: 00:00:09






 92%|█████████▏| 23/25 [03:40<00:18,  9.06s/it][A[A[A[A

User  22 Elapsed: 00:00:04
User  23 Elapsed: 00:00:00






100%|██████████| 25/25 [03:46<00:00,  9.06s/it][A[A[A[A

User  24 Elapsed: 00:00:06







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A



  4%|▍         | 1/25 [00:06<02:38,  6.62s/it][A[A[A[A

User  0 Elapsed: 00:00:06






  8%|▊         | 2/25 [00:40<05:41, 14.86s/it][A[A[A[A

User  1 Elapsed: 00:00:34






 12%|█▏        | 3/25 [00:48<04:38, 12.65s/it][A[A[A[A

User  2 Elapsed: 00:00:07






 16%|█▌        | 4/25 [00:55<03:53, 11.11s/it][A[A[A[A

User  3 Elapsed: 00:00:07






 20%|██        | 5/25 [00:59<02:55,  8.78s/it][A[A[A[A

User  4 Elapsed: 00:00:03






 24%|██▍       | 6/25 [01:04<02:25,  7.64s/it][A[A[A[A

User  5 Elapsed: 00:00:04






 28%|██▊       | 7/25 [01:09<02:04,  6.91s/it][A[A[A[A

User  6 Elapsed: 00:00:05






 32%|███▏      | 8/25 [01:14<01:48,  6.39s/it][A[A[A[A

User  7 Elapsed: 00:00:05






 36%|███▌      | 9/25 [01:21<01:43,  6.47s/it][A[A[A[A

User  8 Elapsed: 00:00:06






 40%|████      | 10/25 [01:34<02:07,  8.50s/it][A[A[A[A

User  9 Elapsed: 00:00:13






 44%|████▍     | 11/25 [01:44<02:05,  9.00s/it][A[A[A[A

User  10 Elapsed: 00:00:10






 48%|████▊     | 12/25 [01:52<01:51,  8.59s/it][A[A[A[A

User  11 Elapsed: 00:00:07






 52%|█████▏    | 13/25 [01:58<01:33,  7.81s/it][A[A[A[A

User  12 Elapsed: 00:00:06






 56%|█████▌    | 14/25 [02:05<01:24,  7.72s/it][A[A[A[A

User  13 Elapsed: 00:00:07






 60%|██████    | 15/25 [02:12<01:15,  7.56s/it][A[A[A[A

User  14 Elapsed: 00:00:07






 64%|██████▍   | 16/25 [02:19<01:04,  7.18s/it][A[A[A[A

User  15 Elapsed: 00:00:06






 68%|██████▊   | 17/25 [02:34<01:16,  9.62s/it][A[A[A[A

User  16 Elapsed: 00:00:15






 72%|███████▏  | 18/25 [02:50<01:21, 11.61s/it][A[A[A[A

User  17 Elapsed: 00:00:16






 76%|███████▌  | 19/25 [02:58<01:02, 10.38s/it][A[A[A[A

User  18 Elapsed: 00:00:07






 80%|████████  | 20/25 [03:15<01:02, 12.59s/it][A[A[A[A

User  19 Elapsed: 00:00:17






 84%|████████▍ | 21/25 [03:27<00:48, 12.20s/it][A[A[A[A

User  20 Elapsed: 00:00:11






 88%|████████▊ | 22/25 [03:36<00:34, 11.36s/it][A[A[A[A

User  21 Elapsed: 00:00:09






 92%|█████████▏| 23/25 [03:40<00:18,  9.18s/it][A[A[A[A

User  22 Elapsed: 00:00:04
User  23 Elapsed: 00:00:00






100%|██████████| 25/25 [03:46<00:00,  9.06s/it][A[A[A[A

User  24 Elapsed: 00:00:05







  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A



  4%|▍         | 1/25 [00:06<02:47,  6.97s/it][A[A[A[A

User  0 Elapsed: 00:00:06






  8%|▊         | 2/25 [00:40<05:41, 14.84s/it][A[A[A[A

User  1 Elapsed: 00:00:33






 12%|█▏        | 3/25 [00:48<04:40, 12.75s/it][A[A[A[A

User  2 Elapsed: 00:00:07






 16%|█▌        | 4/25 [00:55<03:51, 11.04s/it][A[A[A[A

User  3 Elapsed: 00:00:07






 20%|██        | 5/25 [00:58<02:53,  8.67s/it][A[A[A[A

User  4 Elapsed: 00:00:03






 24%|██▍       | 6/25 [01:03<02:25,  7.64s/it][A[A[A[A

User  5 Elapsed: 00:00:05






 28%|██▊       | 7/25 [01:08<02:03,  6.87s/it][A[A[A[A

User  6 Elapsed: 00:00:05






 32%|███▏      | 8/25 [01:13<01:49,  6.44s/it][A[A[A[A

User  7 Elapsed: 00:00:05






 36%|███▌      | 9/25 [01:20<01:43,  6.48s/it][A[A[A[A

User  8 Elapsed: 00:00:06






 40%|████      | 10/25 [01:33<02:07,  8.53s/it][A[A[A[A

User  9 Elapsed: 00:00:13






 44%|████▍     | 11/25 [01:43<02:05,  8.93s/it][A[A[A[A

User  10 Elapsed: 00:00:09






 48%|████▊     | 12/25 [01:51<01:50,  8.49s/it][A[A[A[A

User  11 Elapsed: 00:00:07






 52%|█████▏    | 13/25 [01:57<01:32,  7.73s/it][A[A[A[A

User  12 Elapsed: 00:00:05






 56%|█████▌    | 14/25 [02:04<01:24,  7.72s/it][A[A[A[A

User  13 Elapsed: 00:00:07






 60%|██████    | 15/25 [02:11<01:14,  7.46s/it][A[A[A[A

User  14 Elapsed: 00:00:06






 64%|██████▍   | 16/25 [02:18<01:04,  7.15s/it][A[A[A[A

User  15 Elapsed: 00:00:06






 68%|██████▊   | 17/25 [02:33<01:15,  9.47s/it][A[A[A[A

User  16 Elapsed: 00:00:14






 72%|███████▏  | 18/25 [02:49<01:21, 11.60s/it][A[A[A[A

User  17 Elapsed: 00:00:16






 76%|███████▌  | 19/25 [02:57<01:02, 10.42s/it][A[A[A[A

User  18 Elapsed: 00:00:07






 80%|████████  | 20/25 [03:14<01:01, 12.40s/it][A[A[A[A

User  19 Elapsed: 00:00:17






 84%|████████▍ | 21/25 [03:25<00:48, 12.05s/it][A[A[A[A

User  20 Elapsed: 00:00:11






 88%|████████▊ | 22/25 [03:34<00:33, 11.24s/it][A[A[A[A

User  21 Elapsed: 00:00:09






 92%|█████████▏| 23/25 [03:38<00:18,  9.06s/it][A[A[A[A

User  22 Elapsed: 00:00:03
User  23 Elapsed: 00:00:00






100%|██████████| 25/25 [03:45<00:00,  9.00s/it][A[A[A[A

User  24 Elapsed: 00:00:06





In [None]:
# 2D Regularization: grid search over every (lamb1, lamb2) pair. Each pair
# runs the LP1Simplified critiquing model once and writes the resulting table
# to its own CSV file.
lambs = [0.01,0.1,1,10,30,50,70,90,100,1000]

# Settings shared by every run. None of them depends on the lambda pair, so
# they are assigned once here instead of on every pass through the loops.
matrix_Train = rtrain
matrix_Test = rtest
num_items_sampled = 5
# NOTE(review): confirm 235 matches the keyphrase dimension of U_K / I_K.
num_keyphrases = 235
max_iteration_threshold = 20
dataset_name = "yelp"
model = "plrec"
keyphrases_names = keyphrases
keyphrase_selection_method = 'random'
max_wanted_keyphrase = 20
table_path = '../tables/critiquing/tuning_ranksvm3_random/'

for lamb1 in lambs:
    for lamb2 in lambs:
        # Mutable inputs are rebuilt for every run so that nothing written
        # into them during one run can carry over into the next one.
        row = {}
        df = pd.DataFrame(row)
        test_users = np.arange(25)
        # Single-user alternative kept from the original cell:
        # test_users = [1]
        target_ranks = [20, 50]
        parameters_row = {'iter': 10,
                          'lambda':200,
                          'rank':200}
        critiquing_model = LP1Simplified(keyphrase_freq=U_K,
                                        item_keyphrase_freq=I_K,
                                        row=row,
                                        matrix_Train=matrix_Train,
                                        matrix_Test=matrix_Test,
                                        test_users=test_users,
                                        target_ranks=target_ranks,
                                        num_items_sampled=num_items_sampled,
                                        num_keyphrases=num_keyphrases,
                                        df=df,
                                        max_iteration_threshold=max_iteration_threshold,
                                        keyphrase_popularity=keyphrase_popularity,
                                        dataset_name=dataset_name,
                                        model=model,
                                        parameters_row=parameters_row,
                                        keyphrases_names = keyphrases_names,
                                        keyphrase_selection_method = keyphrase_selection_method,
                                        max_wanted_keyphrase = max_wanted_keyphrase,
                                        lamb = [lamb1,lamb2])
        df = critiquing_model.start_critiquing()

        # One result file per lambda pair, named after the two values.
        name = 'lamb1_'+ str(lamb1) + '_lamb2_'+ str(lamb2) + '_test.csv'
        save_dataframe_csv(df, table_path, name)





  0%|          | 0/25 [00:00<?, ?it/s][A[A[A[A



  4%|▍         | 1/25 [00:07<02:54,  7.26s/it][A[A[A[A

User  0 Elapsed: 00:00:07


In [206]:
# Location of a previously saved critiquing result table.
name = 'ranksvm2test.csv'
table_path = '../tables/critiquing/multi_step_critiquing/yelp/ranksvm/'
# Read the table back from disk; the matching save call is left disabled:
# save_dataframe_csv(df, table_path, name)
df = load_dataframe_csv(path=table_path, name=name)

In [243]:
def avg_successful_rate(df):
    """Return the fraction of critiquing runs that ended successfully.

    One run is counted for every row whose ``iteration`` value is 0, and one
    success for every row whose ``result`` value is ``'successful'``.

    Args:
        df: DataFrame with ``iteration`` and ``result`` columns.

    Returns:
        The number of successful rows divided by the number of runs, as a
        float. NaN is returned when ``df`` contains no row with
        ``iteration == 0`` (for example an empty frame), rather than raising
        ``ZeroDivisionError``.
    """
    num_runs = int((df['iteration'] == 0).sum())
    if num_runs == 0:
        # Nothing to average over; the rate is undefined.
        return float('nan')
    num_success = int((df['result'] == 'successful').sum())

    return num_success/num_runs
def avg_length(df,include_fail = True):
    """Return the average number of non-initial rows per critiquing run.

    One run is counted for every row whose ``iteration`` value is 0. The
    result is the number of remaining rows divided by the number of runs.

    Args:
        df: DataFrame with an ``iteration`` column.
        include_fail: Accepted for interface compatibility; the computation
            does not use it.

    Returns:
        ``(len(df) - num_runs) / num_runs`` as a float. NaN is returned when
        ``df`` contains no row with ``iteration == 0`` (for example an empty
        frame), rather than raising ``ZeroDivisionError``.
    """
    num_runs = int((df['iteration'] == 0).sum())
    if num_runs == 0:
        # Nothing to average over; the length is undefined.
        return float('nan')
    return (len(df)-num_runs)/num_runs

In [267]:
# Split the loaded results by target rank. Only ranks 20 and 50 are used;
# the splits for ranks 5 and 10 (df_5, df_10) are disabled.
df_20, df_50 = (df[df['target_rank'] == rank] for rank in (20, 50))

In [245]:
# Ranksvm 1 50 users 20 topaffected lamb = 5
# One value per line: average length and success rate for df_20, then for df_50.
print(
    avg_length(df_20),
    avg_successful_rate(df_20),
    avg_length(df_50),
    avg_successful_rate(df_50),
    sep='\n',
)

18.271084337349397
0.10090361445783133
16.94277108433735
0.1822289156626506


In [239]:
# Ranksvm2 50 users 20topaffected lamb = 5
# One value per line: average length and success rate for df_20, then for df_50.
print(
    avg_length(df_20),
    avg_successful_rate(df_20),
    avg_length(df_50),
    avg_successful_rate(df_50),
    sep='\n',
)

18.370481927710845
0.09789156626506024
17.049698795180724
0.17620481927710843


In [268]:
# Ranksvm2 50 users 20topaffected lamb = 100, range = -100to100
# One value per line: average length and success rate for df_20, then for df_50.
print(
    avg_length(df_20),
    avg_successful_rate(df_20),
    avg_length(df_50),
    avg_successful_rate(df_50),
    sep='\n',
)

18.13102409638554
0.11897590361445783
16.673192771084338
0.21234939759036145


In [252]:
# Avg
# One value per line: average length and success rate for df_20, then for df_50.
print(
    avg_length(df_20),
    avg_successful_rate(df_20),
    avg_length(df_50),
    avg_successful_rate(df_50),
    sep='\n',
)

16.003012048192772
0.2756024096385542
14.045180722891565
0.39457831325301207


In [219]:
# Ranksvm 2 bot 20 affected
# One value per line: average length and success rate for df_20, then for df_50.
print(
    avg_length(df_20),
    avg_successful_rate(df_20),
    avg_length(df_50),
    avg_successful_rate(df_50),
    sep='\n',
)

20.0
0.0
19.08888888888889
0.06666666666666667


In [290]:
# Rating obj
# One value per line: average length and success rate for df_20, then for df_50.
print(
    avg_length(df_20),
    avg_successful_rate(df_20),
    avg_length(df_50),
    avg_successful_rate(df_50),
    sep='\n',
)

19.377777777777776
0.08888888888888889
18.88888888888889
0.15555555555555556


In [258]:
# top20items
# One value per line: average length and success rate for df_20, then for df_50.
print(
    avg_length(df_20),
    avg_successful_rate(df_20),
    avg_length(df_50),
    avg_successful_rate(df_50),
    sep='\n',
)

18.466666666666665
0.15555555555555556
17.177777777777777
0.26666666666666666
