In [2]:
import sys
sys.path

['/Users/litos/opt/anaconda3/envs/tensorflow_cpu/lib/python36.zip',
 '/Users/litos/opt/anaconda3/envs/tensorflow_cpu/lib/python3.6',
 '/Users/litos/opt/anaconda3/envs/tensorflow_cpu/lib/python3.6/lib-dynload',
 '',
 '/Users/litos/opt/anaconda3/envs/tensorflow_cpu/lib/python3.6/site-packages',
 '/Users/litos/opt/anaconda3/envs/tensorflow_cpu/lib/python3.6/site-packages/IPython/extensions',
 '/Users/litos/.ipython']

In [3]:
from gurobipy import *

from scipy.sparse import csr_matrix, load_npz, save_npz
from tqdm import tqdm
from sklearn.preprocessing import normalize
from collections import *
import datetime
import json
import pandas as pd
import time
# import yaml
import scipy.sparse as sparse
from ast import literal_eval

import numpy as np
import matplotlib.pyplot as plt
import random

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import normalize
from sklearn.linear_model import Ridge
from sklearn.utils.extmath import randomized_svd

from scipy.optimize import minimize

# Utils

In [4]:
def save_dataframe_csv(df, path, name):
    df.to_csv(path+name, index=False)


def load_dataframe_csv(path, name, index_col=None):
    return pd.read_csv(path+name, index_col=index_col)


# Load Data

In [5]:
# Load Original Data
df_train = pd.read_csv('../../data/yelp/Train.csv',encoding='latin-1')
# df_valid = pd.read_csv('../../data/yelp/Valid.csv',encoding='latin-1')
# df_test = pd.read_csv('../../data/yelp/Test.csv',encoding='latin-1')

In [6]:
keyphrases = pd.read_csv('../../data/yelp/KeyPhrases.csv')['Phrases'].tolist()
keyphrase_popularity = np.loadtxt('../data/yelp/'+'keyphrase_popularity.txt', dtype=int)

# Load U-I Data 
rtrain = load_npz("../../data/yelp/Rtrain.npz")
rvalid = load_npz("../../data/yelp/Rvalid.npz")
rtest = load_npz("../../data/yelp/Rtest.npz")

# Load user/item keyphrase data
U_K = load_npz("../../data/yelp/U_K.npz")
I_K = load_npz("../../data/yelp/I_K.npz")

# Models

In [7]:
def get_I_K(df, row_name = 'ItemIndex', shape = (3668,75)):
    rows = []
    cols = []
    vals = []
    for i in tqdm(range(df.shape[0])):
        key_vector = literal_eval(df['keyVector'][i])
        rows.extend([df[row_name][i]]*len(key_vector)) ## Item index
        cols.extend(key_vector) ## Keyword Index
        vals.extend(np.array([1]*len(key_vector)))
    return csr_matrix((vals, (rows, cols)), shape=shape)


In [8]:
# PLREC 
def inhour(elapsed):
    return time.strftime('%H:%M:%S', time.gmtime(elapsed))

def plrec(matrix_train, iteration=4, lamb=80, rank=200, seed=1):
    """
    Function used to achieve generalized projected lrec w/o item-attribute embedding
    :param matrix_train: user-item matrix with shape m*n
    :param iteration: number of power iterations in randomized svd
    :param lamb: parameter of penalty
    :param rank: latent dimension size
    :param seed: the seed of the pseudo random number generator to use when shuffling the data
    :return: prediction in sparse matrix
    """
    print ("Randomized SVD")
    start_time = time.time()
    P, sigma, Qt = randomized_svd(matrix_train,
                                  n_components=rank,
                                  n_iter=iteration,
                                  random_state=seed)

    RQ = matrix_train.dot(sparse.csc_matrix(Qt.T*np.sqrt(sigma)))

    print("Elapsed: {}".format(inhour(time.time() - start_time)))

    print ("Closed-Form Linear Optimization")
    start_time = time.time()
    pre_inv = RQ.T.dot(RQ) + lamb * sparse.identity(rank, dtype=np.float32)
    inverse = sparse.linalg.inv(pre_inv.tocsc())
    Y = inverse.dot(RQ.T).dot(matrix_train)
    print("Elapsed: {}".format(inhour(time.time() - start_time)))

    return np.array(RQ.todense()), np.array(Y.todense()), None

# def predict_vector(rating_vector, train_vector, remove_train=True):
#     dim = len(rating_vector)
#     candidate_index = np.argpartition(-rating_vector, dim-1)[:dim]
#     prediction_items = candidate_index[rating_vector[candidate_index].argsort()[::-1]]
    
#     if remove_train:
#         return np.delete(prediction_items, np.isin(prediction_items, train_vector.nonzero()[1]).nonzero()[0])
#     else:
#         return prediction_items

    
def predict_scores(matrix_U, matrix_V, bias=None,
                   penalize = False,
                   keyphrase_freq = I_K, 
                   critiqued_keyphrase = 0, 
                   matrix_Train = rtrain,
                   alpha = 0):
    prediction = matrix_U.dot(matrix_V.T)
    # Penalize
    if penalize == True:
        items_with_keyphrase = np.ravel(keyphrase_freq.T[critiqued_keyphrase].nonzero()[1])
        items_without_keyphrase = np.setdiff1d(np.arange(matrix_Train.shape[1]), items_with_keyphrase)
        prediction[items_without_keyphrase] = alpha # penalize
    
    return prediction

def predict_vector(rating_vector, train_vector, remove_train=True):
    dim = len(rating_vector)
    candidate_index = np.argpartition(-rating_vector, dim-1)[:dim]
    prediction_items = candidate_index[rating_vector[candidate_index].argsort()[::-1]]
    
    if remove_train:
        return np.delete(prediction_items, np.isin(prediction_items, train_vector.nonzero()[1]).nonzero()[0])
    else:
        return prediction_items


In [9]:
# initial Prediction
def predict_scores(matrix_U, matrix_V, bias=None,
                   penalize = False,
                   keyphrase_freq = I_K, 
                   critiqued_keyphrase = 0, 
                   matrix_Train = rtrain,
                   alpha = 0):
    
    prediction = matrix_U.dot(matrix_V.T)
    # Penalize
    if penalize == True:
        items_with_keyphrase = np.ravel(keyphrase_freq.T[critiqued_keyphrase].nonzero()[1])
        items_without_keyphrase = np.setdiff1d(np.arange(matrix_Train.shape[1]), items_with_keyphrase)
        prediction[items_without_keyphrase] = alpha # penalize
    
    return prediction


In [74]:
# Keyphrase Selection Helpers
def get_valid_keyphrases(keyphrase_freq,top_recommendations,item = None,threshold=50,mutiple_keyphrases_en = False, top_items = None):
    """
    Wrapper function to get either top 1 or top n keyphrases
    """
    if mutiple_keyphrases_en:
        top_keyphrases = []
        for item in top_items:
            top_keyphrases.extend(get_valid_keyphrases_for_one_item(keyphrase_freq,top_recommendations,item,threshold=threshold))
        return np.ravel(list(set(top_keyphrases))) # remove duplicate and reformat to np array
    else:
        return get_valid_keyphrases_for_one_item(keyphrase_freq,top_recommendations,item,threshold=threshold)

def get_valid_keyphrases_for_one_item(keyphrase_freq,top_recommendations, item,threshold=50):
    """
    Get keyphrases of item that make sense
    E.g. if the item has fewer than threshold=50 keyphrases, get all of them
    otherwise get top 50 keyphrases
    """
    keyphrase_length = len(keyphrase_freq[item].nonzero()[1])
    if keyphrase_length<threshold:
        return keyphrase_freq[item].nonzero()[1]
    else:
        keyphrases = np.ravel(keyphrase_freq[top_recommendations[0]].todense())
        top_keyphrases = np.argsort(keyphrases)[::-1][:threshold]
        return top_keyphrases
    
# For keyphrase selecting method # 3 "diff" 
def get_item_keyphrase_freq(keyphrase_freq,item):
    """
    Get item's keyphrase frequency 
    """
    count = keyphrase_freq[item].todense()
    return count/np.sum(count)

def get_all_item_keyphrase_freq(item_keyphrase_freq = I_K):
    res = []
    num_items = item_keyphrase_freq.shape[0]
    for item in range(num_items):
        res.append(np.array(get_item_keyphrase_freq(item_keyphrase_freq,item)))
    return np.array(res)

def get_keyphrase_popularity(df,keyphrases):
    """
    Get keyphrase popularity (count) from dataframe
    """
    keyphrase_popularity = np.zeros(len(keyphrases)) #initialize
    for i in range(len(df)):
        keyphrase_vector = literal_eval(df['keyVector'][i])
        keyphrase_popularity[keyphrase_vector] += 1 # count
    return keyphrase_popularity

In [73]:
I_K.shape[0]

7456

In [11]:
# One hot encoding of critiquing
def get_critiqued_UK(user_keyphrase_frequency,user_index,critiqued_keyphrase):
    """
    user_keyphrase_frequency is the U_K matrix (csr sparse matrix)
    return the one-hot encoding of the critique
    """
    U_K_cp = user_keyphrase_frequency.copy()
    U_K_cp[user_index] = 0
    U_K_cp[user_index,critiqued_keyphrase] = 1
    return U_K_cp

def project_one_hot_encoding(reg, user_keyphrase_frequency,user_index = 0,critiqued_keyphrase = 0, normalize_en = True):
    """
    Return the projection on user_sim space from one-hot encoding of critiqued keyphrase
    The res[user_index] should be target embedding row
    """
    critiqued_matrix = get_critiqued_UK(user_keyphrase_frequency, user_index, critiqued_keyphrase)
    res = reg.predict(critiqued_matrix)
    if normalize_en:
        res = normalize((res))
    return res

In [12]:
# Upper bound method 
def get_all_affected_items(wanted_keyphrases,keyphrase_freq):
    res = []
    for keyphrase in wanted_keyphrases:
        items = np.ravel(keyphrase_freq.T[keyphrase].nonzero()[1])
        res.extend(items)
    return np.array(list(set(res)))
    
def select_only_wanted_keyphrase(top_recommendations, wanted_keyphrases, keyphrase_freq, matrix_Train = rtrain):
    all_items_with_keyphrases = get_all_affected_items(wanted_keyphrases,keyphrase_freq)
    affected_items = np.setdiff1d(np.arange(matrix_Train.shape[1]), all_items_with_keyphrases) # Get all other keyphrases
    top_recommendations[~np.in1d(top_recommendations, affected_items)]
    return top_recommendations

def pruning(prediction_score, 
           wanted_keyphrases_random, 
           top_recommendations, 
           keyphrase_freq, 
           matrix_Train = rtrain,
           alpha = 0):
    items_with_keyphrase = get_all_affected_items(wanted_keyphrases_random, keyphrase_freq)
    #Return the unique values in ar1 that are not in ar2.
    items_without_keyphrase = np.setdiff1d(np.arange(matrix_Train.shape[1]), items_with_keyphrase)
#     print (items_without_keyphrase)
    print (sum(prediction_score[items_without_keyphrase]))
    score = np.copy(prediction_score)
    score[items_without_keyphrase] = alpha # penalize
    return score

# Utils

In [13]:
# Utility function for getting restaurant info from ItemIndex
def get_business_df(path = "../../data/yelp/business.json" ):
    with open(path,encoding="utf8") as json_file:
        data = json_file.readlines()
        data = list(map(json.loads, data))
    df = pd.DataFrame(data)
    
    return df

def get_restaurant_info(business_df, business_id, name = True, review_count = True, stars = True ):
    output_list = {}
    row_idx = int(business_df.index[business_df['business_id'] == business_id].tolist()[0])
    if name == True:
        output_list['name'] = business_df['name'][row_idx].encode('utf-8').strip()
    if review_count == True:
        output_list['review_count'] = business_df['review_count'][row_idx]
    if stars == True:
        output_list['stars'] = business_df['stars'][row_idx] 
    return output_list

# def get_businessid_from_Itemindex(ItemIndex_list, itemindex):
#     return ItemIndex_list['business_id'].tolist()[itemindex]

def get_restaurant_name(df_train, business_df, ItemIndex):
    rows = np.where(df_train['ItemIndex'] == ItemIndex)
    if len(rows)!= 0:
        business_id = df_train.loc[rows[0][0]]['business_id']
        item_info = get_restaurant_info(business_df, business_id)
        return item_info['name']
    return "NOT_FOUND"

# Evaluation 

In [14]:
# Evluation 
def recallk(vector_true_dense, hits, **unused):
    hits = len(hits.nonzero()[0])
    return float(hits)/len(vector_true_dense)

def precisionk(vector_predict, hits, **unused):
    hits = len(hits.nonzero()[0])
    return float(hits)/len(vector_predict)


def average_precisionk(vector_predict, hits, **unused):
    precisions = np.cumsum(hits, dtype=np.float32)/range(1, len(vector_predict)+1)
    return np.mean(precisions)


def r_precision(vector_true_dense, vector_predict, **unused):
    vector_predict_short = vector_predict[:len(vector_true_dense)]
    hits = len(np.isin(vector_predict_short, vector_true_dense).nonzero()[0])
    return float(hits)/len(vector_true_dense)


def _dcg_support(size):
    arr = np.arange(1, size+1)+1
    return 1./np.log2(arr)


def ndcg(vector_true_dense, vector_predict, hits):
    idcg = np.sum(_dcg_support(len(vector_true_dense)))
    dcg_base = _dcg_support(len(vector_predict))
    dcg_base[np.logical_not(hits)] = 0
    dcg = np.sum(dcg_base)
    return dcg/idcg


def click(hits, **unused):
    first_hit = next((i for i, x in enumerate(hits) if x), None)
    if first_hit is None:
        return 5
    else:
        return first_hit/10


def evaluate(matrix_Predict, matrix_Test, metric_names =['R-Precision', 'NDCG', 'Precision', 'Recall', 'MAP'], atK = [5, 10, 15, 20, 50], analytical=False):
    """
    :param matrix_U: Latent representations of users, for LRecs it is RQ, for ALSs it is U
    :param matrix_V: Latent representations of items, for LRecs it is Q, for ALSs it is V
    :param matrix_Train: Rating matrix for training, features.
    :param matrix_Test: Rating matrix for evaluation, true labels.
    :param k: Top K retrieval
    :param metric_names: Evaluation metrics
    :return:
    """
    global_metrics = {
        "R-Precision": r_precision,
        "NDCG": ndcg,
        "Clicks": click
    }

    local_metrics = {
        "Precision": precisionk,
        "Recall": recallk,
        "MAP": average_precisionk
    }

    output = dict()

    num_users = matrix_Predict.shape[0]

    for k in atK:

        local_metric_names = list(set(metric_names).intersection(local_metrics.keys()))
        results = {name: [] for name in local_metric_names}
        topK_Predict = matrix_Predict[:, :k]

        for user_index in tqdm(range(topK_Predict.shape[0])):
            vector_predict = topK_Predict[user_index]
            if len(vector_predict.nonzero()[0]) > 0:
                vector_true = matrix_Test[user_index]
                vector_true_dense = vector_true.nonzero()[1]
                hits = np.isin(vector_predict, vector_true_dense)

                if vector_true_dense.size > 0:
                    for name in local_metric_names:
                        results[name].append(local_metrics[name](vector_true_dense=vector_true_dense,
                                                                 vector_predict=vector_predict,
                                                                 hits=hits))

        results_summary = dict()
        if analytical:
            for name in local_metric_names:
                results_summary['{0}@{1}'.format(name, k)] = results[name]
        else:
            for name in local_metric_names:
                results_summary['{0}@{1}'.format(name, k)] = (np.average(results[name]),
                                                              1.96*np.std(results[name])/np.sqrt(num_users))
        output.update(results_summary)

    global_metric_names = list(set(metric_names).intersection(global_metrics.keys()))
    results = {name: [] for name in global_metric_names}

    topK_Predict = matrix_Predict[:]

    for user_index in tqdm(range(topK_Predict.shape[0])):
        vector_predict = topK_Predict[user_index]

        if len(vector_predict.nonzero()[0]) > 0:
            vector_true = matrix_Test[user_index]
            vector_true_dense = vector_true.nonzero()[1]
            hits = np.isin(vector_predict, vector_true_dense)

            # if user_index == 1:
            #     import ipdb;
            #     ipdb.set_trace()

            if vector_true_dense.size > 0:
                for name in global_metric_names:
                    results[name].append(global_metrics[name](vector_true_dense=vector_true_dense,
                                                              vector_predict=vector_predict,
                                                              hits=hits))

    results_summary = dict()
    if analytical:
        for name in global_metric_names:
            results_summary[name] = results[name]
    else:
        for name in global_metric_names:
            results_summary[name] = (np.average(results[name]), 1.96*np.std(results[name])/np.sqrt(num_users))
    output.update(results_summary)

    return output



# Critiquing Pipline

In [15]:
business_df = get_business_df()

In [16]:
keyphrase_popularity = np.loadtxt('../data/yelp/'+'keyphrase_popularity.txt', dtype=int)

In [17]:
Y, RQt, Bias = plrec(rtrain,
                    iteration = 10,
                    lamb = 200,
                    rank = 200)
RQ = RQt.T
reg = LinearRegression().fit(normalize(U_K), Y)

Randomized SVD
Elapsed: 00:00:00
Closed-Form Linear Optimization
Elapsed: 00:00:00


In [18]:
# Set up dataframe 

# post_ranki is post rank with different lambda ratio for combining pre-post User similarity matrix 

columns = ['user_id', 'target_item', 'item_name', 'iter', 'pre_rank', 
           'top_prediction_item_name',
           'post_rank_random_all',
           'post_rank_random_upper',
           'random_scores',
           'post_rank_pop_all',
           'post_rank_pop_upper',
           'pop_scores',
           'post_rank_diff_all',
           'post_rank_diff_upper',
           'diff_scores',
           'critiqued_keyphrase_random',
           'keyphrase_name_random',
           'critiqued_keyphrase_pop',
           'keyphrase_name_pop',
           'critiqued_keyphrase_diff',
           'keyphrase_name_diff',
           'num_existing_keyphrases',
           'pure_pruning_rank'] 
df = pd.DataFrame(columns=columns)
row = {}

## LP Objectives

In [19]:
def Average(initial_prediction_u, keyphrase_freq, affected_items, unaffected_items, num_keyphrases, query, test_user, item_latent, reg, all_equal = True):
    critiqued_vector = np.zeros(keyphrase_freq.shape[1])
    
    for q in query:
        critiqued_vector[q] = 1
#         critiqued_vector[q] = keyphrase_freq[test_user,q]
        
    num_critiques = len(query)
    
    # Get item latent for updating prediction
    W2 = reg.coef_
    W = item_latent.dot(W2)
    
    optimal_lambda = 1 # weight all critiquing equally
    lambdas = [optimal_lambda]*num_critiques
    
    # Record lambda values 
    for k in range(num_critiques):
        critiqued_vector[query[k]] *= optimal_lambda

    critique_score = predict_scores(matrix_U=reg.predict(critiqued_vector.reshape(1, -1)),
                                    matrix_V=item_latent)

    if all_equal:
        # weight initial and each critiquing equally 
        new_prediction = initial_prediction_u/(num_critiques) + critique_score.flatten()
    else:
        # weight intial and combined critiquing equally
        new_prediction = initial_prediction_u + critique_score.flatten() 
#     print (len(new_prediction))
    return new_prediction, lambdas   


In [20]:
def LP1SimplifiedOptimize(initial_prediction_u, keyphrase_freq, affected_items, unaffected_items, num_keyphrases, query, test_user, item_latent, reg):

    critiqued_vector = np.zeros(keyphrase_freq.shape[1])

    for q in query:
        critiqued_vector[q] = 1 # set critiqued/boosted keyphrase to 1
#         critiqued_vector[q] = -keyphrase_freq[test_user][q]

    num_critiques = len(query)

    W2 = reg.coef_
    W = item_latent.dot(W2)

    num_affected_items = len(affected_items)
    num_unaffected_items = len(unaffected_items)

    start_time = time.time()

    # Model
    m = Model("LP1Simplified") # Create gurobi model with name

    # Assignment variables
    lambs = []

    for k in range(num_critiques):
        lambs.append(m.addVar(lb=-1,
                              ub=1,
                              vtype=GRB.CONTINUOUS,
                              name="lamb%d" % query[k]))

    m.setObjective(quicksum(initial_prediction_u[affected_item] * num_unaffected_items + quicksum(lambs[k] * critiqued_vector[query[k]] * W[affected_item][query[k]] * num_unaffected_items for k in range(num_critiques)) for affected_item in affected_items) - quicksum(initial_prediction_u[unaffected_item] * num_affected_items + quicksum(lambs[k] * critiqued_vector[query[k]] * W[unaffected_item][query[k]] * num_affected_items for k in range(num_critiques)) for unaffected_item in unaffected_items), GRB.MINIMIZE)

    # Optimize
    m.optimize()

    print("Elapsed: {}".format(inhour(time.time() - start_time)))

    lambdas = []
    for k in range(num_critiques):
        optimal_lambda = m.getVars()[k].X
        lambdas.append(optimal_lambda)
        critiqued_vector[query[k]] *= optimal_lambda

    modified_user_laten = reg.predict(critiqued_vector.reshape(1, -1)) + self.Y
    new_prediction = predict_scores(matrix_U=modified_user_laten,
                                    matrix_V=item_latent)
#     critique_score = predict_scores(matrix_U=reg.predict(critiqued_vector.reshape(1, -1)),
#                                     matrix_V=item_latent)
#     new_prediction = initial_prediction_u + critique_score.flatten()

    return new_prediction, lambdas
    
    

In [21]:
def LP1SumToOneOptimize(initial_prediction_u, keyphrase_freq, affected_items, unaffected_items, num_keyphrases, query, test_user, item_latent, reg):

    critiqued_vector = np.zeros(keyphrase_freq.shape[1])

    for q in query:
        critiqued_vector[q] = 1 # set critiqued/boosted keyphrase to 1
#         critiqued_vector[q] = -keyphrase_freq[test_user][q]

    num_critiques = len(query)

    W2 = reg.coef_
    W = item_latent.dot(W2)

    num_affected_items = len(affected_items)
    num_unaffected_items = len(unaffected_items)

    start_time = time.time()

    # Model
    m = Model("LP1SumToOneOptimize")

    # Assignment variables
    lambs = []
    for k in range(1+num_critiques):
        lambs.append(m.addVar(lb=0,
                              ub=1,
                              vtype=GRB.CONTINUOUS,
                              name="lamb%d" % k))

    m.addConstr((sum(lambs[k] for k in range(1+num_critiques)) == 1), name="sum_to_one")

    m.setObjective(quicksum(lambs[0] * initial_prediction_u[affected_item] * num_unaffected_items + quicksum(lambs[k+1] * critiqued_vector[query[k]] * W[affected_item][query[k]] * num_unaffected_items for k in range(num_critiques)) for affected_item in affected_items) - quicksum(lambs[0] * initial_prediction_u[unaffected_item] * num_affected_items + quicksum(lambs[k+1] * critiqued_vector[query[k]] * W[unaffected_item][query[k]] * num_affected_items for k in range(num_critiques)) for unaffected_item in unaffected_items), GRB.MINIMIZE)

    # Optimize
    m.optimize()

    print("Elapsed: {}".format(inhour(time.time() - start_time)))

    lambdas = []
    for k in range(1+num_critiques):
        optimal_lambda = m.getVars()[k].X
        lambdas.append(optimal_lambda)

    for k in range(num_critiques):
        critiqued_vector[query[k]] *= lambdas[k+1]

    critique_score = predict_scores(matrix_U=reg.predict(critiqued_vector.reshape(1, -1)),
                                    matrix_V=item_latent)
    new_prediction = lambdas[0]*initial_prediction_u + critique_score.flatten()

    return new_prediction, lambdas


In [122]:
def rankSVM(initial_prediction_u, keyphrase_freq, affected_items, unaffected_items, num_keyphrases, query, test_user, item_latent, reg, item_keyphrase_freq = I_K):
    critiqued_vector = np.zeros(keyphrase_freq.shape[1])

    for q in query:
#         critiqued_vector[q] = 1 # set critiqued/boosted keyphrase to 1
        critiqued_vector[q] = max(2*keyphrase_freq[test_user , q],1)

    num_critiques = len(query)

    W2 = reg.coef_
    W = item_latent.dot(W2)

    num_affected_items = len(affected_items)
    num_unaffected_items = len(unaffected_items)

    start_time = time.time()

    # Model
    m = Model("LP2RankSVM")
    
    # Assignment variables
    lambs = []
    us = []
    xis = []
    # weight w
    for k in range(num_critiques):
        lambs.append(m.addVar(lb=-1,
                              ub=1,
                              vtype=GRB.CONTINUOUS,
                              name="lamb%d" % k))
    # dummy variable u for absolute lamb
    for k in range(num_critiques):
        us.append(m.addVar(lb=0,
                              ub=1,
                              vtype=GRB.CONTINUOUS,
                              name="u%d" % k))
    
    # slack variables xi
    for k in range(num_critiques):
        for i in range(num_affected_items):
            for j in range(num_unaffected_items):
                xis.append(m.addVar(lb = 0, 
                                    vtype = GRB.CONTINUOUS,
                                    name = "xi_%d_%d_%d" % (i,j,k) )) # TODO
                
#     print ('num_critiques, ', num_critiques)
#     print ('num_affected_items, ', num_affected_items)
#     print ('num_unaffected_items, ', num_unaffected_items)
#     print ('xi size, ', len(xis))
#     print ('lambda size', len(lambs))

    ## constraints
    # constraints for dummy variable
    for k in range(num_critiques):
        m.addConstr(us[k] >= lambs[k])
        m.addConstr(us[k] >= -lambs[k])
    
    # constraints for rankSVM 
    for k in range(num_critiques):
        for i in range(num_affected_items):
            for j in range(num_unaffected_items):
#                 print ('affected item, ',affected_items[i])
#                 print ('query, ', query[k])
#                 print ('affected item phi, ',item_keyphrase_freq[affected_items[i] , query[k]])
#                 print ('unaffected item phi, ', item_keyphrase_freq[unaffected_items[i] , query[k]])
#                 print ('xi, ', xis[k*(num_affected_items*num_unaffected_items) + i*num_unaffected_items + j] )
#                 print ('lambda, ',lambs[k])
                m.addConstr(lambs[k]*item_keyphrase_freq[affected_items[i] , query[k]] >= lambs[k]*item_keyphrase_freq[unaffected_items[i] , query[k]] + 1 - xis[k*(num_affected_items*num_unaffected_items) + i*num_unaffected_items + j] , name = "constraints%d_%d_%d" % (k,i,j))
    C = 1 #regularization parameter (trading-off margin size against training error
#     m.setObjective(quicksum(lambs) + C * quicksum(xis) ,GRB.MINIMIZE)
    m.setObjective(quicksum(us) + C * quicksum(xis) ,GRB.MINIMIZE)
                
                
#     m.addConstr((sum(lambs[k] for k in range(1+num_critiques)) == 1), name="sum_to_one")
#     m.addConstr( ,name = "r%d_star" % )
#     m.setObjective(quicksum(lambs[i] for i in range() + C * quicksum( for i in range() for j in range() for k in range() ),GRB.MINIMIZE)
#     m.setObjective(quicksum(lambs[0] * initial_prediction_u[affected_item] * num_unaffected_items + quicksum(lambs[k+1] * critiqued_vector[query[k]] * W[affected_item][query[k]] * num_unaffected_items for k in range(num_critiques)) for affected_item in affected_items) - quicksum(lambs[0] * initial_prediction_u[unaffected_item] * num_affected_items + quicksum(lambs[k+1] * critiqued_vector[query[k]] * W[unaffected_item][query[k]] * num_affected_items for k in range(num_critiques)) for unaffected_item in unaffected_items), GRB.MINIMIZE)

    # Optimize
    m.optimize()

    print("Elapsed: {}".format(inhour(time.time() - start_time)))

    lambdas = []
    for k in range(num_critiques):
        optimal_lambda = m.getVarByName("lamb%d" % k).X
#         optimal_lambda = m.getVars()[k].X 
        lambdas.append(optimal_lambda)
        
#     print ('GETVARS, ',m.getVars())
    
    for k in range(num_critiques):
        critiqued_vector[query[k]] *= lambdas[k]

    critique_score = predict_scores(matrix_U=reg.predict(critiqued_vector.reshape(1, -1)),
                                    matrix_V=item_latent)
    
    new_prediction = initial_prediction_u + critique_score.flatten() # now adding initial to combined critiquing 

    return new_prediction, lambdas

    

In [120]:
class LP1Simplified(object):
    def __init__(self, keyphrase_freq, item_keyphrase_freq, row, matrix_Train, matrix_Test, test_users,
                 target_ranks, num_items_sampled, num_keyphrases, df,
                 max_iteration_threshold, keyphrase_popularity, dataset_name,
                 model, parameters_row, keyphrases_names, keyphrase_selection_method, max_wanted_keyphrase, **unused):
        self.keyphrase_freq = keyphrase_freq
        self.item_keyphrase_freq = item_keyphrase_freq
        self.row = row
        self.matrix_Train = matrix_Train
        self.num_users, self.num_items = matrix_Train.shape
        self.matrix_Test = matrix_Test
        self.test_users = test_users
        self.target_ranks = target_ranks
        self.num_items_sampled = num_items_sampled
        self.num_keyphrases = num_keyphrases
        self.df = df
        self.max_iteration_threshold = max_iteration_threshold
        self.keyphrase_popularity = keyphrase_popularity
        self.dataset_name = dataset_name
        self.model = model
        self.parameters_row = parameters_row
        self.keyphrase_selection_method = keyphrase_selection_method
        self.max_wanted_keyphrase = max_wanted_keyphrase
        
        
        self.keyphrases_names = keyphrases_names

    def start_critiquing(self):
#         self.get_initial_predictions() # No need to do it every time
        self.RQ = RQ
        Yt = Y.T 
        self.Y = Y

        self.reg = reg

        self.prediction_scores = predict_scores(matrix_U=self.RQ,
                                                matrix_V=self.Y,
                                                bias=Bias).T
        
        for user in self.test_users:
            # User id starts from 0
            self.row['user_id'] = user
            
            initial_prediction_items = predict_vector(rating_vector=self.prediction_scores[user],
                                                            train_vector=self.matrix_Train[user],
                                                            remove_train=True)
            # For keyphrase selection method 'diff' 
            top_recommended_keyphrase_freq = get_item_keyphrase_freq(self.item_keyphrase_freq,item = initial_prediction_items[0])
            
            # The iteration will stop if the wanted item is in top n
            for target_rank in self.target_ranks:
                self.row['target_rank'] = target_rank
                
                # Pick wanted items in test items
                candidate_items = self.matrix_Test[user].nonzero()[1]
                train_items = self.matrix_Train[user].nonzero()[1]
                wanted_items = np.setdiff1d(candidate_items, train_items)
                

                
                for item in wanted_items:
                    # Item id starts from 0
                    self.row['item_id'] = item
                    try:
                        self.row['item_name'] = get_restaurant_name(df_train, business_df,item)
                    except:
                        self.row['item_name'] = 'NOT_FOUND'
                    # Set the wanted item's initial rank as None
                    self.row['item_rank'] = None
                    # Set the wanted item's initial prediction score as None
                    self.row['item_score'] = None
                    
                    if self.keyphrase_selection_method == "random" or self.keyphrase_selection_method == "pop":
                        # Get the item's existing keyphrases (we can boost)
                        remaining_keyphrases = self.item_keyphrase_freq[item].nonzero()[1]
                    if self.keyphrase_selection_method == "diff":
                        # For keyphrase selection method 'diff' 
                        target_keyphrase_freq = get_item_keyphrase_freq(self.item_keyphrase_freq,item = item)
                        diff_keyphrase_freq = target_keyphrase_freq - top_recommended_keyphrase_freq
                        remaining_keyphrases = np.argsort(np.ravel(diff_keyphrase_freq))[::-1][:self.max_wanted_keyphrase]
                        
#                    print("The number of remaining_keyphrases is {}. remaining_keyphrases are: {}".format(len(remaining_keyphrases), remaining_keyphrases))
                    self.row['num_existing_keyphrases'] = len(remaining_keyphrases)
                    if len(remaining_keyphrases) == 0:
                        break
                    self.row['iteration'] = 0
                    self.row['critiqued_keyphrase'] = None
                    self.row['result'] = None
                    self.df = self.df.append(self.row, ignore_index=True)

                    query = []
                    affected_items = np.array([])

                    for iteration in range(self.max_iteration_threshold):
                        self.row['iteration'] = iteration + 1
                        
                        if self.keyphrase_selection_method == "pop":
                            # Always critique the most popular keyphrase
                            critiqued_keyphrase = remaining_keyphrases[np.argmax(self.keyphrase_popularity[remaining_keyphrases])]
    #                        print("remaining keyphrases popularity: {}".format(self.keyphrase_popularity[remaining_keyphrases]))
                        elif self.keyphrase_selection_method == "random":
                            critiqued_keyphrase = np.random.choice(remaining_keyphrases, size=1, replace=False)[0]
            
                        elif self.keyphrase_selection_method == "diff":
                            critiqued_keyphrase = remaining_keyphrases[0]
#                             print ('critiqued_keyphrase', critiqued_keyphrase)
                        
                        self.row['critiqued_keyphrase'] = critiqued_keyphrase
                        self.row['critiqued_keyphrase_name'] = keyphrases_names[critiqued_keyphrase]
                        query.append(critiqued_keyphrase)

                        # Get affected items (items have critiqued keyphrase)
                        current_affected_items = self.item_keyphrase_freq[:, critiqued_keyphrase].nonzero()[0]
                        affected_items = np.unique(np.concatenate((affected_items, current_affected_items))).astype(int)
                        unaffected_items = np.setdiff1d(range(self.num_items), affected_items)

                        if iteration == 0:
                            prediction_items = initial_prediction_items #calculated once for each user

                        affected_items_mask = np.in1d(prediction_items, affected_items)
                        affected_items_index_rank = np.where(affected_items_mask == True)
                        unaffected_items_index_rank = np.where(affected_items_mask == False)

                        import copy
#                         prediction_scores_u, lambdas = LP1SimplifiedOptimize(initial_prediction_u=self.prediction_scores[user],
#                                                                              keyphrase_freq=copy.deepcopy(self.keyphrase_freq),
#                                                                              affected_items=np.intersect1d(affected_items, prediction_items[affected_items_index_rank[0][:100]]),
#                                                                              unaffected_items=np.intersect1d(unaffected_items, prediction_items[unaffected_items_index_rank[0][:100]]),
#                                                                              num_keyphrases=self.num_keyphrases,
#                                                                              query=query,
#                                                                              test_user=user,
#                                                                              item_latent=self.RQ,
#                                                                              reg=self.reg)
#                         prediction_scores_u, lambdas = LP1SumToOneOptimize(initial_prediction_u=self.prediction_scores[user],
#                                                                            keyphrase_freq=copy.deepcopy(self.keyphrase_freq),
#                                                                            affected_items=np.intersect1d(affected_items, prediction_items[affected_items_index_rank[0][:100]]),
#                                                                            unaffected_items=np.intersect1d(unaffected_items, prediction_items[unaffected_items_index_rank[0][:100]]),
#                                                                            num_keyphrases=self.num_keyphrases,
#                                                                            query=query,
#                                                                            test_user=user,
#                                                                            item_latent=self.RQ,
#                                                                            reg=self.reg)
#                         prediction_scores_u, lambdas = Average(initial_prediction_u=self.prediction_scores[user],
#                                                                              keyphrase_freq=copy.deepcopy(self.keyphrase_freq),
#                                                                              affected_items=np.intersect1d(affected_items, prediction_items[affected_items_index_rank[0][:100]]),
#                                                                              unaffected_items=np.intersect1d(unaffected_items, prediction_items[unaffected_items_index_rank[0][:100]]),
#                                                                              num_keyphrases=self.num_keyphrases,
#                                                                              query=query,
#                                                                              test_user=user,
#                                                                              item_latent=self.RQ,
#                                                                              reg=self.reg)
                        prediction_scores_u, lambdas = rankSVM(initial_prediction_u=self.prediction_scores[user],
                                                                             keyphrase_freq=copy.deepcopy(self.keyphrase_freq),
                                                                             affected_items=np.intersect1d(affected_items, prediction_items[affected_items_index_rank[0][:10]]),
                                                                             unaffected_items=np.intersect1d(unaffected_items, prediction_items[unaffected_items_index_rank[0][:10]]),
                                                                             num_keyphrases=self.num_keyphrases,
                                                                             query=query,
                                                                             test_user=user,
                                                                             item_latent=self.RQ,
                                                                             reg=self.reg
                                                                             )
#                         item_keyphrase_freq = get_all_item_keyphrase_freq()

                        self.row['lambda'] = lambdas
                        prediction_items = predict_vector(rating_vector=prediction_scores_u,
                                                          train_vector=self.matrix_Train[user],
                                                          remove_train=False)
                        recommended_items = prediction_items
                        
                        # Current item rank
                        item_rank = np.where(recommended_items == item)[0][0]

                        self.row['item_rank'] = item_rank
                        self.row['item_score'] = prediction_scores_u[item]

                        if item_rank + 1 <= target_rank:
                            # Items is ranked within target rank
                            self.row['result'] = 'successful'
                            self.df = self.df.append(self.row, ignore_index=True)
                            break
                        else:
                            remaining_keyphrases = np.setdiff1d(remaining_keyphrases, critiqued_keyphrase)
                            # Continue if more keyphrases and iterations remained
                            if len(remaining_keyphrases) > 0 and self.row['iteration'] < self.max_iteration_threshold:
                                self.row['result'] = None
                                self.df = self.df.append(self.row, ignore_index=True)
                            else:
                                # Otherwise, mark fail
                                self.row['result'] = 'fail'
                                self.df = self.df.append(self.row, ignore_index=True)
                                break
#                         break ## For Testing LP Objective
        return self.df


    def get_initial_predictions(self):
        self.RQ, Yt, Bias = plrec(self.matrix_Train,
                                       iteration=self.parameters_row['iter'],
                                       lamb=self.parameters_row['lambda'],
                                       rank=self.parameters_row['rank'])
        self.Y = Yt.T

        self.reg = LinearRegression().fit(self.keyphrase_freq, self.RQ)

        self.prediction_scores = predict_scores(matrix_U=self.RQ,
                                                matrix_V=self.Y,
                                                bias=Bias)



In [123]:
row = {}
matrix_Train = rtrain
matrix_Test = rtest
test_users = [1]#np.arange(20)
target_ranks = [20,50]
num_items_sampled = 5
num_keyphrases = 235
df = pd.DataFrame(row)
max_iteration_threshold = 20
keyphrase_popularity = keyphrase_popularity
dataset_name = "yelp"
model = "plrec"
parameters_row = {'iter': 10,
                  'lambda':200,
                  'rank':200}
keyphrases_names = keyphrases
keyphrase_selection_method = 'diff'
max_wanted_keyphrase = 20


critiquing_model = LP1Simplified(keyphrase_freq=U_K,
                                item_keyphrase_freq=I_K,
                                row=row,
                                matrix_Train=matrix_Train,
                                matrix_Test=matrix_Test,
                                test_users=test_users,
                                target_ranks=target_ranks,
                                num_items_sampled=num_items_sampled,
                                num_keyphrases=num_keyphrases,
                                df=df,
                                max_iteration_threshold=max_iteration_threshold,
                                keyphrase_popularity=keyphrase_popularity,
                                dataset_name=dataset_name,
                                model=model,
                                parameters_row=parameters_row,
                                keyphrases_names = keyphrases_names,
                                keyphrase_selection_method = keyphrase_selection_method,
                                max_wanted_keyphrase = max_wanted_keyphrase)
df = critiquing_model.start_critiquing()

Gurobi Optimizer version 9.0.1 build v9.0.1rc0 (mac64)
Optimize a model with 102 rows, 102 columns and 204 nonzeros
Model fingerprint: 0x1ba85def
Coefficient statistics:
  Matrix range     [1e+00, 5e+01]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Presolve removed 102 rows and 102 columns
Presolve time: 0.00s
Presolve: All rows and columns removed
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    1.0000000e+00   0.000000e+00   1.000000e+00      0s
Extra one simplex iteration after uncrush
       1    1.0000000e+00   0.000000e+00   0.000000e+00      0s

Solved in 1 iterations and 0.01 seconds
Optimal objective  1.000000000e+00
Elapsed: 00:00:00
Gurobi Optimizer version 9.0.1 build v9.0.1rc0 (mac64)
Optimize a model with 102 rows, 102 columns and 204 nonzeros
Model fingerprint: 0xc161e890
Coefficient statistics:
  Matrix range     [1e+00, 5e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00,

  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Presolve removed 102 rows and 102 columns
Presolve time: 0.00s
Presolve: All rows and columns removed
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    1.0000000e+00   0.000000e+00   1.000000e+00      0s
Extra one simplex iteration after uncrush
       1    1.0000000e+00   0.000000e+00   0.000000e+00      0s

Solved in 1 iterations and 0.01 seconds
Optimal objective  1.000000000e+00
Elapsed: 00:00:00
Gurobi Optimizer version 9.0.1 build v9.0.1rc0 (mac64)
Optimize a model with 102 rows, 102 columns and 204 nonzeros
Model fingerprint: 0xd9493c9b
Coefficient statistics:
  Matrix range     [1e+00, 3e+01]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Presolve removed 102 rows and 102 columns
Presolve time: 0.00s
Presolve: All rows and columns removed
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    1.0000000e+00   0.

       0    1.0000000e+00   0.000000e+00   1.000000e+00      0s
Extra one simplex iteration after uncrush
       1    1.0000000e+00   0.000000e+00   0.000000e+00      0s

Solved in 1 iterations and 0.01 seconds
Optimal objective  1.000000000e+00
Elapsed: 00:00:00
Gurobi Optimizer version 9.0.1 build v9.0.1rc0 (mac64)
Optimize a model with 102 rows, 102 columns and 204 nonzeros
Model fingerprint: 0x13534375
Coefficient statistics:
  Matrix range     [1e+00, 1e+01]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Presolve removed 102 rows and 102 columns
Presolve time: 0.00s
Presolve: All rows and columns removed
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    1.0000000e+00   0.000000e+00   1.000000e+00      0s
Extra one simplex iteration after uncrush
       1    1.0000000e+00   0.000000e+00   0.000000e+00      0s

Solved in 1 iterations and 0.01 seconds
Optimal objective  1.000000000e+00
Elapsed: 00:00:00



Solved in 1 iterations and 0.01 seconds
Optimal objective  1.000000000e+00
Elapsed: 00:00:00
Gurobi Optimizer version 9.0.1 build v9.0.1rc0 (mac64)
Optimize a model with 102 rows, 102 columns and 204 nonzeros
Model fingerprint: 0x6cfa6ec3
Coefficient statistics:
  Matrix range     [1e+00, 6e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Presolve removed 102 rows and 102 columns
Presolve time: 0.00s
Presolve: All rows and columns removed
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    1.0000000e+00   0.000000e+00   1.000000e+00      0s
Extra one simplex iteration after uncrush
       1    1.0000000e+00   0.000000e+00   0.000000e+00      0s

Solved in 1 iterations and 0.01 seconds
Optimal objective  1.000000000e+00
Elapsed: 00:00:00
Gurobi Optimizer version 9.0.1 build v9.0.1rc0 (mac64)
Optimize a model with 102 rows, 102 columns and 204 nonzeros
Model fingerprint: 0x6e52a15c
Coefficient statistics:


Optimize a model with 102 rows, 102 columns and 204 nonzeros
Model fingerprint: 0x1ba85def
Coefficient statistics:
  Matrix range     [1e+00, 5e+01]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Presolve removed 102 rows and 102 columns
Presolve time: 0.00s
Presolve: All rows and columns removed
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    1.0000000e+00   0.000000e+00   1.000000e+00      0s
Extra one simplex iteration after uncrush
       1    1.0000000e+00   0.000000e+00   0.000000e+00      0s

Solved in 1 iterations and 0.01 seconds
Optimal objective  1.000000000e+00
Elapsed: 00:00:00
Gurobi Optimizer version 9.0.1 build v9.0.1rc0 (mac64)
Optimize a model with 102 rows, 102 columns and 204 nonzeros
Model fingerprint: 0xc161e890
Coefficient statistics:
  Matrix range     [1e+00, 5e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Presolve remo

  RHS range        [1e+00, 1e+00]
Presolve removed 102 rows and 102 columns
Presolve time: 0.00s
Presolve: All rows and columns removed
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    1.0000000e+00   0.000000e+00   1.000000e+00      0s
Extra one simplex iteration after uncrush
       1    1.0000000e+00   0.000000e+00   0.000000e+00      0s

Solved in 1 iterations and 0.01 seconds
Optimal objective  1.000000000e+00
Elapsed: 00:00:00
Gurobi Optimizer version 9.0.1 build v9.0.1rc0 (mac64)
Optimize a model with 102 rows, 102 columns and 204 nonzeros
Model fingerprint: 0xd9493c9b
Coefficient statistics:
  Matrix range     [1e+00, 3e+01]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Presolve removed 102 rows and 102 columns
Presolve time: 0.00s
Presolve: All rows and columns removed
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    1.0000000e+00   0.000000e+00   1.000000e+00      0s


       0    1.0000000e+00   0.000000e+00   1.000000e+00      0s
Extra one simplex iteration after uncrush
       1    1.0000000e+00   0.000000e+00   0.000000e+00      0s

Solved in 1 iterations and 0.01 seconds
Optimal objective  1.000000000e+00
Elapsed: 00:00:00
Gurobi Optimizer version 9.0.1 build v9.0.1rc0 (mac64)
Optimize a model with 102 rows, 102 columns and 204 nonzeros
Model fingerprint: 0x13534375
Coefficient statistics:
  Matrix range     [1e+00, 1e+01]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Presolve removed 102 rows and 102 columns
Presolve time: 0.00s
Presolve: All rows and columns removed
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    1.0000000e+00   0.000000e+00   1.000000e+00      0s
Extra one simplex iteration after uncrush
       1    1.0000000e+00   0.000000e+00   0.000000e+00      0s

Solved in 1 iterations and 0.01 seconds
Optimal objective  1.000000000e+00
Elapsed: 00:00:00



Solved in 1 iterations and 0.01 seconds
Optimal objective  1.000000000e+00
Elapsed: 00:00:00
Gurobi Optimizer version 9.0.1 build v9.0.1rc0 (mac64)
Optimize a model with 102 rows, 102 columns and 204 nonzeros
Model fingerprint: 0x6cfa6ec3
Coefficient statistics:
  Matrix range     [1e+00, 6e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Presolve removed 102 rows and 102 columns
Presolve time: 0.00s
Presolve: All rows and columns removed
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    1.0000000e+00   0.000000e+00   1.000000e+00      0s
Extra one simplex iteration after uncrush
       1    1.0000000e+00   0.000000e+00   0.000000e+00      0s

Solved in 1 iterations and 0.01 seconds
Optimal objective  1.000000000e+00
Elapsed: 00:00:00
Gurobi Optimizer version 9.0.1 build v9.0.1rc0 (mac64)
Optimize a model with 102 rows, 102 columns and 204 nonzeros
Model fingerprint: 0x6e52a15c
Coefficient statistics:


In [124]:
df

Unnamed: 0,critiqued_keyphrase,item_id,item_name,item_rank,item_score,iteration,num_existing_keyphrases,result,target_rank,user_id,critiqued_keyphrase_name,lambda
0,,75.0,b'Salad King Restaurant',,,0.0,20.0,,20.0,1.0,,
1,2,75.0,b'Salad King Restaurant',2,27.0855,1.0,20.0,successful,20.0,1.0,thai,[1.0]
2,,78.0,b'The Works Gourmet Burger Bistro',,,0.0,20.0,,20.0,1.0,thai,[1.0]
3,48,78.0,b'The Works Gourmet Burger Bistro',481,0.882674,1.0,20.0,,20.0,1.0,burger,[1.0]
4,,99.0,b'La Bella Managua',,,0.0,20.0,,20.0,1.0,burger,[1.0]
...,...,...,...,...,...,...,...,...,...,...,...,...
175,136,7082.0,b'Essence of Life Organics',176,6.85481,1.0,20.0,,50.0,1.0,store,[1.0]
176,,7205.0,b'Bellwoods Brewery',,,0.0,20.0,,50.0,1.0,store,[1.0]
177,41,7205.0,b'Bellwoods Brewery',3,28.5422,1.0,20.0,successful,50.0,1.0,beer,[1.0]
178,,7354.0,b'Spectacle',,,0.0,20.0,,50.0,1.0,beer,[1.0]


In [110]:
# ranksvm with abs
df.head(20)

Unnamed: 0,critiqued_keyphrase,item_id,item_name,item_rank,item_score,iteration,num_existing_keyphrases,result,target_rank,user_id,critiqued_keyphrase_name,lambda
0,,75.0,b'Salad King Restaurant',,,0.0,20.0,,20.0,1.0,,
1,2.0,75.0,b'Salad King Restaurant',2.0,27.0855,1.0,20.0,successful,20.0,1.0,thai,"[1.0, 1.0]"
2,,78.0,b'The Works Gourmet Burger Bistro',,,0.0,20.0,,20.0,1.0,thai,"[1.0, 1.0]"
3,48.0,78.0,b'The Works Gourmet Burger Bistro',481.0,0.882674,1.0,20.0,,20.0,1.0,burger,"[1.0, 1.0]"
4,4.0,78.0,b'The Works Gourmet Burger Bistro',345.0,1.72736,2.0,20.0,,20.0,1.0,fry,"[1.0, 0.33333333333333326, 1.0]"
5,5.0,78.0,b'The Works Gourmet Burger Bistro',6976.0,-0.91167,3.0,20.0,,20.0,1.0,fried,"[1.0, 0.33333333333333326, 0.19999999999999996..."
6,8.0,78.0,b'The Works Gourmet Burger Bistro',1404.0,1.13573,4.0,20.0,,20.0,1.0,lunch,"[1.0, 0.5, 1.0, 1.0, 1.0]"
7,25.0,78.0,b'The Works Gourmet Burger Bistro',1379.0,1.07653,5.0,20.0,,20.0,1.0,vegetarian,"[1.0, 1.0, 0.19999999999999996, 1.0, 1.0, 1.0]"
8,27.0,78.0,b'The Works Gourmet Burger Bistro',1051.0,0.930398,6.0,20.0,,20.0,1.0,bbq,"[1.0, 1.0, 0.33333333333333326, 0.333333333333..."
9,49.0,78.0,b'The Works Gourmet Burger Bistro',102.0,9.50102,7.0,20.0,,20.0,1.0,cheese,"[1.0, 0.33333333333333326, 1.0, 0.333333333333..."


In [89]:
# ranksvm without abs
df

Unnamed: 0,critiqued_keyphrase,item_id,item_name,item_rank,item_score,iteration,num_existing_keyphrases,result,target_rank,user_id,critiqued_keyphrase_name,lambda
0,,75.0,b'Salad King Restaurant',,,0.0,20.0,,20.0,1.0,,
1,2,75.0,b'Salad King Restaurant',6295,-0.100984,1.0,20.0,,20.0,1.0,thai,"[1.0, 0.0]"
2,1,75.0,b'Salad King Restaurant',2,43.3392,2.0,20.0,successful,20.0,1.0,fast,"[1.0, 1.0, 1.0]"
3,,78.0,b'The Works Gourmet Burger Bistro',,,0.0,20.0,,20.0,1.0,fast,"[1.0, 1.0, 1.0]"
4,48,78.0,b'The Works Gourmet Burger Bistro',7424,-0.70013,1.0,20.0,,20.0,1.0,burger,"[1.0, 0.0]"
...,...,...,...,...,...,...,...,...,...,...,...,...
1614,121,7354.0,b'Spectacle',5201,-0.391135,16.0,20.0,,50.0,1.0,dumpling,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, -1.0, 1.0, 1.0,..."
1615,136,7354.0,b'Spectacle',3985,0.014292,17.0,20.0,,50.0,1.0,store,"[1.0, 1.0, 1.0, 1.0, 1.0, -1.0, -1.0, 1.0, 1.0..."
1616,150,7354.0,b'Spectacle',3882,0.224761,18.0,20.0,,50.0,1.0,clean,"[1.0, 1.0, 1.0, 0.19999999999999996, 1.0, 1.0,..."
1617,157,7354.0,b'Spectacle',5648,-0.524272,19.0,20.0,,50.0,1.0,greeted,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, -1.0, 1.0, 1.0,..."


In [109]:
table_path = '../tables/critiquing/multi_step_critiquing/yelp/ranksvm/'
name = '1user_absoluteObjective.csv'
save_dataframe_csv(df, table_path, name)

In [103]:
df

Unnamed: 0,critiqued_keyphrase,item_id,item_name,item_rank,item_score,iteration,num_existing_keyphrases,result,target_rank,user_id,critiqued_keyphrase_name,lambda
0,,75.0,b'Salad King Restaurant',,,0.0,20.0,,20.0,1.0,,
1,2,75.0,b'Salad King Restaurant',6295,-0.100984,1.0,20.0,,20.0,1.0,thai,"[1.0, 0.0]"
2,1,75.0,b'Salad King Restaurant',2,43.3392,2.0,20.0,successful,20.0,1.0,fast,"[1.0, 1.0, 1.0]"
3,,78.0,b'The Works Gourmet Burger Bistro',,,0.0,20.0,,20.0,1.0,fast,"[1.0, 1.0, 1.0]"
4,48,78.0,b'The Works Gourmet Burger Bistro',7424,-0.70013,1.0,20.0,,20.0,1.0,burger,"[1.0, 0.0]"
...,...,...,...,...,...,...,...,...,...,...,...,...
1608,121,7354.0,b'Spectacle',4351,-0.0715651,16.0,20.0,,50.0,1.0,dumpling,"[1.0, 1.0, 1.0, 0.2, 1.0, 1.0, 0.0, 1.0, 1.0, ..."
1609,136,7354.0,b'Spectacle',3582,0.117679,17.0,20.0,,50.0,1.0,store,"[1.0, 1.0, 1.0, 0.2, 1.0, 0.0, 0.0, 1.0, 1.0, ..."
1610,150,7354.0,b'Spectacle',3397,0.456262,18.0,20.0,,50.0,1.0,clean,"[1.0, 1.0, 1.0, 0.2, 1.0, 1.0, 0.0, 1.0, 1.0, ..."
1611,157,7354.0,b'Spectacle',5428,-0.26561,19.0,20.0,,50.0,1.0,greeted,"[1.0, 0.5, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, ..."


In [25]:
df

Unnamed: 0,critiqued_keyphrase,item_id,item_name,item_rank,item_score,iteration,num_existing_keyphrases,result,target_rank,user_id,critiqued_keyphrase_name,lambda
0,,75.0,b'Salad King Restaurant',,,0.0,20.0,,20.0,1.0,,
1,2,75.0,b'Salad King Restaurant',3,4.43009,1.0,20.0,successful,20.0,1.0,thai,[1]
2,,78.0,b'The Works Gourmet Burger Bistro',,,0.0,20.0,,20.0,1.0,thai,[1]
3,48,78.0,b'The Works Gourmet Burger Bistro',7343,-0.502279,1.0,20.0,,20.0,1.0,burger,[1]
4,4,78.0,b'The Works Gourmet Burger Bistro',5525,-0.0278124,2.0,20.0,,20.0,1.0,fry,"[1, 1]"
...,...,...,...,...,...,...,...,...,...,...,...,...
1589,121,7354.0,b'Spectacle',2008,0.150263,16.0,20.0,,50.0,1.0,dumpling,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]"
1590,136,7354.0,b'Spectacle',2001,0.168824,17.0,20.0,,50.0,1.0,store,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
1591,150,7354.0,b'Spectacle',2135,0.146572,18.0,20.0,,50.0,1.0,clean,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
1592,157,7354.0,b'Spectacle',2304,0.123868,19.0,20.0,,50.0,1.0,greeted,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."


In [None]:
def multi_step_critiquing_plrec(user = 2, 
                           keyphrase_length_threshold = 150, 
                           max_iteration_threshold = 5,
                           k = 50,
                           df = df,
                           row = row,
                           business_df = business_df,
                           keyphrases = keyphrases,
                           keyphrase_popularity = keyphrase_popularity, 
                           keyphrase_selection_method = 'random',
                           recommend_type = 'all',
                           lams = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1],
                           reg = reg, Y = Y, RQt = RQt, Bias = Bias,
                           top_k_rec = 20, affected_weight = 1, unaffected_weight = -1,
                                 w1 = 1, w2 = 1,
                            matrix_Train = rtrain,
                            matrix_Test = rtest,
                            keyphrase_freq = I_K,
                            num_items = rtrain.shape[1],
                            max_wanted_keyphrase = 10,
                          ):
    """
    k: HR@k 
    keyphrase_length_threshold: limit the number of keyphrases in top recommended item
    keyphrase_selection_method: 'random': randomly select keyphrase from wanted_keyphrases
                                'pop': always select the most popular keyphrase in wanted_keyphrases
                                'diff': select the keyphrase with largest frequency difference between top recommended 
                                        item and target item.
    recommend_type: 'all': recommend all items
                    'upper' (only_with_critiqued_keyphrase): recommend items with only critiqued_keyphrase
    lam: modified_matrix = lam*origianl_matrix + (1-lam)*critiquing_embedding 
    """
    
    row['user_id'] = user
    print ('User ID ', user)
    
    # Get wanted items 
    candidate_items = matrix_Test[user].nonzero()[1]
    train_items = matrix_Train[user].nonzero()[1]
    wanted_items = np.setdiff1d(candidate_items, train_items)
    print ('wanted_items length: ',len(wanted_items))
    
    # Get initial forward prediction 
    prediction_score = predict_scores(matrix_U=RQ,
                                      matrix_V=Y,
                                      bias=Bias).T[user]
    prediction_items = predict_vector(rating_vector=prediction_score,
                                                              train_vector=matrix_Train[user],
                                                              remove_train=True)
    # Get initial top recommended item(s)
    top_recommendations = np.argsort(prediction_score)[::-1]
    print ("Initial top recommendation index",top_recommendations[0])
    try:
        row['top_prediction_item_name'] = get_restaurant_name(df_train, business_df, top_recommendations[0])
    except: 
        row['top_prediction_item_name'] = 'CANNOT_FIND'
        print ('Cannot get restaurant name for ItemIndex: ', top_recommendations[0])
    
    
    # Get top recommended item's keyphrases
    top_item = top_recommendations[0] 
    top_recommend_keyphrases = get_valid_keyphrases(keyphrase_freq,
                                                    top_recommendations, 
                                                    item = top_item,
                                                    threshold=keyphrase_length_threshold,
                                                    mutiple_keyphrases_en = False, 
                                                    top_items = None)
    top_recommended_keyphrase_freq = get_item_keyphrase_freq(keyphrase_freq,item = top_item)
    
    
    #####################################
    # For each item, do the critiquing
    
    #limit the item to only 10
    num_target_item = 0 # initialize item count
    
    for item in wanted_items:    
        print ('target_item: ', item)
        row['target_item'] = item
        try:
            row['item_name'] = get_restaurant_name(df_train, business_df, item)
        except:
            row['item_name'] = 'CANNOT_FIND'
            print ('Cannot get restaurant name for ItemIndex: ', item)

        # Get pre-critiquing rank
        initial_rank = np.where(item == np.argsort(prediction_score)[::-1])[0][0]
        row['pre_rank'] = int(initial_rank)

        # Get the target item's existing keyphrases
        item_keyphrases = keyphrase_freq[item].nonzero()[1]
        
        # For diff 
        target_keyphrase_freq = get_item_keyphrase_freq(keyphrase_freq,item = item)
        diff_keyphrase_freq = target_keyphrase_freq - top_recommended_keyphrase_freq
        
        wanted_keyphrases_random = np.setdiff1d(item_keyphrases,top_recommend_keyphrases)
        wanted_keyphrases_pop = np.setdiff1d(item_keyphrases,top_recommend_keyphrases)
        wanted_keyphrases_diff = np.argsort(np.ravel(diff_keyphrase_freq))[::-1][:max_wanted_keyphrase]
        
        pruned_prediction_score = pruning(prediction_score, 
                                           wanted_keyphrases_random, 
                                           top_recommendations, 
                                           keyphrase_freq, 
                                           matrix_Train = rtrain)
        pure_pruning_rank = np.where(item == np.argsort(pruned_prediction_score)[::-1])[0][0]
        if pure_pruning_rank>initial_rank:
            pure_pruning_rank = initial_rank
        row['pure_pruning_rank'] = int(pure_pruning_rank)    
        
        affected_items = np.array([])
        modified_matrix = initial_user_similarity_embedding # initialize user similarity embedding
        
        #############################################
        # Critiquing iteration
        for iteration in range(max_iteration_threshold):
            print ('cur_iter ', iteration)
            row['iter'] = iteration

            if len(wanted_keyphrases_random) == 0 or len(wanted_keyphrases_diff) == 0: 
                print ('no more keyphrase available')
                break
            critiqued_keyphrase_random = np.random.choice(wanted_keyphrases_random, size=1, replace=False)[0]
            critiqued_keyphrase_pop = wanted_keyphrases_pop[np.argmin(keyphrase_popularity[wanted_keyphrases_pop])] # Select the least popular
            critiqued_keyphrase_diff = wanted_keyphrases_diff[0]
            
            row['critiqued_keyphrase_random'] = critiqued_keyphrase_random
            row['keyphrase_name_random'] = keyphrases[critiqued_keyphrase_random]
            row['critiqued_keyphrase_pop'] = critiqued_keyphrase_pop
            row['keyphrase_name_pop'] = keyphrases[critiqued_keyphrase_pop]
            row['critiqued_keyphrase_diff'] = critiqued_keyphrase_diff
            row['keyphrase_name_diff'] = keyphrases[critiqued_keyphrase_diff]
            
            # Do not critique this keyphrase next time
            wanted_keyphrases_random = np.delete(wanted_keyphrases_random, np.where(critiqued_keyphrase_random == wanted_keyphrases_random))
            wanted_keyphrases_pop = np.delete(wanted_keyphrases_pop, np.where(critiqued_keyphrase_pop == wanted_keyphrases_pop))
            wanted_keyphrases_diff = np.delete(wanted_keyphrases_diff, np.where(critiqued_keyphrase_diff == wanted_keyphrases_diff))
            
            # Critiquing Embedding

            # One hot encoding
            critiqued_matrix_onehot = get_critiqued_UK(U_K,user,critiqued_keyphrase_random)
            critiqued_matrix = reg.predict(critiqued_matrix_onehot)
            critiqued_matrix_random =critiqued_matrix

            critiqued_matrix_onehot = get_critiqued_UK(U_K,user,critiqued_keyphrase_pop)
            critiqued_matrix = reg.predict(critiqued_matrix_onehot)
            critiqued_matrix_pop = critiqued_matrix

            critiqued_matrix_onehot = get_critiqued_UK(U_K,user,critiqued_keyphrase_diff)
            critiqued_matrix = reg.predict(critiqued_matrix_onehot)
            critiqued_matrix_diff = critiqued_matrix


            # Warning!!! The following is used only for testing single step critiquing, 
            # for full average critiquing, use the above commented line 
            post_ranks_random_all = []
            post_ranks_random_upper = []
            random_scores = []
            random_ratings = []
            post_ranks_pop_all = []
            post_ranks_pop_upper = []
            pop_scores = []
            pop_ratings = []
            post_ranks_diff_all = []
            post_ranks_diff_upper = []
            diff_scores = []
            diff_ratings = []
            
            num_items = matrix_Train.shape[1]
            affected_items_random = keyphrase_freq[:,critiqued_keyphrase_random].nonzero()[0]
            affected_items_pop = keyphrase_freq[:,critiqued_keyphrase_pop].nonzero()[0]
            affected_items_diff = keyphrase_freq[:,critiqued_keyphrase_diff].nonzero()[0]
            
            unaffected_items_random = np.setdiff1d(range(num_items), affected_items_random)
            unaffected_items_pop = np.setdiff1d(range(num_items), affected_items_pop)
            unaffected_items_diff = np.setdiff1d(range(num_items), affected_items_diff)
            
            for lam in lams:
                modified_matrix_random = (1-lam)*Y + lam*critiqued_matrix_random 
                modified_matrix_pop = (1-lam)*Y + lam*critiqued_matrix_pop 
                modified_matrix_diff = (1-lam)*Y + lam*critiqued_matrix_diff 
                
                # Random
                prediction_scores_u = predict_scores(matrix_U=modified_matrix_random[user], 
                                     matrix_V=RQ,
                                     )
                prediction_items = predict_vector(rating_vector=prediction_scores_u,
                                                  train_vector=matrix_Train[user],
                                                  remove_train=False)
                
                item_rank = np.where(prediction_items == item)[0][0]
                post_ranks_random_all.append(item_rank)
                
                # Random scores
                affected_hit = sum(np.in1d(prediction_items[:top_k_rec],affected_items_random))
                unaffected_hit = sum(np.in1d(prediction_items[:top_k_rec],unaffected_items_random))
                score = affected_weight*affected_hit + unaffected_weight*unaffected_hit
                random_scores.append(score)
                
                # Random Rating
                latent_diff = modified_matrix_random - Y #post-pre
                rating_diff = predict_scores(matrix_U=latent_diff[user], 
                                     matrix_V=RQ,
                                     )
                affected_items_mask = np.in1d(prediction_items, affected_items_random)
                affected_items_index_rank = np.where(affected_items_mask == True)
                unaffected_items_index_rank = np.where(affected_items_mask == False)
                
                rating_diff_sum_unaffected = np.sum(np.abs(rating_diff), 
                                                    where = np.in1d(range(num_items),np.intersect1d(unaffected_items_random, prediction_items[unaffected_items_index_rank[0][:100]])))
                rating_diff_sum_affected = np.sum(rating_diff, where = np.in1d(range(num_items), 
                                                                               np.intersect1d(affected_items_random, prediction_items[affected_items_index_rank[0][:100]])))
                rating_score = w1*rating_diff_sum_unaffected - w2*rating_diff_sum_affected
#                 print (rating_score)
                random_ratings.append(rating_score)
    
                # Random upper 
                prediction_scores_u = predict_scores(matrix_U=modified_matrix_random[user], 
                                                     matrix_V=RQ, 
                                                     bias=None,
                                                   penalize = True,
                                                   keyphrase_freq = keyphrase_freq, 
                                                   critiqued_keyphrase = critiqued_keyphrase_random, 
                                                   matrix_Train = matrix_Train,
                                                   alpha = 0)
                prediction_items = predict_vector(rating_vector=prediction_scores_u,
                                                  train_vector=matrix_Train[user],
                                                  remove_train=False)
                item_rank = np.where(prediction_items == item)[0][0]
                post_ranks_random_upper.append(item_rank)
                
                # Pop
                prediction_scores_u = predict_scores(matrix_U=modified_matrix_pop[user], 
                                     matrix_V=RQ,
                                     )
                prediction_items = predict_vector(rating_vector=prediction_scores_u,
                                                  train_vector=matrix_Train[user],
                                                  remove_train=False)
                
                item_rank = np.where(prediction_items == item)[0][0]
                post_ranks_pop_all.append(item_rank)
                
                
                # pop scores
                affected_hit = sum(np.in1d(prediction_items[:top_k_rec],affected_items_pop))
                unaffected_hit = sum(np.in1d(prediction_items[:top_k_rec],unaffected_items_pop))
                score = affected_weight*affected_hit + unaffected_weight*unaffected_hit
                pop_scores.append(score)
            
                # Pop Rating
                latent_diff = modified_matrix_pop - Y #post-pre
                rating_diff = predict_scores(matrix_U=latent_diff[user], 
                                     matrix_V=RQ,
                                     )
                affected_items_mask = np.in1d(prediction_items, affected_items_pop)
                affected_items_index_rank = np.where(affected_items_mask == True)
                unaffected_items_index_rank = np.where(affected_items_mask == False)
                
                rating_diff_sum_unaffected = np.sum(np.abs(rating_diff), where = np.in1d(range(num_items),np.intersect1d(unaffected_items_pop, prediction_items[unaffected_items_index_rank[0][:100]])))
                rating_diff_sum_affected = np.sum(rating_diff, where = np.in1d(range(num_items), np.intersect1d(affected_items_pop, prediction_items[affected_items_index_rank[0][:100]])))
                rating_score = w1*rating_diff_sum_unaffected - w2*rating_diff_sum_affected
                pop_ratings.append(rating_score)
                
                # Pop upper 
                
                prediction_scores_u = predict_scores(matrix_U=modified_matrix_pop[user], 
                                                     matrix_V=RQ, 
                                                     bias=None,
                                                   penalize = True,
                                                   keyphrase_freq = keyphrase_freq, 
                                                   critiqued_keyphrase = critiqued_keyphrase_pop, 
                                                   matrix_Train = matrix_Train,
                                                   alpha = 0)
                prediction_items = predict_vector(rating_vector=prediction_scores_u,
                                                  train_vector=matrix_Train[user],
                                                  remove_train=False)
                item_rank = np.where(prediction_items == item)[0][0]
                post_ranks_pop_upper.append(item_rank)
                
                # Diff
                prediction_scores_u = predict_scores(matrix_U=modified_matrix_diff[user], 
                                     matrix_V=RQ,
                                     )
                prediction_items = predict_vector(rating_vector=prediction_scores_u,
                                                  train_vector=matrix_Train[user],
                                                  remove_train=False)
                
                item_rank = np.where(prediction_items == item)[0][0]
                post_ranks_diff_all.append(item_rank)
                
                # Diff scores
                affected_hit = sum(np.in1d(prediction_items[:top_k_rec],affected_items_diff))
                unaffected_hit = sum(np.in1d(prediction_items[:top_k_rec],unaffected_items_diff))
                score = affected_weight*affected_hit + unaffected_weight*unaffected_hit
                diff_scores.append(score)
                
                # Diff Rating
                latent_diff = modified_matrix_diff - Y #post-pre
                rating_diff = predict_scores(matrix_U=latent_diff[user], 
                                     matrix_V=RQ,
                                     )
                affected_items_mask = np.in1d(prediction_items, affected_items_diff)
                affected_items_index_rank = np.where(affected_items_mask == True)
                unaffected_items_index_rank = np.where(affected_items_mask == False)
                
                rating_diff_sum_unaffected = np.sum(np.abs(rating_diff), where = np.in1d(range(num_items),np.intersect1d(unaffected_items_diff, prediction_items[unaffected_items_index_rank[0][:100]])))
                rating_diff_sum_affected = np.sum(rating_diff, where = np.in1d(range(num_items), np.intersect1d(affected_items_diff, prediction_items[affected_items_index_rank[0][:100]])))
                rating_score = w1*rating_diff_sum_unaffected - w2*rating_diff_sum_affected
                diff_ratings.append(rating_score)
                # Diff upper 
                prediction_scores_u = predict_scores(matrix_U=modified_matrix_diff[user], 
                                                     matrix_V=RQ, 
                                                     bias=None,
                                                   penalize = True,
                                                   keyphrase_freq = keyphrase_freq, 
                                                   critiqued_keyphrase = critiqued_keyphrase_diff, 
                                                   matrix_Train = matrix_Train,
                                                   alpha = 0)
                prediction_items = predict_vector(rating_vector=prediction_scores_u,
                                                  train_vector=matrix_Train[user],
                                                  remove_train=False)
                item_rank = np.where(prediction_items == item)[0][0]
                post_ranks_diff_upper.append(item_rank)
            
            ######################################################
            # optimal predicted lambda from ranking obj 
            random_ranking_opti_predicted = lams[np.argmax(random_scores)]
            pop_ranking_opti_predicted = lams[np.argmax(pop_scores)]
            diff_ranking_opti_predicted = lams[np.argmax(diff_scores)]
            
            # optimal predicted lambda from rating obj 
            random_rating_opti_predicted = lams[np.argmin(random_ratings)]
            pop_rating_opti_predicted = lams[np.argmin(pop_ratings)]
            diff_rating_opti_predicted = lams[np.argmin(diff_ratings)]
            
            ####################################################
            # Get optimal post_ranking predicted
            modified_matrix_random_opti_predicted = (1-random_ranking_opti_predicted)*Y + random_ranking_opti_predicted*critiqued_matrix_random 
            modified_matrix_pop_opti_predicted = (1-random_ranking_opti_predicted)*Y + random_ranking_opti_predicted*critiqued_matrix_pop 
            modified_matrix_diff_opti_predicted = (1-random_ranking_opti_predicted)*Y + random_ranking_opti_predicted*critiqued_matrix_diff 
            # Random
            prediction_scores_u = predict_scores(matrix_U=modified_matrix_random_opti_predicted[user], 
                                     matrix_V=RQ,
                                     )
            prediction_items = predict_vector(rating_vector=prediction_scores_u,
                                              train_vector=matrix_Train[user],
                                              remove_train=False)

            item_rank = np.where(prediction_items == item)[0][0]
            row['random_opti_ranking'] = item_rank
            
            modified_matrix_random_opti_predicted = (1-random_rating_opti_predicted)*Y + random_rating_opti_predicted*critiqued_matrix_random 
            prediction_scores_u = predict_scores(matrix_U=modified_matrix_random_opti_predicted[user], 
                                     matrix_V=RQ,
                                     )
            prediction_items = predict_vector(rating_vector=prediction_scores_u,
                                              train_vector=matrix_Train[user],
                                              remove_train=False)

            item_rank = np.where(prediction_items == item)[0][0]
            row['random_opti_rating'] = item_rank
            
            # Pop
            prediction_scores_u = predict_scores(matrix_U=modified_matrix_pop_opti_predicted[user], 
                                     matrix_V=RQ,
                                     )
            prediction_items = predict_vector(rating_vector=prediction_scores_u,
                                              train_vector=matrix_Train[user],
                                              remove_train=False)

            item_rank = np.where(prediction_items == item)[0][0]
            row['pop_opti_ranking'] = item_rank
            
            modified_matrix_pop_opti_predicted = (1-pop_rating_opti_predicted)*Y + pop_rating_opti_predicted*critiqued_matrix_pop 
            prediction_scores_u = predict_scores(matrix_U=modified_matrix_pop_opti_predicted[user], 
                                     matrix_V=RQ,
                                     )
            prediction_items = predict_vector(rating_vector=prediction_scores_u,
                                              train_vector=matrix_Train[user],
                                              remove_train=False)

            item_rank = np.where(prediction_items == item)[0][0]
            row['pop_opti_rating'] = item_rank
            
            # Diff
            prediction_scores_u = predict_scores(matrix_U=modified_matrix_diff_opti_predicted[user], 
                                     matrix_V=RQ,
                                     )
            prediction_items = predict_vector(rating_vector=prediction_scores_u,
                                              train_vector=matrix_Train[user],
                                              remove_train=False)

            item_rank = np.where(prediction_items == item)[0][0]
            row['diff_opti_ranking'] = item_rank
#             print ('diff_opti_ranking ', item_rank)
            
            modified_matrix_diff_opti_predicted = (1-diff_rating_opti_predicted)*Y + diff_rating_opti_predicted*critiqued_matrix_diff 
            prediction_scores_u = predict_scores(matrix_U=modified_matrix_diff_opti_predicted[user], 
                                     matrix_V=RQ,
                                     )
            prediction_items = predict_vector(rating_vector=prediction_scores_u,
                                              train_vector=matrix_Train[user],
                                              remove_train=False)

            item_rank = np.where(prediction_items == item)[0][0]
            row['diff_opti_rating'] = item_rank
            
            row['post_rank_random_all'] = post_ranks_random_all
            row['post_rank_random_upper'] = post_ranks_random_upper
            row['random_scores'] = random_scores
            row['random_ratings'] = random_ratings
            
            row['post_rank_pop_all'] = post_ranks_pop_all
            row['post_rank_pop_upper'] = post_ranks_pop_upper
            row['pop_scores'] = pop_scores
            row['pop_ratings'] = pop_ratings
            
            row['post_rank_diff_all'] = post_ranks_diff_all
            row['post_rank_diff_upper'] = post_ranks_diff_upper
            row['diff_scores'] = diff_scores
            row['diff_ratings'] = diff_ratings
            
            df = df.append(row, ignore_index=True)
            

        # break after got 10 target items 
        num_target_item += 1
        if num_target_item >10: # only want max 10 items per user
            break
            
    return df