In [1]:
import sys 
# sys.path.clear()
sys.path.insert(0, 'D:\\Anaconda\\envs\\tensorflow_cpu\\python36.zip')
sys.path.insert(0, 'D:\\Anaconda\\envs\\tensorflow_cpu\\DLLs')
sys.path.insert(0, 'D:\\Anaconda\\envs\\tensorflow_cpu\\lib')
sys.path.insert(0, 'D:\\Anaconda\\envs\\tensorflow_cpu')
sys.path.insert(0, 'D:\\Anaconda\\envs\\tensorflow_cpu\\lib\\site-packages')
sys.path.insert(0, '')

In [52]:
sys.path

['',
 'D:\\Anaconda\\envs\\tensorflow_cpu\\lib\\site-packages',
 'D:\\Anaconda\\envs\\tensorflow_cpu',
 'D:\\Anaconda\\envs\\tensorflow_cpu\\lib',
 'D:\\Anaconda\\envs\\tensorflow_cpu\\DLLs',
 'D:\\Anaconda\\envs\\tensorflow_cpu\\python36.zip']

In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt

print(tf.__version__)

2.0.0


In [3]:
from scipy.sparse import csr_matrix, load_npz, save_npz
from tqdm import tqdm
from sklearn.preprocessing import normalize
import datetime
import json
import pandas as pd
import time
import yaml
import scipy.sparse as sparse
from ast import literal_eval

# Load data

In [6]:
# Load Original Data
df_train = pd.read_csv('../../data/yelp/Train.csv',encoding='latin-1')
df_valid = pd.read_csv('../../data/yelp/Valid.csv',encoding='latin-1')
df_test = pd.read_csv('../../data/yelp/Test.csv',encoding='latin-1')
keyphrases = pd.read_csv('../../data/yelp/KeyPhrases.csv')['Phrases'].tolist()

In [7]:
def to_sparse_matrix(df, num_user, num_item, user_col, item_col, rating_col):

    dok = df[[user_col, item_col, rating_col]].copy()
    dok = dok.values
    dok = dok[dok[:, 2] > 0]
    shape = [num_user, num_item]
    return sparse.csr_matrix((dok[:, 2].astype(np.float32), (dok[:, 0], dok[:, 1])), shape=shape)

def generate_sparse():
#     num_users = df_train['UserIndex'].nunique()
#     num_items = df_train['ItemIndex'].nunique()
    num_users = df_train['UserIndex'].max() + 1
    num_items = df_train['ItemIndex'].max() + 1
    R_train = to_sparse_matrix(df_train, num_users, num_items, 'UserIndex', 'ItemIndex', rating_col)
    sparse.save_npz('../../data/yelp/' + 'Rtrain.npz', R_train)
    
    R_valid = to_sparse_matrix(df_valid, num_users, num_items, 'UserIndex','ItemIndex', rating_col)
    sparse.save_npz('../../data/yelp/' + 'Rvalid.npz', R_valid)
    
    R_test = to_sparse_matrix(df_test, num_users, num_items, 'UserIndex', 'ItemIndex', rating_col)
    sparse.save_npz('../../data/yelp/' + 'Rtest.npz', R_test)

In [38]:
df_train['ItemIndex'].max()

7455

In [33]:
df_train['UserIndex'].nunique()

2191

In [35]:
df_train['UserIndex'].max()

2342

In [43]:
rating_col = 'rating'
generate_sparse()

In [8]:
# Load U-I Data 
rtrain = load_npz("../../data/yelp/Rtrain.npz")
rvalid = load_npz("../../data/yelp/Rvalid.npz")
rtest = load_npz("../../data/yelp/Rtest.npz")

In [9]:
rtrain

<2343x7456 sparse matrix of type '<class 'numpy.float32'>'
	with 95153 stored elements in Compressed Sparse Row format>

# Models

In [5]:
# Models
from sklearn.metrics.pairwise import cosine_similarity
def train(matrix_train):
    similarity = cosine_similarity(X=matrix_train, Y=None, dense_output=True)
    return similarity

def get_I_K(df, row_name = 'ItemIndex', shape = (3668,75)):
    rows = []
    cols = []
    vals = []
    for i in tqdm(range(df.shape[0])):
        key_vector = literal_eval(df['keyVector'][i])
        rows.extend([df[row_name][i]]*len(key_vector)) ## Item index
        cols.extend(key_vector) ## Keyword Index
        vals.extend(np.array([1]*len(key_vector)))
    return csr_matrix((vals, (rows, cols)), shape=shape)

def predict(matrix_train, k, similarity, item_similarity_en = False):
    """
    res = similarity * matrix_train    if item_similarity_en = False
    res = similarity * matrix_train.T  if item_similarity_en = True
    """
    prediction_scores = []
    
    if item_similarity_en:
        matrix_train = matrix_train.transpose()
        
    for user_index in tqdm(range(matrix_train.shape[0])):
        # Get user u's prediction scores to all users
        vector_u = similarity[user_index]

        # Get closest K neighbors excluding user u self
        similar_users = vector_u.argsort()[::-1][1:k+1]
        # Get neighbors similarity weights and ratings
        similar_users_weights = similarity[user_index][similar_users]
        similar_users_ratings = matrix_train[similar_users].toarray()

        prediction_scores_u = similar_users_ratings * similar_users_weights[:, np.newaxis]

        prediction_scores.append(np.sum(prediction_scores_u, axis=0))
    res = np.array(prediction_scores)
    
    if item_similarity_en:
        res = res.transpose()
    
    return res

def prediction(prediction_score, topK, matrix_Train):

    prediction = []

    for user_index in tqdm(range(matrix_Train.shape[0])):
        vector_u = prediction_score[user_index]
        vector_train = matrix_Train[user_index]
        if len(vector_train.nonzero()[0]) > 0:
            vector_predict = sub_routine(vector_u, vector_train, topK=topK)
        else:
            vector_predict = np.zeros(topK, dtype=np.float32)

        prediction.append(vector_predict)

    return np.vstack(prediction)


def sub_routine(vector_u, vector_train, topK=500):

    train_index = vector_train.nonzero()[1]

    vector_u = vector_u

    candidate_index = np.argpartition(-vector_u, topK+len(train_index))[:topK+len(train_index)]
    vector_u = candidate_index[vector_u[candidate_index].argsort()[::-1]]
    vector_u = np.delete(vector_u, np.isin(vector_u, train_index).nonzero()[0])

    return vector_u[:topK]


In [10]:
# Evluation 
def recallk(vector_true_dense, hits, **unused):
    hits = len(hits.nonzero()[0])
    return float(hits)/len(vector_true_dense)

def precisionk(vector_predict, hits, **unused):
    hits = len(hits.nonzero()[0])
    return float(hits)/len(vector_predict)


def average_precisionk(vector_predict, hits, **unused):
    precisions = np.cumsum(hits, dtype=np.float32)/range(1, len(vector_predict)+1)
    return np.mean(precisions)


def r_precision(vector_true_dense, vector_predict, **unused):
    vector_predict_short = vector_predict[:len(vector_true_dense)]
    hits = len(np.isin(vector_predict_short, vector_true_dense).nonzero()[0])
    return float(hits)/len(vector_true_dense)


def _dcg_support(size):
    arr = np.arange(1, size+1)+1
    return 1./np.log2(arr)


def ndcg(vector_true_dense, vector_predict, hits):
    idcg = np.sum(_dcg_support(len(vector_true_dense)))
    dcg_base = _dcg_support(len(vector_predict))
    dcg_base[np.logical_not(hits)] = 0
    dcg = np.sum(dcg_base)
    return dcg/idcg


def click(hits, **unused):
    first_hit = next((i for i, x in enumerate(hits) if x), None)
    if first_hit is None:
        return 5
    else:
        return first_hit/10


def evaluate(matrix_Predict, matrix_Test, metric_names =['R-Precision', 'NDCG', 'Precision', 'Recall', 'MAP'], atK = [5, 10, 15, 20, 50], analytical=False):
    """
    :param matrix_U: Latent representations of users, for LRecs it is RQ, for ALSs it is U
    :param matrix_V: Latent representations of items, for LRecs it is Q, for ALSs it is V
    :param matrix_Train: Rating matrix for training, features.
    :param matrix_Test: Rating matrix for evaluation, true labels.
    :param k: Top K retrieval
    :param metric_names: Evaluation metrics
    :return:
    """
    global_metrics = {
        "R-Precision": r_precision,
        "NDCG": ndcg,
        "Clicks": click
    }

    local_metrics = {
        "Precision": precisionk,
        "Recall": recallk,
        "MAP": average_precisionk
    }

    output = dict()

    num_users = matrix_Predict.shape[0]

    for k in atK:

        local_metric_names = list(set(metric_names).intersection(local_metrics.keys()))
        results = {name: [] for name in local_metric_names}
        topK_Predict = matrix_Predict[:, :k]

        for user_index in tqdm(range(topK_Predict.shape[0])):
            vector_predict = topK_Predict[user_index]
            if len(vector_predict.nonzero()[0]) > 0:
                vector_true = matrix_Test[user_index]
                vector_true_dense = vector_true.nonzero()[1]
                hits = np.isin(vector_predict, vector_true_dense)

                if vector_true_dense.size > 0:
                    for name in local_metric_names:
                        results[name].append(local_metrics[name](vector_true_dense=vector_true_dense,
                                                                 vector_predict=vector_predict,
                                                                 hits=hits))

        results_summary = dict()
        if analytical:
            for name in local_metric_names:
                results_summary['{0}@{1}'.format(name, k)] = results[name]
        else:
            for name in local_metric_names:
                results_summary['{0}@{1}'.format(name, k)] = (np.average(results[name]),
                                                              1.96*np.std(results[name])/np.sqrt(num_users))
        output.update(results_summary)

    global_metric_names = list(set(metric_names).intersection(global_metrics.keys()))
    results = {name: [] for name in global_metric_names}

    topK_Predict = matrix_Predict[:]

    for user_index in tqdm(range(topK_Predict.shape[0])):
        vector_predict = topK_Predict[user_index]

        if len(vector_predict.nonzero()[0]) > 0:
            vector_true = matrix_Test[user_index]
            vector_true_dense = vector_true.nonzero()[1]
            hits = np.isin(vector_predict, vector_true_dense)

            # if user_index == 1:
            #     import ipdb;
            #     ipdb.set_trace()

            if vector_true_dense.size > 0:
                for name in global_metric_names:
                    results[name].append(global_metrics[name](vector_true_dense=vector_true_dense,
                                                              vector_predict=vector_predict,
                                                              hits=hits))

    results_summary = dict()
    if analytical:
        for name in global_metric_names:
            results_summary[name] = results[name]
    else:
        for name in global_metric_names:
            results_summary[name] = (np.average(results[name]), 1.96*np.std(results[name])/np.sqrt(num_users))
    output.update(results_summary)

    return output



In [13]:
rtrain

<2343x7456 sparse matrix of type '<class 'numpy.float32'>'
	with 95153 stored elements in Compressed Sparse Row format>

In [12]:
len(keyphrases)

235

In [14]:
# Generate U-K 
U_K = get_I_K(df_train, row_name = 'UserIndex', shape = (2343,235))

100%|███████████████████████████████████████████████████████████████████████████| 95153/95153 [04:33<00:00, 348.48it/s]


In [32]:
save_npz("../../data/yelp/U_K",U_K)

In [33]:
U_K = load_npz("../../data/yelp/U_K.npz")

In [194]:
# Generate I-K 
I_K = get_I_K(df_train, row_name = 'ItemIndex', shape = (7456,235))

100%|███████████████████████████████████████████████████████████████████████████| 95153/95153 [04:03<00:00, 391.29it/s]


In [196]:
save_npz("../../data/yelp/I_K",I_K)

In [197]:
I_K = load_npz("../../data/yelp/I_K.npz")

# U_I Result

In [23]:
similarity = normalize(train(rtrain))
user_item_prediction_score = predict(rtrain, 100, similarity, item_similarity_en= False)
user_item_predict = prediction(user_item_prediction_score, 50, rtrain)
user_item_res = evaluate(user_item_predict, rvalid)

100%|█████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:07<00:00, 297.19it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 2633.63it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4633.66it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4390.92it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4326.47it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4526.53it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4500.23it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4043.50it/s]


In [24]:
# k = 100
user_item_res

{'MAP@10': (0.051073692522802513, 0.004057146027986048),
 'MAP@15': (0.04740085851906938, 0.003359527402347134),
 'MAP@20': (0.044743950128338206, 0.002931008949008714),
 'MAP@5': (0.05701658299102388, 0.005498800420458874),
 'MAP@50': (0.036229210065942374, 0.0018910890109491262),
 'NDCG': (0.08489767566647444, 0.0037106587556144806),
 'Precision@10': (0.042172523961661344, 0.002787504501522014),
 'Precision@15': (0.03879507074395253, 0.0022771241184831627),
 'Precision@20': (0.03555454130534003, 0.0019002509290587605),
 'Precision@5': (0.050296668188041994, 0.004168258666849642),
 'Precision@50': (0.02735737106344135, 0.0012439499801260928),
 'R-Precision': (0.041733866795200275, 0.0027759220013279197),
 'Recall@10': (0.04138686214086912, 0.0029327051931329613),
 'Recall@15': (0.056532202532316896, 0.0034058829474612855),
 'Recall@20': (0.06920549460798729, 0.0037456497239839255),
 'Recall@5': (0.025432825237707825, 0.0023478703305939417),
 'Recall@50': (0.1294308432731444, 0.0050817

# Unlearned

In [25]:
similarity = normalize(train(U_K))
unlearned_prediction_score = predict(rtrain, 100, similarity, item_similarity_en= False)
unlearned_predict = prediction(unlearned_prediction_score, 50, rtrain)
unlearned_res = evaluate(unlearned_predict, rvalid)
unlearned_res

100%|█████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:12<00:00, 180.76it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 2550.70it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4449.14it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4707.70it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4509.40it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4746.00it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4670.52it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4043.29it/s]


{'MAP@10': (0.03807290647888548, 0.0034516964720750546),
 'MAP@15': (0.03553797209433905, 0.0028672870257672785),
 'MAP@20': (0.03370376900344167, 0.0025074125274402795),
 'MAP@5': (0.041119732237943095, 0.004657532891059873),
 'MAP@50': (0.027602870159743928, 0.0016228342694287708),
 'NDCG': (0.06511835191702671, 0.0033052172013054884),
 'Precision@10': (0.03322683706070288, 0.002465220667665642),
 'Precision@15': (0.0293321162330747, 0.001957297652179952),
 'Precision@20': (0.027407576449109995, 0.0016911246343864425),
 'Precision@5': (0.038703788224555, 0.003570729979189414),
 'Precision@50': (0.020949338201734367, 0.001040247535464102),
 'R-Precision': (0.03192788335932602, 0.002458395387461051),
 'Recall@10': (0.033001237423790984, 0.0026792409055536887),
 'Recall@15': (0.0433697047824931, 0.0030981184280090826),
 'Recall@20': (0.05395011310626228, 0.003459003186056587),
 'Recall@5': (0.019905087004912992, 0.0021000202554456375),
 'Recall@50': (0.1005849841010983, 0.00458800516351

# Learned with Linear Regression

In [18]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import normalize
from sklearn.linear_model import Ridge

In [19]:
X = normalize(U_K.todense())
y = normalize(train(rtrain))
# y = U_I_similarity
clf = Ridge(alpha=0.001).fit(X, y)

In [20]:
lr_similarity = clf.predict(np.array(X))

In [21]:
similarity = lr_similarity
lr_prediction_score = predict(rtrain, 100, similarity, item_similarity_en= False)
lr_predict = prediction(lr_prediction_score, 50, rtrain)
lr_res = evaluate(lr_predict, rvalid)

100%|█████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:12<00:00, 184.72it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 2660.55it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4736.44it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4717.42it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4794.15it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4755.39it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4679.83it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2343/2343 [00:00<00:00, 4279.18it/s]


In [22]:
lr_res

{'MAP@10': (0.04791011931929322, 0.003909906530540023),
 'MAP@15': (0.04472455659356618, 0.0032453570885310626),
 'MAP@20': (0.042183593262152395, 0.002831975800912942),
 'MAP@5': (0.05255134641716111, 0.0053077652762159545),
 'MAP@50': (0.03409008663773755, 0.0018206458827478736),
 'NDCG': (0.080007877002718, 0.0036741296182457423),
 'Precision@10': (0.04043815609310817, 0.0027488356456966574),
 'Precision@15': (0.036969420356001835, 0.002209056993815675),
 'Precision@20': (0.03372889091738932, 0.001865902272311573),
 'Precision@5': (0.04801460520310361, 0.004019422218032844),
 'Precision@50': (0.025349155636695576, 0.0011602543418718766),
 'R-Precision': (0.03995729846245619, 0.0027870432323067475),
 'Recall@10': (0.04015089215945651, 0.002938149676122475),
 'Recall@15': (0.05452043367806783, 0.003360650501561797),
 'Recall@20': (0.06574394345131794, 0.003655624471281709),
 'Recall@5': (0.024317606292104284, 0.0023406152577752615),
 'Recall@50': (0.12248665145198735, 0.00508984461525

# One-hot encoding of critiques

Use one-hot encoding of critiques to project into user-similarity space

In [65]:
U_K

<2343x235 sparse matrix of type '<class 'numpy.int32'>'
	with 242115 stored elements in Compressed Sparse Row format>

In [155]:
def get_critiqued_UK(user_keyphrase_frequency,user_index,critiqued_keyphrase):
    """
    user_keyphrase_frequency is the U_K matrix (csr sparse matrix)
    return the one-hot encoding of the critique
    """
    U_K_cp = user_keyphrase_frequency.copy()
    U_K_cp[user_index] = 0
    U_K_cp[user_index,critiqued_keyphrase] = 1
    return U_K_cp

def project_one_hot_encoding(reg, user_keyphrase_frequency,user_index,critiqued_keyphrase, normalize_en = True):
    """
    Return the projection on user_sim space from one-hot encoding of critiqued keyphrase
    The res[user_index] should be target embedding row
    """
    critiqued_matrix = get_critiqued_UK(user_keyphrase_frequency, user_index, critiqued_keyphrase)
    res = reg.predict(critiqued_matrix)
    if normalize_en:
        res = normalize((res))
    return res

In [158]:
# test
critiqued_matrix = project_one_hot_encoding(clf, U_K, 0, 0,normalize_en = True)

  self._set_arrayXarray(i, j, x)


In [159]:
critiqued_matrix[1]

array([0.00503134, 0.1046469 , 0.02630396, ..., 0.        , 0.00029115,
       0.00124497])

# Embedding of original U_U + critique projection

In [165]:
# initialize the matrix
modified_matrix = normalize(train(rtrain)).copy()

In [166]:
modified_matrix[0]

array([0.4867929 , 0.01587667, 0.01536822, ..., 0.        , 0.        ,
       0.        ], dtype=float32)

# Learn the lambdas

In [167]:
modified_matrix

array([[0.4867929 , 0.01587667, 0.01536822, ..., 0.        , 0.        ,
        0.        ],
       [0.01717785, 0.5266886 , 0.00329018, ..., 0.        , 0.01379578,
        0.        ],
       [0.01747107, 0.00345705, 0.55340123, ..., 0.        , 0.        ,
        0.0229555 ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.01634563, 0.        , ..., 0.        , 0.6240353 ,
        0.        ],
       [0.        , 0.        , 0.02697603, ..., 0.        , 0.        ,
        0.6503266 ]], dtype=float32)

In [168]:
critiqued_matrix

array([[-0.00612141, -0.02221741, -0.00579472, ...,  0.        ,
         0.03756859, -0.0309494 ],
       [ 0.00503134,  0.1046469 ,  0.02630396, ...,  0.        ,
         0.00029115,  0.00124497],
       [ 0.01630788,  0.02821016,  0.09550076, ...,  0.        ,
         0.01960971,  0.02490069],
       ...,
       [ 0.00338559, -0.04381626, -0.01855125, ...,  0.        ,
        -0.01606787,  0.03146326],
       [ 0.01187455,  0.01655502,  0.02848183, ...,  0.        ,
         0.15997426,  0.01094426],
       [ 0.0025182 , -0.00189156,  0.03735052, ...,  0.        ,
         0.03354831,  0.1618768 ]])

In [170]:
# AVERAGE 

# Combine critiqued matrix with modified directly (asumming only one critique)
modified_matrix = modified_matrix + critiqued_matrix

In [171]:
modified_matrix[0]

array([ 0.48067148, -0.00634074,  0.00957349, ...,  0.        ,
        0.03756859, -0.0309494 ])

### Constructing the w*p matrix

In [183]:
W.reshape(2343,1)

array([[ 0.48067148],
       [-0.00634074],
       [ 0.00957349],
       ...,
       [ 0.        ],
       [ 0.03756859],
       [-0.0309494 ]])

In [186]:
# l = lambda, the weights we want to learn
W = modified_matrix[0]
P = rtrain[:,0]

In [191]:
train_row = W*np.ravel(P.todense())

In [193]:
train_row.shape

(2343,)

## Define rating targets

In [None]:
# To define the rating targets, simply modify final prediction of U-I matrix's user_index row,
# Make the target critiqued item's predicted rating to be 1 (out of 5)

In [202]:
critiqued_keyphrase = 0
user_index = 0
affected_items = I_K[:, critiqued_keyphrase].nonzero()[user_index]

In [206]:
affected_items

array([   2,    6,   13, ..., 7434, 7439, 7441])

In [204]:
target_rating = rtrain[user_index]
print (target_rating)

  (0, 309)	5.0
  (0, 539)	4.0
  (0, 585)	3.0
  (0, 719)	4.0
  (0, 823)	4.0
  (0, 831)	5.0
  (0, 1438)	4.0
  (0, 1650)	4.0
  (0, 1710)	4.0
  (0, 1999)	3.0
  (0, 2075)	4.0
  (0, 2804)	4.0
  (0, 2868)	4.0
  (0, 3298)	4.0
  (0, 3623)	3.0
  (0, 3729)	4.0
  (0, 4358)	5.0
  (0, 4841)	4.0
  (0, 5081)	4.0
  (0, 5291)	3.0
  (0, 5410)	4.0
  (0, 5706)	4.0
  (0, 6099)	4.0
  (0, 6222)	5.0
  (0, 6299)	4.0
  (0, 6454)	4.0
  (0, 6940)	5.0
  (0, 7060)	2.0


In [209]:
# if affected items in the rtrain[user_index] list, set the rating to low (default = 1)
low = 1
updated_target_rating = np.ravel(target_rating.todense())
updated_target_rating[affected_items] = 1

In [210]:
updated_target_rating

array([0., 0., 1., ..., 0., 0., 0.], dtype=float32)