In [30]:
import numpy as np
from ratings_graph import RatingsGraph
import random
#from true_ratings_prediction import debias_ratings_baseline

# Generate Biased 

In [89]:
import numpy as np

def generate_biased_dataset(num_users, num_entities,gen_entity_features): #gen_user_biases
    # The true ratings of the entities
    ground_truth_ratings = np.random.random(num_entities)
    
    # The number of features that define a movie
    num_features = 1
    
    # Each user has a bias value along a certain feature dimension
    user_biases = np.zeros((num_users, num_features))
    for user_idx in range(num_users):
        for feature_idx in range(num_features):
            #user_biases[user_idx][feature_idx] = np.random.uniform(-1, 1)
            if random.randint(0, 1):
                user_biases[user_idx][feature_idx] = 0.1
            else:
                user_biases[user_idx][feature_idx] = -0.5
    
    # Setting the features for each entity along each feature dimension
    entity_features = np.zeros((num_entities, num_features))
    for entity_idx in range(num_entities):
        # Currently saying the maximum magnitude of the entity features is 1.0 / num_features
        #entity_features[entity_idx, :] = np.random.uniform(0, 1.0/num_features, num_features)
        entity_features[entity_idx, :] = gen_entity_features(num_features)
    
    # TODO: Think about how to intelligently normalize these features
    '''
    #linfnorm = np.linalg.norm(entity_features, axis=1, ord=2)
    #entity_features = entity_features.astype(np.float) / linfnorm[:,None]    
    '''
    
    # Setting the user_item ratings and the user_item adjacency matrix
    p_rate = 1
    user_entity_ratings = np.zeros((num_users, num_entities))
    user_entity_adj_matrix = np.zeros((num_users, num_entities))
    for user_idx in range(num_users):
        for entity_idx in range(num_entities):
            if np.random.random() < p_rate:
                user_entity_ratings[user_idx][entity_idx] = ground_truth_ratings[entity_idx] + \
                                                    (np.dot(entity_features[entity_idx, :], user_biases[user_idx,:]))
                user_entity_ratings[user_idx][entity_idx] = max(min(user_entity_ratings[user_idx][entity_idx], 1), 0)
                
                user_entity_adj_matrix[user_idx][entity_idx] = 1

    return user_entity_ratings, ground_truth_ratings, user_entity_adj_matrix, entity_features, user_biases

In [90]:
import numpy as np
import pandas as pd

EPSILON = 0.000001

def single_iteration(ratings_graph, biases, true_ratings, alpha, beta):
    # Update Ratings
    graph_shape = ratings_graph.get_graph_shape()
    indiv_true_ratings = np.maximum(np.zeros(graph_shape), np.minimum(np.ones(graph_shape),
                                        ratings_graph.get_user_entity_ratings() - alpha * biases[:, None]))
    rating_denoms = ratings_graph.get_entity_rating_counts()
    next_true_ratings = np.sum(ratings_graph.get_user_entity_adj_matrix() * indiv_true_ratings, axis=0) / rating_denoms

    # Update Biases
    indiv_biases = ratings_graph.get_user_entity_ratings() - next_true_ratings
    bias_denoms = ratings_graph.get_user_rating_counts()
    next_biases = (1-beta)*biases + beta*(np.sum(ratings_graph.get_user_entity_adj_matrix() * indiv_biases, axis=1) / bias_denoms)

    converged = True
    if ((true_ratings is not None) and np.any(np.abs(true_ratings - next_true_ratings) > EPSILON)) or \
        np.any(np.abs(biases - next_biases) > EPSILON):
        converged = False

    return converged, next_true_ratings, next_biases

def single_iteration_user_ent(ratings_graph, biases, true_ratings, alpha, beta):
    # Update Rating
    graph_shape = ratings_graph.get_graph_shape()
    indiv_true_ratings = np.maximum(np.zeros(graph_shape), np.minimum(np.ones(graph_shape),
                                        ratings_graph.get_user_entity_ratings() - alpha * biases))
    rating_denoms = ratings_graph.get_entity_rating_counts()
    next_true_ratings = np.sum(ratings_graph.get_user_entity_adj_matrix() * indiv_true_ratings, axis=0) / rating_denoms

    # Update Biases
    indiv_biases = (ratings_graph.get_user_entity_adj_matrix()*(ratings_graph.get_user_entity_ratings() - next_true_ratings)).dot(ratings_graph.get_entity_sim())
    bias_denoms = (ratings_graph.get_user_entity_adj_matrix()).dot(ratings_graph.get_entity_sim())
    next_biases = (1-beta)*biases + beta*(indiv_biases) / bias_denoms

    converged = True
    if ((true_ratings is not None) and np.any(np.abs(true_ratings - next_true_ratings) > EPSILON)) or \
        np.any(np.abs(biases - next_biases) > EPSILON):
        converged = False

    return converged, next_true_ratings, next_biases


def mishra_prediction(ratings_graph, initial_alpha=0.99, \
                                     decay_rate=1.00, \
                                     max_iters = 200000, \
                                     beta = 0.1, \
                                     user_entity_specific=False):
    np.random.seed(10)
    ground_truth_ratings = ratings_graph.get_ground_truth_ratings()
    true_ratings = [np.random.uniform((ratings_graph.num_entities,))]
    if not user_entity_specific:
        biases = [np.random.uniform(low = -1, high = 1, size = (ratings_graph.num_users,))]
    else:
        biases = [np.random.uniform(low = -1, high = 1, size = (ratings_graph.num_users, ratings_graph.num_entities))]
    errors = []

    converged = False
    num_iters = 0
    alpha = initial_alpha
    while not converged and num_iters < max_iters:
        true_rate_or_none = None if not true_ratings else true_ratings[-1]
        if not user_entity_specific:
            iter_out = single_iteration(ratings_graph, biases[-1], true_rate_or_none, alpha, beta)
        else:
            iter_out = single_iteration_user_ent(ratings_graph, biases[-1], true_rate_or_none, alpha, beta)

        converged, next_true_ratings, next_biases = iter_out
        true_ratings.append(next_true_ratings)
        biases.append(next_biases)
        if ground_truth_ratings is not None:
            errors.append(np.sqrt(np.mean((next_true_ratings - ground_truth_ratings)**2)))
        num_iters += 1
        alpha = alpha/decay_rate

    return biases, true_ratings, errors
def mean_prediction(ratings_graph):
    user_entity_ratings = ratings_graph.get_user_entity_ratings()
    user_entity_adj_matrix = ratings_graph.get_user_entity_adj_matrix()
    user_entity_adj_matrix_na = user_entity_adj_matrix.copy()
    user_entity_adj_matrix_na[user_entity_adj_matrix==0]=np.nan

    mean_pred = np.nanmean(user_entity_ratings*(user_entity_adj_matrix_na), axis=0)
    return mean_pred
                  
def median_prediction(ratings_graph):
    user_entity_ratings = ratings_graph.get_user_entity_ratings()
    user_entity_adj_matrix = ratings_graph.get_user_entity_adj_matrix()
    user_entity_adj_matrix_na = user_entity_adj_matrix.copy()
    user_entity_adj_matrix_na[user_entity_adj_matrix==0]=np.nan
    
    median_pred = np.nanmedian(user_entity_ratings*(user_entity_adj_matrix_na), axis=0)
    return median_pred

def get_pred_error(pred, ratings_graph):
    error = np.sqrt(np.mean((pred - ratings_graph.get_ground_truth_ratings())**2))
    return error

In [98]:
gen_constant_entity_features = lambda num_features: np.ones(num_features)
gen_random_entity_features = lambda num_features: np.random.uniform(0, 1.0/num_features, num_features)
user_entity_ratings, ground_truth_ratings, user_entity_adj_matrix, entity_features, user_biases = generate_biased_dataset(5, 2000, gen_constant_entity_features)
ratings_graph = RatingsGraph(user_entity_ratings, user_entity_adj_matrix, ground_truth_ratings, entity_features)

In [99]:
mean_pred = mean_prediction(ratings_graph)
median_pred = median_prediction(ratings_graph)
biases, true_ratings, errors = mishra_prediction(ratings_graph)
mishra_pred = true_ratings[-1]
#print (mean_pred)
#print (median_pred)
#print (mishra_pred)
print ('Mean error: ', get_pred_error(mean_pred, ratings_graph))
print ('Median error: ', get_pred_error(median_pred, ratings_graph))
print ('Mishra error: ', get_pred_error(mishra_pred, ratings_graph))

Mean error:  0.038265623293592774
Median error:  0.09666338604742315
Mishra error:  0.038323511825768763


In [97]:
print (biases[-1])
print(ground_truth_ratings)

[-0.32937063 -0.32937063  0.13740498  0.13740498  0.13740498]
[0.19806286 0.76053071 0.16911084 ... 0.18522301 0.36744679 0.69001506]


In [94]:
print (user_biases)
print (mishra_pred)

[[-0.5]
 [-0.5]
 [ 0.1]
 [ 0.1]
 [ 0.1]]
[0.22765092 0.66934391 0.2102797  ... 0.219947   0.32928127 0.59882826]


In [100]:
print ('user_entity_ratings: ', user_entity_ratings)
print ('ground_truth_ratings: ', ground_truth_ratings)
print ('user_entity_adj_matrix: ', user_entity_adj_matrix)
print ('entity_features: ', entity_features)
print ('user_biases: ', user_biases)

user_entity_ratings:  [[0.29806286 0.86053071 0.26911084 ... 0.28522301 0.46744679 0.79001506]
 [0.29806286 0.86053071 0.26911084 ... 0.28522301 0.46744679 0.79001506]
 [0.         0.26053071 0.         ... 0.         0.         0.19001506]
 [0.29806286 0.86053071 0.26911084 ... 0.28522301 0.46744679 0.79001506]
 [0.29806286 0.86053071 0.26911084 ... 0.28522301 0.46744679 0.79001506]]
ground_truth_ratings:  [0.19806286 0.76053071 0.16911084 ... 0.18522301 0.36744679 0.69001506]
user_entity_adj_matrix:  [[1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]]
entity_features:  [[1.]
 [1.]
 [1.]
 ...
 [1.]
 [1.]
 [1.]]
user_biases:  [[ 0.1]
 [ 0.1]
 [-0.5]
 [ 0.1]
 [ 0.1]]
