In [2]:
import scripts.util as ut
import recomender.trivial as trivial
import scripts.evaluate as eval

import json
import numpy as np
from sklearn.model_selection import train_test_split as sk_train_test_split
from operator import itemgetter


In [3]:
# Separate test and train
test, train = ut.read_ratings()

# put test in the same recomendation format
test_recomendation = eval.test_to_recomendation(test)



In [4]:
train_user_dict, train_item_dict, train_ratings_dict = train


In [5]:
def read_content():
    with open(ut.CONTENT, 'r') as json_file:
        json_list = list(json_file)


    content_dict = {}
    for json_str in json_list:
        result = json.loads(json_str)
        if (isinstance(result, dict)):
            content_dict[result['ItemId']] = result   
    return content_dict

In [6]:

def make_genre_dict(train_user_dict, content_dict):  
  genre_dict = {}
  for user_id in train_user_dict.keys():
    genre_dict[user_id] = {}
    items = train_user_dict[user_id]['Items']
    for item in items:
      genres = content_dict[item]['Genre'].split(', ')
      for genre in genres:
        if genre not in genre_dict[user_id]: genre_dict[user_id][genre] = []
        genre_dict[user_id][genre].append(item)
  return genre_dict
genre_dict = make_genre_dict(train_user_dict, content_dict)

In [47]:
def rating_by_genre(user, item, ratings, content_dict, genre_dict):

  genre_count = 1
  rate_by_genre = 0
  genres = content_dict[item]['Genre'].split(',')
  for genre in genres:
    if not genre in genre_dict[user]: continue
    n_items = len(genre_dict[user][genre])
    genre_rating = 0 
    for genre_item in genre_dict[user][genre]:
      genre_rating += ratings[user + ':' + genre_item]['Rating']
    genre_rating = genre_rating/n_items
    rate_by_genre += genre_rating
    genre_count += 1
  
  return rate_by_genre, genre_count

def genre_prediction(train, targets, content_dict):
  train_user_dict, train_item_dict, train_ratings_dict = train
  genre_dict = make_genre_dict(train_user_dict, content_dict)

  mean_rating = 0
  for key in train[2]:
    mean_rating += train[2][key]['Rating']
  mean_rating = mean_rating/len(train[2])

  predictions = []
  for pair in targets:
    _tuser, _titem = pair.split(':')
    dif_item_mean = 0
    dif_rating = 0
    weight = 1
    if _titem in train_item_dict:
      _item_dict = train_item_dict[_titem]
      dif_item_mean = _item_dict['Rating_sum']/len(_item_dict['Users']) - mean_rating
    
      
      if _tuser in train_user_dict: 
        dif_rating, weight = rating_by_genre(_tuser, _titem, train_ratings_dict, 
                                              content_dict, genre_dict) 
        if dif_rating > 0: dif_rating = dif_rating - mean_rating
    
    prediction = mean_rating + (dif_rating + dif_item_mean)/(weight) 
    if prediction < 0: break
    predictions.append([_tuser, _titem, prediction])
  
  predictions = sorted(predictions, key=itemgetter(2), reverse=True)  
  predictions = sorted(predictions, key=itemgetter(0), reverse=False)
  return predictions
predictions = genre_prediction(train, test[2].keys(), content_dict)
predictions[0]

['00053f5e11', '199f8f5ff4', 7.5975889781859935]

In [21]:
# External Imports
import numpy as np
from sklearn.metrics import ndcg_score
from operator import itemgetter


  # invert preference
def invert_preference(expected_dict):
  for user in expected_dict.keys():
    for idx, pair in enumerate(expected_dict[user]):
      item, relevance = pair
      expected_dict[user][idx][1] = len(expected_dict[user]) - relevance 
  return expected_dict

def invert_preference2(expected_dict, user_preference):
  for pair in expected_dict.keys():
    user, item = pair.split(',')
    expected_dict[pair] = user_preference[user] - expected_dict[pair] 
    
  return expected_dict

def get_relevance(recomendation, expected):
  expected_dict = {}
  recomendation_dict = {}

  # Create dict with user item and true relevance
  user_relevance = {}
  for pair in expected:
    user, item = pair[:-1].split(',')
    if user not in expected_dict: 
      expected_dict[user] = []
      user_relevance[user] = -1
    user_relevance[user] += 1
    expected_dict[user].append([item, user_relevance[user]])
  expected_dict = invert_preference(expected_dict) 


  # create a dict with the pairs and relevance in relation to the user
  user_relevance = {}
  for pair in recomendation:
    pair = pair[:-1]
    user, item = pair.split(',')
    if user not in user_relevance: user_relevance[user] = -1
    user_relevance[user] += 1
    recomendation_dict[pair] = user_relevance[user]
  # print(recomendation_dict)
  recomendation_dict = invert_preference2(recomendation_dict, user_relevance)

  # match the order of the true relevance to the predicted relevance
  true_relevance = []
  expected_relevance = []
  for user in expected_dict.keys():
    for item_relevance in expected_dict[user]:
      item, relevance = item_relevance
      true_relevance.append(relevance)
      expected_relevance.append(recomendation_dict[user + ',' + item])      




  return true_relevance, expected_relevance

def discount_cumulative_gain(recomendation, expected):
  #true_relevance, expected_relevance =
  # return get_relevance(recomendation, expected)
  true_relevance, expected_relevance = get_relevance(recomendation, expected)
  
  return ndcg_score(np.array([true_relevance]), np.array([expected_relevance]))




#############################################################################################


# Separate test and train
test, train = ut.read_ratings(test_size=.33)
# put test in the same recomendation format
test_recomendation = eval.test_to_recomendation(test)
# Make recommendations
recomendation = trivial.trivial_recomendation(train, test[2].keys(),user_mean=True)
# Calculate nDCG
discount_cumulative_gain(recomendation, test_recomendation)

0.969305466689108

In [24]:
len(test[2]), len(train[2])

(164930, 494790)