In [11]:
import argparse
import json
import numpy as np
from compute_scores import pearson_score

In [12]:
def build_arg_parser():
    """Build the command-line parser for the similar-users lookup.

    Returns:
        argparse.ArgumentParser: parser exposing a single mandatory
        ``--user`` option whose value is stored on the parsed namespace
        as ``user``.
    """
    arg_parser = argparse.ArgumentParser(
        description='Find users who are similar to the input user')

    # Keep the option's settings together so they read as one record.
    user_option = {'dest': 'user', 'required': True, 'help': 'Input user'}
    arg_parser.add_argument('--user', **user_option)

    return arg_parser

In [13]:
# Finds users in the dataset that are similar to the input user
def find_similar_users(dataset, user, num_users, score_func=None):
    """Return the ``num_users`` users most similar to ``user``, best first.

    Args:
        dataset: Container keyed by user (it is iterated and tested for
            membership). It is handed to the scoring function unchanged.
        user: Key of the user to compare everybody else against.
        num_users: Maximum number of rows to return.
        score_func: Optional callable ``score_func(dataset, user, other)``
            returning a numeric similarity. Defaults to ``pearson_score``.

    Returns:
        numpy.ndarray: 2-D array with one ``[user, score]`` row per match,
        ordered by decreasing score. Because names and numbers share one
        array, NumPy stores both columns as strings when the keys are
        strings; convert the score with ``float(row[1])``. An empty
        ``(0, 2)`` array is returned when there is nobody to compare with.

    Raises:
        TypeError: If ``user`` is not present in ``dataset``.
    """
    if user not in dataset:
        raise TypeError('Cannot find ' + str(user) + ' in the dataset')

    if score_func is None:
        score_func = pearson_score

    # Score every other user against the input user.
    scores = np.array([[x, score_func(dataset, user, x)]
                       for x in dataset if x != user])

    # Only the input user is present: nothing to rank.
    if scores.size == 0:
        return np.empty((0, 2), dtype=str)

    # The score column may have been coerced to strings; sorting those would
    # be lexicographic ('5e-05' > '0.9', '-0.9' > '-0.1'), so rank on the
    # numeric values instead.
    numeric_scores = scores[:, 1].astype(float)
    ranking = np.argsort(numeric_scores)[::-1]

    # Keep only the top 'num_users' rows.
    return scores[ranking[:num_users]]

In [14]:
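# Command-line entry point, kept for running this code as a script.
# Left disabled here: `--user` is a required option, so parsing fails unless
# the process was started with that flag.
# args = build_arg_parser().parse_args()
# user = args.user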

In [15]:
# User whose most similar users are looked up below. The name must be a key
# of the loaded ratings data, otherwise find_similar_users raises TypeError.
# Alternative input to try:
# user = "Clarissa Jackson"
user = "Bill Duffy"

In [16]:


# Ratings data, located relative to the notebook's working directory.
ratings_file = '../aiwp-data/ratings_05.json'

# Decode explicitly as UTF-8 (the standard JSON encoding) instead of relying
# on the platform's locale default, and let json read the stream directly.
with open(ratings_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

print('Users similar to ' + user + ':\n')
similar_users = find_similar_users(data, user, 3)

# Each row is a [name, score] pair; the score is converted with float()
# before rounding to two decimals for display.
print('User\t\t\tSimilarity score')
print('-'*41)
for item in similar_users:
    print(item[0], '\t\t', round(float(item[1]), 2))

Users similar to Bill Duffy:

User			Similarity score
-----------------------------------------
David Smith 		 0.99
Samuel Miller 		 0.88
Adam Cohen 		 0.86
