In [1]:
import os
import sys
# Put the parent of the current working directory on sys.path so the
# project-local module imported below can be resolved.
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

# NOTE(review): the wildcard import hides where names come from;
# get_nodes_of_type, used in later cells, presumably comes from here - confirm.
from group_recommender_system import *
import networkx as nx
import random

# NOTE(review): absolute, machine-specific path - the notebook only runs where
# this exact file exists. The file is a pickle, so only load it from a trusted
# source. read_gpickle is, as far as I know, unavailable in NetworkX >= 3.0 -
# confirm against the pinned version.
G = nx.read_gpickle("C:\\Users\\ineso\\FEUP-3ano\\gulbenkian-ai\\data\\vegas-restaurants\\vegas_graph_categories.gpickle")

In [2]:
import math
import statistics

In [3]:
# mean of the Jaccard (restaurant overlap) and distance-based (rating) similarities
def category_similarity(G, c1, c2):
    """Combine the two category similarity measures into one score.

    Args:
        G: graph holding category, restaurant and user nodes.
        c1: first category node.
        c2: second category node.

    Returns:
        The arithmetic mean of ``category_similarity_by_restaurant`` and
        ``category_similarity_by_rating`` for the pair.
    """
    restaurant_score = category_similarity_by_restaurant(G, c1, c2)
    rating_score = category_similarity_by_rating(G, c1, c2)
    return (restaurant_score + rating_score) / 2

In [4]:
def category_similarity_by_restaurant(G, c1, c2):
    """Jaccard similarity of the restaurant sets adjacent to two categories.

    Args:
        G: graph whose nodes carry a ``'type'`` attribute.
        c1: first category node.
        c2: second category node.

    Returns:
        ``|R1 & R2| / |R1 | R2|`` where ``R1`` and ``R2`` are the neighbours of
        ``c1`` and ``c2`` whose type is ``'restaurant'``. Returns ``0.0`` when
        neither category has any restaurant neighbour, instead of dividing by
        zero on the empty union.
    """
    rest1 = {r for r in G.neighbors(c1) if G.nodes[r]['type'] == 'restaurant'}
    rest2 = {r for r in G.neighbors(c2) if G.nodes[r]['type'] == 'restaurant'}

    total = rest1 | rest2
    if not total:
        # No restaurants on either side: define the similarity as zero.
        return 0.0

    return len(rest1 & rest2) / len(total)

In [5]:
# using distance based similarity
def category_similarity_by_rating(G, c1, c2):
    """Distance-based similarity of two categories over their shared users.

    Args:
        G: graph whose nodes carry a ``'type'`` attribute and whose
            user-category edges carry a ``'rating'`` attribute.
        c1: first category node.
        c2: second category node.

    Returns:
        ``0`` when no user node is adjacent to both categories; otherwise
        ``1 / (1 + d)`` where ``d`` is the Euclidean distance between the
        shared users' ratings of ``c1`` and of ``c2``.
    """
    raters_c1 = [n for n in G.neighbors(c1) if G.nodes[n]['type'] == 'user']
    raters_c2 = [n for n in G.neighbors(c2) if G.nodes[n]['type'] == 'user']

    shared_raters = set(raters_c1).intersection(raters_c2)
    if not shared_raters:
        return 0

    squared_gap_total = 0
    for rater in shared_raters:
        gap = G[rater][c1]['rating'] - G[rater][c2]['rating']
        squared_gap_total += gap ** 2

    return 1 / (1 + math.sqrt(squared_gap_total))

In [6]:
# using category similarity weighted distance based similarity
def user_similarity(G, a, b, category):
    """Similarity of two users, weighted towards a target category.

    Args:
        G: graph with ``'rating'`` on user-category edges and ``'similarity'``
            on category-category edges.
        a: first user node.
        b: second user node.
        category: category whose edges supply the weights.

    Returns:
        ``1 / (1 + d)`` where ``d`` is the Euclidean norm of the rating
        differences on categories common to both users, each difference scaled
        by the ``'similarity'`` of the edge between ``category`` and that
        common category. Common categories with no edge to ``category`` are
        skipped, so the result is ``1.0`` when nothing contributes.
    """
    weighted_square_total = 0
    for shared in common_categories(G, a, b):
        if not G.has_edge(category, shared):
            continue
        weight = G[category][shared]['similarity']
        rating_gap = G[a][shared]['rating'] - G[b][shared]['rating']
        weighted_square_total += (weight * rating_gap) ** 2
    return 1 / (1 + math.sqrt(weighted_square_total))

In [7]:
def user_avg_rating(G, user):
    """Mean of the ratings on the edges from ``user`` to category nodes.

    Args:
        G: graph whose nodes carry ``'type'`` and whose user-category edges
            carry ``'rating'``.
        user: user node to average over.

    Returns:
        ``statistics.mean`` of the collected ratings.

    Raises:
        statistics.StatisticsError: if ``user`` has no category neighbour.
    """
    category_ratings = []
    for nbr in G.neighbors(user):
        if G.nodes[nbr]['type'] == 'category':
            category_ratings.append(G[user][nbr]['rating'])
    return statistics.mean(category_ratings)

In [8]:
def neighbors_of_type(G, node, n_type):
    """List the neighbours of ``node`` whose ``'type'`` attribute equals ``n_type``.

    Args:
        G: graph whose nodes carry a ``'type'`` attribute.
        node: node whose neighbours are inspected.
        n_type: type value to keep.

    Returns:
        A list of matching neighbours, in the order ``G.neighbors`` yields them.
    """
    matching = []
    for candidate in G.neighbors(node):
        if G.nodes[candidate]['type'] == n_type:
            matching.append(candidate)
    return matching

In [9]:
def predict_category_rating(G, user, category):
    """Predict the rating ``user`` would give ``category``.

    The prediction is the user's own average category rating plus the
    similarity-weighted mean of how far every other user's rating of
    ``category`` sits from that other user's own average.

    Args:
        G: graph with ``'type'`` on nodes and ``'rating'`` on user-category
            edges.
        user: user node to predict for.
        category: category node to predict a rating for.

    Returns:
        The predicted rating. When no other user is linked to ``category``
        there is nothing to weight, so the user's own average rating is
        returned instead of dividing by zero.
    """
    avg_rating = user_avg_rating(G, user)

    similarity_sum = 0
    rating_distance_sum = 0
    for nbr in neighbors_of_type(G, category, 'user'):
        if nbr == user:
            continue
        sim = user_similarity(G, user, nbr, category)
        similarity_sum += sim
        # Deviation of the neighbour's rating from the neighbour's own mean.
        rating_distance_sum += sim * (G[nbr][category]['rating'] - user_avg_rating(G, nbr))

    if similarity_sum == 0:
        # No other rater contributed: fall back to the user's own average.
        return avg_rating
    return avg_rating + rating_distance_sum / similarity_sum


In [10]:
def common_categories(G, a, b):
    """List the category nodes adjacent to both ``a`` and ``b``.

    Args:
        G: graph whose nodes carry a ``'type'`` attribute.
        a: first node; the result follows the order of its neighbours.
        b: second node.

    Returns:
        A list of nodes that neighbour both ``a`` and ``b`` and whose type is
        ``'category'``.
    """
    # G.has_edge is a direct adjacency lookup; testing membership in
    # G.neighbors(b) would rebuild and linearly scan an iterator for every
    # neighbour of ``a``.
    return [
        c for c in G.neighbors(a)
        if G.has_edge(b, c) and G.nodes[c]['type'] == 'category'
    ]

In [21]:
# To pin a specific user instead of sampling one, use:
#test_user = '10cQOjA3q_wDVOumz7iwyQ'
test_user = random.choice(get_nodes_of_type(G, 'user'))
print("User: ", test_user, "\n")

# Users adjacent to any restaurant that test_user is adjacent to. test_user is
# reached through its own restaurants, so it ends up in the set as well.
users = set()
for r in G.neighbors(test_user):
    if G.nodes[r]['type'] == 'restaurant':
        for u in G.neighbors(r):
            if G.nodes[u]['type'] == 'user':
                users.add(u)
print(users)

# Materialise test_user's neighbours once: testing membership against
# G.neighbors(test_user) inside the loop would rescan a fresh iterator for
# every candidate restaurant.
test_user_neighbors = set(G.neighbors(test_user))

# Restaurants of those users that test_user is not already linked to.
recommendations = set()
for n in users:
    for r in G.neighbors(n):
        if G.nodes[r]['type'] == 'restaurant' and r not in test_user_neighbors:
            recommendations.add(r)

# For each candidate restaurant, show the predicted rating of each of its categories.
for r in recommendations:
    print(G.nodes[r]['name'])
    for c in neighbors_of_type(G, r, 'category'):
        print("*", c, "-", predict_category_rating(G, test_user, c))

User:  Ly-ajA47OZX3ZveeAxtvkg 

{'Ly-ajA47OZX3ZveeAxtvkg', '_pX8bHEc8ylD9gx1ZJbYgQ', 'SyfftyG4GQFJn8RUHTHPww', '1LMaudGtyj7v9_cRek-U0w', 'PK1hc_jXHwsCG8D3-ZhrHg', 'rbg-7RcncQ_GTQht-dBRGw', '8YaFdh5fGg-M6KPIHv7ivw', 'fDSFrpf8949UJVNbG3rGgQ', 'FNISHzqiEewhEPk9b65EMQ', 'y1b_9KmwNUWG1AS3CazFLA', 'rMa7iIhgU6eme7FU_SsurQ', 'ujKzyt5MRF_4vNLEM2wMbw'}
Café Zupas
* Soup - 4.0221755224442735
* Sandwiches - 3.9941116141397437
* Salad - 3.9901306535996164
* American (New) - 3.9927342013158915


In [22]:
category = 'Food'
# Print, for every category node, its rating-based and restaurant-based
# similarity to the reference category.
for c in get_nodes_of_type(G, 'category'):
    rating_score = category_similarity_by_rating(G, category, c)
    restaurant_score = category_similarity_by_restaurant(G, category, c)
    print(c, " - ", rating_score, " - ", restaurant_score)

Pizza  -  0.09089463166458112  -  0.07671232876712329
Salad  -  0.13004672183153887  -  0.06827309236947791
Burgers  -  0.11557201185679884  -  0.09377901578458682
Italian  -  0.15313066394523528  -  0.06325581395348837
Specialty Food  -  0.2919183330606551  -  0.1291053227633069
Japanese  -  0.10805777492275676  -  0.04304029304029304
Sushi Bars  -  0.1532899285617279  -  0.035440613026819924
Dim Sum  -  0.1907435698305462  -  0.00331858407079646
Vegetarian  -  0.3090169943749474  -  0.03751339764201501
Food  -  1.0  -  1.0
Ramen  -  0.25824569976124334  -  0.007633587786259542
Chinese  -  0.15242042951340282  -  0.033930254476908575
Vegan  -  0.14359009633730024  -  0.0536480686695279
Local Flavor  -  0.3567891723253309  -  0.0167973124300112
Food Stands  -  1.0  -  0.01348314606741573
Street Vendors  -  0.2989350844248255  -  0.015855039637599093
Arts & Entertainment  -  0.1827439976315568  -  0.032388663967611336
Festivals  -  1.0  -  0.0011325028312570782
Performing Arts  -  1.0  

In [65]:
def cosine_similarity(G, a, b):
    """Cosine similarity of two users' ratings over their common categories.

    Args:
        G: graph with ``'rating'`` on user-category edges.
        a: first user node.
        b: second user node.

    Returns:
        The dot product of the two rating vectors (restricted to categories
        both users are linked to) divided by the product of their Euclidean
        norms. Returns ``0.0`` when that product is zero, i.e. the users share
        no category or one restricted rating vector is all zeros, instead of
        dividing by zero.
    """
    sum_rating_a_b = 0
    sum_rating_sqr_a = 0
    sum_rating_sqr_b = 0

    for c in common_categories(G, a, b):
        rating_a = G[a][c]['rating']
        rating_b = G[b][c]['rating']
        sum_rating_a_b += rating_a * rating_b
        sum_rating_sqr_a += rating_a ** 2
        sum_rating_sqr_b += rating_b ** 2

    denominator = math.sqrt(sum_rating_sqr_a) * math.sqrt(sum_rating_sqr_b)
    if denominator == 0:
        # Nothing to compare: treat the users as having no similarity.
        return 0.0
    return sum_rating_a_b / denominator

In [32]:
import math

def pearson_similarity(G, a, b):
    """Pearson-style correlation of two users' ratings over common categories.

    Deviations are measured from each user's mean over all of their category
    ratings (``user_avg_rating``), not only over the shared categories.

    Args:
        G: graph with ``'type'`` on nodes and ``'rating'`` on user-category
            edges.
        a: first user node.
        b: second user node.

    Returns:
        The sum of products of the two users' deviations divided by the
        product of the Euclidean norms of those deviations. Returns ``0.0``
        when that product is zero (no common category, or one user never
        deviates from their own mean on the shared categories) instead of
        dividing by zero.
    """
    avg_rating_a = user_avg_rating(G, a)
    avg_rating_b = user_avg_rating(G, b)

    sum_deviations = 0
    sum_dev_sqr_a = 0
    sum_dev_sqr_b = 0

    for c in common_categories(G, a, b):
        dev_a = G[a][c]['rating'] - avg_rating_a
        dev_b = G[b][c]['rating'] - avg_rating_b

        sum_deviations += dev_a * dev_b
        sum_dev_sqr_a += dev_a ** 2
        sum_dev_sqr_b += dev_b ** 2

    denominator = math.sqrt(sum_dev_sqr_a) * math.sqrt(sum_dev_sqr_b)
    if denominator == 0:
        # Correlation is undefined here; report no similarity.
        return 0.0
    return sum_deviations / denominator

In [66]:
# To sample a random user instead of the fixed one, use:
#test_user = random.choice(get_nodes_of_type(G, 'user'))
test_user = 'BIO98HB3ZDpNpQT49q3djQ'
print("User: ", test_user, "\n")

# Every other user linked to at least one category that test_user is linked to.
users = set()
for c in G.neighbors(test_user):
    if G.nodes[c]['type'] != 'category':
        continue
    for u in G.neighbors(c):
        if G.nodes[u]['type'] != 'user' or u == test_user:
            continue
        users.add(u)

# Show each such user's name with their cosine similarity to test_user.
for user in users:
    display_name = G.nodes[user]['name']
    score = cosine_similarity(G, test_user, user)
    print('*', display_name, '-', score)

s - 1.0
* Tod - 1.0
* Bob-O - 0.9999999999999999
* Bart - 1.0
* J - 1.0
* Diana - 1.0
* Tara - 1.0
* Marty - 0.9999999999999998
* Ricardo - 1.0
* Sascha - 1.0
* Kat - 1.0
* Beverly - 0.9999999999999998
* Sandy - 0.9899494936611665
* Josephine - 1.0
* Nannette - 0.9999999999999998
* Ethan - 0.9999999999999998
* Melissa - 0.9999999999999998
* Lily - 1.0
* Linda - 1.0
* Aileen - 1.0
* Holly - 1.0
* Matt - 1.0
* Kimmie - 1.0
* Amy - 1.0000000000000002
* Aubrey - 1.0
* Mercy - 1.0
* Mike - 0.9999999999999998
* R - 1.0000000000000002
* Carina - 1.0
* Nicole - 0.9999999999999998
* Jamie - 1.0
* Queen - 1.0
* Marina - 0.9999999999999998
* Angie - 1.0000000000000002
* Becky - 0.9999999999999999
* Dorian - 1.0000000000000002
* Monte - 1.0
* Lyn - 1.0
* Lina - 1.0000000000000002
* Brian - 1.0
* Elena - 1.0
* S - 1.0
* Kelly - 1.0
* Jack - 0.9999999999999998
* Sharon - 1.0
* Brandon-Cindy - 0.9999999999999998
* Amy - 1.0
* Amiee - 1.0
* Shea - 1.0
* Joshua - 0.9999999999999998
* Matt - 1.0
* Cathy