In [99]:
import networkx as nx
import pandas as pd
import math

In [100]:

# Load the relationship table; the preview shows one (id1, id2) pair of user ids per row.
# Forward slashes keep this relative path valid on Windows, Linux and macOS alike;
# the previous backslash-separated path only resolved on Windows.
df = pd.read_csv('CrawlData/data/reprocessed_data/reduce_relationship.csv')
df.head()

Unnamed: 0,id1,id2
0,8674,2382
1,8674,12512
2,8674,10317
3,8674,1098
4,8674,14219


In [101]:
# Every distinct user id that appears in either column of the relationship table.
nodes = set(df['id1']).union(df['id2'])

In [102]:
# Start from an empty undirected graph; nodes and edges are added in the next cell.
graph = nx.Graph()

In [103]:
# Register every user id as a node, then connect the two users of each row.
graph.add_nodes_from(nodes)
# Insert all edges in one call, straight from the two id columns, instead of
# looping over DataFrame.iterrows(), which materialises a Series per row.
graph.add_edges_from(zip(df['id1'], df['id2']))

In [104]:
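# Optional drawing of the whole graph, left disabled.
# NOTE: before uncommenting, add `import matplotlib.pyplot as plt` to the import
# cell -- `plt` is not imported there.
# plt.figure(figsize=(16,16))
# pos = nx.spring_layout(graph)
# nx.draw_networkx_labels(graph, pos, font_size=10)
# nx.draw_networkx_edges(graph, pos, alpha=0.2)
# nx.draw_networkx_nodes(graph, pos, node_size=3)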

In [105]:
def friends(graph: nx.Graph, user):
    """Return the set of nodes directly connected to ``user`` in ``graph``."""
    return {neighbour for neighbour in graph.neighbors(user)}

In [106]:
def number_of_friends(graph: nx.Graph, user):
    """Return how many distinct neighbours ``user`` has in ``graph``."""
    distinct_neighbours = {*graph.neighbors(user)}
    return len(distinct_neighbours)

In [107]:
def friends_of_friends(graph: nx.Graph, user):
    """Return the users exactly two hops away from ``user``.

    The result holds every neighbour of ``user``'s neighbours, excluding
    ``user``'s direct neighbours and ``user`` itself.

    Parameters:
        graph: graph exposing ``neighbors(node)``.
        user: node whose second-degree connections are wanted.

    Returns:
        A new ``set`` of nodes; empty when ``user`` has no neighbours.
    """
    direct_friends = set(graph.neighbors(user))
    second_degree = set()
    for friend in direct_friends:
        second_degree.update(graph.neighbors(friend))
    # Drop anyone who is already a direct friend ...
    second_degree -= direct_friends
    # ... and the user. ``discard`` is a no-op when the user is absent, so no
    # bare ``except`` is needed to guard the removal.
    second_degree.discard(user)
    return second_degree

In [108]:
def mutual_friends(graph: nx.Graph, user1, user2):
    """Return the set of users who are friends with both ``user1`` and ``user2``."""
    return friends(graph, user1) & friends(graph, user2)

In [109]:
def number_of_mutual_friends(graph: nx.Graph, user1, user2):
    """Return how many friends ``user1`` and ``user2`` have in common."""
    shared = mutual_friends(graph, user1, user2)
    return len(shared)

In [110]:
def number_of_mutual_friends_map(graph: nx.Graph, user):
    """Pair every other node with its number of friends shared with ``user``.

    Candidates are all nodes of ``graph`` except ``user``, so nodes that are
    already friends of ``user`` are included as well.

    Parameters:
        graph: graph exposing ``nodes()`` and ``neighbors(node)``.
        user: node to compare every other node against.

    Returns:
        A list of ``(node, mutual_friend_count)`` tuples in node order.

    Raises:
        ValueError: if ``user`` is not among ``graph.nodes()``.
    """
    # Named ``candidates`` rather than ``all`` to avoid shadowing the builtin.
    candidates = list(graph.nodes())
    candidates.remove(user)
    # The user's friend set does not depend on the candidate, so build it once
    # instead of once per candidate.
    user_friends = set(graph.neighbors(user))
    return [
        (candidate, len(user_friends.intersection(graph.neighbors(candidate))))
        for candidate in candidates
    ]

In [111]:
def get_ranked_list_by_mutual_friends(graph: nx.Graph, user, k = 10):
    """Return the first ``k`` ``(node, mutual_friend_count)`` pairs, highest count first."""
    scored = number_of_mutual_friends_map(graph, user)
    # Stable descending sort on the count: ties keep their original node order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:k]

In [112]:
def influence_score(graph: nx.Graph, user1, user2):
    """Score how strongly the shared friends of two users link them.

    Each friend shared by ``user1`` and ``user2`` contributes
    ``1 / log(number of that friend's friends)``, so a shared friend with few
    connections weighs more than a highly connected one (the Adamic-Adar
    index).

    Parameters:
        graph: graph exposing ``neighbors(node)``.
        user1: first node.
        user2: second node.

    Returns:
        The summed score; ``0`` when the users share no friends.
    """
    shared_friends = set(graph.neighbors(user1)) & set(graph.neighbors(user2))
    score = 0
    for friend in shared_friends:
        degree = len(set(graph.neighbors(friend)))
        # log(1) == 0 would raise ZeroDivisionError. A shared friend can only
        # have a single connection when user1 and user2 are the same node, so
        # skipping it leaves scores for distinct users unchanged.
        if degree > 1:
            score += 1 / math.log(degree)
    return score

In [113]:
def influence_score_map(graph: nx.Graph, user):
    """Pair every node other than ``user`` with its influence score against ``user``.

    Returns a list of ``(node, score)`` tuples in node order.
    """
    others = list(graph.nodes())
    others.remove(user)
    return [(other, influence_score(graph, user, other)) for other in others]

In [114]:
def get_ranked_list_by_influence_score(graph: nx.Graph, user, k = 10):
    """Return the first ``k`` ``(node, influence_score)`` pairs, highest score first."""
    scored = influence_score_map(graph, user)
    # Stable descending sort on the score: ties keep their original node order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:k]

In [115]:
def get_recommended_list(graph: nx.Graph, user, option: str, k = 10):
    """Return the top-``k`` ranked candidates for ``user`` using the chosen ranking.

    ``option`` selects the ranking: ``'influence'`` ranks by influence score,
    ``'friends'`` by number of mutual friends. Any other value prints a hint
    and yields an empty list.
    """
    if option == 'influence':
        return get_ranked_list_by_influence_score(graph, user, k=k)
    if option == 'friends':
        return get_ranked_list_by_mutual_friends(graph, user, k=k)
    # Unknown ranking name: report it and fall back to no recommendations.
    print('parameter "option" accept "influence" or "friends"')
    return []

In [116]:
def is_recommendation_true(graph: nx.Graph, user1, user2, option: str, k=10):
    """Check whether a hidden friendship would be recommended back.

    The edge between ``user1`` and ``user2`` is removed temporarily, a ranked
    list is computed for each of the two users, and the edge is put back.

    Parameters:
        graph: graph containing the edge ``(user1, user2)``; it is mutated
            during the call and restored before returning.
        user1: one endpoint of the edge.
        user2: the other endpoint of the edge.
        option: ranking name passed on to ``get_recommended_list``.
        k: base length of each ranked list; each list is extended by that
            user's friend count, measured before the edge is removed.

    Returns:
        ``True`` if ``user2`` appears in ``user1``'s list or ``user1`` appears
        in ``user2``'s list, otherwise ``False``.

    Raises:
        Whatever ``graph.remove_edge`` raises when the edge does not exist.
    """
    # NOTE(review): the lists are presumably widened by the friend counts because
    # the rankings also contain existing friends -- confirm with the author.
    k1 = number_of_friends(graph, user1)
    k2 = number_of_friends(graph, user2)
    graph.remove_edge(user1, user2)
    try:
        ranked_for_user1 = get_recommended_list(graph, user1, option=option, k=k + k1)
        ranked_for_user2 = get_recommended_list(graph, user2, option=option, k=k + k2)
    finally:
        # Always restore the edge, even if ranking fails, so the shared graph
        # is never left modified. The edge is re-added without attributes.
        graph.add_edge(user1, user2)
    recommended_to_user1 = {entry[0] for entry in ranked_for_user1}
    recommended_to_user2 = {entry[0] for entry in ranked_for_user2}
    return user1 in recommended_to_user2 or user2 in recommended_to_user1


In [117]:
# Evaluation sample: the first 500 rows of the relationship table.
test_set = df.iloc[:500]

In [118]:
# Share of sampled relationships that the 'influence' ranking recovers when the
# edge is temporarily hidden (k=50).
# The boolean result is summed directly (no `== True` comparison), and the id
# columns are zipped instead of iterating with DataFrame.iterrows().
count = sum(
    is_recommendation_true(graph, id1, id2, option='influence', k=50)
    for id1, id2 in zip(test_set['id1'], test_set['id2'])
)
print(count/len(test_set))

0.836


In [119]:
# Share of sampled relationships that the 'friends' (mutual-friend count)
# ranking recovers when the edge is temporarily hidden (k=50).
# The boolean result is summed directly (no `== True` comparison), and the id
# columns are zipped instead of iterating with DataFrame.iterrows().
count = sum(
    is_recommendation_true(graph, id1, id2, option='friends', k=50)
    for id1, id2 in zip(test_set['id1'], test_set['id2'])
)
print(count/len(test_set))

0.862
