In [1]:
import networkx as nx
import matplotlib.pyplot as plt
import operator
import random

Create a graph named facebook from the Facebook data in file facebook-links.txt. As above, use the Graph class.

In [2]:
# Module-level graph, created empty here; open_source_file() adds the
# user-to-user links to it.
facebook = nx.Graph()

File facebook-links.txt contains a list of all of the user-to-user links from the Facebook New Orleans networks. 
These links are undirected on Facebook.

Format: Each line contains two numeric user identifiers, 
meaning the second user appeared in the first user's friend list, 
and the first user appeared in the second user's friend list. 
Finally, the third column is a UNIX timestamp with the time of link establishment 
(if it could be determined, otherwise it is unavailable).

In [3]:
def open_source_file(filename, graph=None):
    """Load the Facebook friendship edge list into a graph.

    Each usable line holds at least two whitespace-separated fields; the
    first two are the user identifiers of one friendship and any further
    fields (for example the timestamp column) are ignored.  Splitting on
    arbitrary whitespace accepts tab- and space-delimited files alike, which
    replaces the two copy-pasted branches of the earlier version (the second
    of which could never run because it tested the same file name).

    Parameters
    ----------
    filename : str
        Path of the edge list.  Only "../data/facebook-links.txt" is
        accepted; for any other value a message is printed and nothing is
        read.
    graph : graph object, optional
        Graph that receives the edges through ``graph.add_edge``.  Defaults
        to the module-level ``facebook`` graph.

    Returns
    -------
    None
        The graph is modified in place.
    """
    if filename != "../data/facebook-links.txt":
        print ("Not a valid file, please check again")
        return

    if graph is None:
        graph = facebook

    with open(filename, "r") as source:
        # Stream the file instead of materialising every line in memory.
        for line in source:
            fields = line.split()
            if len(fields) < 2:
                # Blank or malformed line: there is no complete pair to add.
                continue
            # add_edge creates missing endpoints, so no add_node calls are needed.
            graph.add_edge(fields[0], fields[1])

In [4]:
# Read the edge list and add its links to the module-level `facebook` graph.
open_source_file("../data/facebook-links.txt")

## Graph basic information

In [5]:
def show_basic_info(graph):
    """Print a short summary of ``graph``: name, type, size and average degree.

    The summary is assembled from basic graph accessors instead of
    ``nx.info``, which is no longer available in NetworkX 3.0 and later.  The
    layout follows the summary recorded in this notebook (name, type, node
    count, edge count, average degree).

    Parameters
    ----------
    graph : networkx graph
        Graph to summarise.

    Returns
    -------
    None
        The summary is written to standard output.
    """
    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    lines = [
        "Name: %s" % graph.name,
        "Type: %s" % type(graph).__name__,
        "Number of nodes: %d" % node_count,
        "Number of edges: %d" % edge_count,
    ]
    # An average degree is undefined for a graph without nodes.
    if node_count > 0:
        if graph.is_directed():
            # Every directed edge adds one in-degree and one out-degree.
            mean_degree = edge_count / node_count
            lines.append("Average in degree: %8.4f" % mean_degree)
            lines.append("Average out degree: %8.4f" % mean_degree)
        else:
            # Every undirected edge adds to the degree of both endpoints.
            lines.append("Average degree: %8.4f" % (2 * edge_count / node_count))
    print("\n".join(lines))

In [6]:
# Print summary information for the loaded graph.
show_basic_info(facebook)

Name: 
Type: Graph
Number of nodes: 63731
Number of edges: 817090
Average degree:  25.6418


## friends set 

Returns a set of the friends of the given user, in the given graph.
The parameter 'user' is the string name of a person in the graph.

In [7]:
def friends(graph, user):
    """Return the set of nodes adjacent to ``user`` in ``graph``.

    Parameters
    ----------
    graph : graph object exposing ``neighbors(node)``
    user : node identifier present in ``graph``

    Returns
    -------
    set
        A new set holding every neighbour reported by ``graph.neighbors``.
    """
    return {neighbor for neighbor in graph.neighbors(user)}

## friends of friends set

Returns a set of friends of friends of the given user, in the given graph.
The result does not include the given user nor any of that user's friends.

In [8]:
def friends_of_friends(graph, user):
    """Return the friends of ``user``'s friends, excluding the inner circle.

    The result contains every node adjacent to at least one friend of
    ``user``, minus ``user`` and minus ``user``'s own friends.

    A user without any friends yields an empty set.  The earlier version
    called ``set.remove(user)`` unconditionally and therefore raised
    ``KeyError`` in that case, because ``user`` only appears among the
    candidates when at least one friend links back to it.

    Parameters
    ----------
    graph : graph object exposing ``neighbors(node)``
    user : node identifier present in ``graph``

    Returns
    -------
    set
        Friends of friends of ``user``.
    """
    direct_friends = set(graph.neighbors(user))

    candidates = set()
    for friend in direct_friends:
        candidates.update(graph.neighbors(friend))

    # Drop people the user already knows, then the user themself.
    candidates -= direct_friends
    candidates.discard(user)
    return candidates

## Common friends between two users

Returns the set of friends that user1 and user2 have in common.

In [9]:
def common_friends(graph, user1, user2):
    """Return the set of users that are friends of both ``user1`` and ``user2``.

    Parameters
    ----------
    graph : graph object accepted by ``friends``
    user1, user2 : node identifiers present in ``graph``

    Returns
    -------
    set
        Intersection of the two users' friend sets.
    """
    return friends(graph, user1).intersection(friends(graph, user2))

Returns a map from each user U to the number of friends U has in common with the given user.
    The map keys are the users who have at least one friend in common with the
    given user, and are neither the given user nor one of the given user's friends.
    Take a graph G for example:
        - A and B have two friends in common
        - A and C have one friend in common
        - A and D have one friend in common
        - A and E have no friends in common
        - A is friends with D
    number_of_common_friends_map(G, "A")  =>   { 'B':2, 'C':1 }

In [10]:
def number_of_common_friends_map(graph, user):
    """Map each friend-of-friend of ``user`` to their number of common friends.

    Keys are the nodes that share at least one friend with ``user`` and are
    neither ``user`` nor one of ``user``'s friends; values are the number of
    shared friends.

    The counts are gathered in one pass over the neighbours of ``user``'s
    friends: in an undirected graph, every neighbour of a friend F has F in
    common with ``user``.  This replaces one set intersection per candidate,
    returns an empty map for a user without friends instead of raising, and
    uses the node identifiers themselves as keys (no ``str`` coercion).

    Parameters
    ----------
    graph : undirected graph object exposing ``neighbors(node)``
    user : node identifier present in ``graph``

    Returns
    -------
    dict
        ``{candidate: number_of_common_friends}``.
    """
    direct_friends = set(graph.neighbors(user))

    shared_counts = {}
    for friend in direct_friends:
        # set() guards against a neighbour being reported more than once.
        for candidate in set(graph.neighbors(friend)):
            if candidate == user or candidate in direct_friends:
                continue
            shared_counts[candidate] = shared_counts.get(candidate, 0) + 1
    return shared_counts

## Key list

Given a map whose values are numbers, return a list of the keys. The keys are sorted by the number they map to, from greatest to least. When two keys map to the same number, the keys are sorted by their natural sort order, from least to greatest.

In [11]:
def number_map_to_sorted_list(map):
    """Return the keys of ``map`` ordered by descending value.

    Keys that map to the same number are ordered by ascending key.

    Parameters
    ----------
    map : dict
        Mapping whose values are numbers.

    Returns
    -------
    list
        The keys of ``map`` in ranked order.
    """
    def ranking(key):
        # Negating the value turns the ascending sort into "largest first".
        return (-map[key], key)

    return sorted(map, key=ranking)

## Recommend  by number of common friends

Return a list of friend recommendations for the given user. The friend recommendation list consists of names of people in the graph who are not yet a friend of the given user. The order of the list is determined by the number of common friends, from most to fewest.

In [12]:
def recommend_by_number_of_common_friends(graph, user):
    """Return friend recommendations for ``user`` ranked by common friends.

    The candidates and their counts come from
    ``number_of_common_friends_map``; ``number_map_to_sorted_list`` turns that
    map into the ranked list of candidates.

    Parameters
    ----------
    graph : graph object accepted by ``number_of_common_friends_map``
    user : node identifier present in ``graph``

    Returns
    -------
    list
        Candidate identifiers in ranked order.
    """
    return number_map_to_sorted_list(number_of_common_friends_map(graph, user))

## Influence scoring

Returns a map from each user U to the friend influence of U with respect to the given user. The map only contains users who have at least one friend in common with the given user, and are neither the given user nor one of the given user's friends.

"Influence scoring": if user1 and user2 have three friends in common, f1, f2 and f3, the score for user2 as a friend of user1 is 1/numfriends(f1) + 1/numfriends(f2) + 1/numfriends(f3), where numfriends(f) is the number of friends that f has. In other words, each friend F of user1 has a total influence score of 1 to contribute, and divides it equally among all of F's friends.

In [13]:
def influence_map(graph, user):
    """Map each friend-of-friend of ``user`` to their influence score.

    Every friend F of ``user`` has a total influence of 1 that is split
    evenly among F's friends.  A candidate's score is the sum of the shares
    received from the friends it has in common with ``user``.  Candidates are
    the nodes that share at least one friend with ``user`` and are neither
    ``user`` nor one of ``user``'s friends.

    The scores are accumulated in one pass over the neighbours of ``user``'s
    friends instead of recomputing the common-friend set for every candidate.
    A user without friends yields an empty map instead of raising, and the
    node identifiers themselves are used as keys (no ``str`` coercion).

    Parameters
    ----------
    graph : undirected graph object exposing ``neighbors(node)``
    user : node identifier present in ``graph``

    Returns
    -------
    dict
        ``{candidate: influence_score}``.
    """
    direct_friends = set(graph.neighbors(user))

    scores = {}
    for friend in direct_friends:
        friend_circle = set(graph.neighbors(friend))
        if not friend_circle:
            # Cannot happen in an undirected graph (user is a neighbour);
            # guards the division for other graph types.
            continue
        share = 1 / len(friend_circle)
        for candidate in friend_circle:
            if candidate == user or candidate in direct_friends:
                continue
            scores[candidate] = scores.get(candidate, 0) + share
    return scores

## Recommend by influence

Return a list of friend recommendations for the given user. The friend recommendation list consists of names of people in the graph who are not yet a friend of the given user. The order of the list is determined by the influence measurement.

In [14]:
def recommend_by_influence(graph, user):
    """Return friend recommendations for ``user`` ranked by influence score.

    The candidates and their scores come from ``influence_map``;
    ``number_map_to_sorted_list`` turns that map into the ranked list of
    candidates.

    Parameters
    ----------
    graph : graph object accepted by ``influence_map``
    user : node identifier present in ``graph``

    Returns
    -------
    list
        Candidate identifiers in ranked order.
    """
    return number_map_to_sorted_list(influence_map(graph, user))

For every Facebook user with an id that is a multiple of 100, 
print a list containing the first 10 friend recommendations, 
as determined by number of common friends. 
If there are fewer than 10 recommendations, 
print all the recommendations.

In [15]:
# Users whose numeric id is a multiple of 100, in ascending id order.  The ids
# are read from the graph itself: counting up to the number of nodes would
# silently skip every user whose id is not smaller than the node count.
users_multiple_of_100 = sorted(
    (node for node in facebook.nodes() if int(node) % 100 == 0),
    key=int,
)

# First 10 recommendations (or fewer, if there are not that many) per user.
rec_num_of_friends = [
    recommend_by_number_of_common_friends(facebook, user)[:10]
    for user in users_multiple_of_100
]

In [16]:
# Display the recommendation lists ranked by number of common friends.
rec_num_of_friends

[['1902', '1983', '1940', '23', '1404', '2300', '2414', '3375', '528', '1194'],
 ['173',
  '177',
  '12225',
  '12599',
  '14724',
  '163',
  '181',
  '13252',
  '20913',
  '197'],
 ['295',
  '12894',
  '4078',
  '8288',
  '8290',
  '8843',
  '292',
  '296',
  '7834',
  '8286'],
 ['32662',
  '24182',
  '32661',
  '32666',
  '12294',
  '16758',
  '22457',
  '32664',
  '35667',
  '35782'],
 ['510',
  '1456',
  '1469',
  '1488',
  '1437',
  '1457',
  '1479',
  '1404',
  '1612',
  '2175'],
 ['12588',
  '4401',
  '596',
  '26199',
  '152',
  '441',
  '27209',
  '803',
  '12604',
  '1243'],
 ['9245',
  '7756',
  '24510',
  '6204',
  '7749',
  '7751',
  '7758',
  '9442',
  '10492',
  '7480'],
 ['16322',
  '4283',
  '8922',
  '10100',
  '11849',
  '20292',
  '5054',
  '6313',
  '11566',
  '20140'],
 ['273', '2524', '4248', '872', '4250', '4255', '4254', '888', '14835', '876'],
 ['23', '453', '456', '1140', '2068', '467', '1009', '1011', '11987', '16423'],
 ['1084',
  '1088',
  '1091',
  '1094'

For every Facebook user with an id that is a multiple of 1000, 
print a list containing the first 10 friend recommendations, 
as determined by influence score. 
If there are fewer than 10 recommendations, 
print all the recommendations.

In [17]:
# Users whose numeric id is a multiple of 1000, in ascending id order.  The ids
# are read from the graph itself: counting up to the number of nodes would
# silently skip every user whose id is not smaller than the node count.
users_multiple_of_1000 = sorted(
    (node for node in facebook.nodes() if int(node) % 1000 == 0),
    key=int,
)

# First 10 recommendations (or fewer, if there are not that many) per user.
rec_influence = [
    recommend_by_influence(facebook, user)[:10]
    for user in users_multiple_of_1000
]

In [18]:
# Display the recommendation lists ranked by influence score.
rec_influence

[['23', '453', '456', '467', '1140', '1009', '16423', '2068', '1011', '11987'],
 ['1686',
  '1693',
  '1691',
  '1692',
  '2117',
  '2162',
  '2163',
  '2164',
  '7787',
  '8821'],
 ['2868',
  '12623',
  '4653',
  '4864',
  '554',
  '1156',
  '639',
  '3002',
  '3004',
  '9033'],
 ['28751',
  '29830',
  '33373',
  '53528',
  '284',
  '3423',
  '3978',
  '4012',
  '13454',
  '9401'],
 ['1685',
  '2152',
  '2218',
  '2141',
  '1691',
  '1693',
  '2660',
  '1686',
  '1687',
  '2266'],
 ['22596',
  '40834',
  '22599',
  '6015',
  '53878',
  '61092',
  '58637',
  '37969',
  '41627',
  '903'],
 ['24649',
  '13775',
  '28568',
  '34865',
  '20401',
  '26203',
  '26206',
  '7160',
  '20934',
  '31156'],
 ['13523',
  '1689',
  '20165',
  '4758',
  '1209',
  '7999',
  '17802',
  '13283',
  '18817',
  '3169'],
 ['41322',
  '7999',
  '21759',
  '41321',
  '19228',
  '19231',
  '19286',
  '18388',
  '13421',
  '8988'],
 ['32097',
  '10452',
  '280',
  '12670',
  '24929',
  '27028',
  '53942',
  '55

Present the average index for each recommendation system. 
State which recommendation system is better for the facebook graph.

## Evaluation

In [19]:
def evaluate_recommendation(graph, num_trials=100, seed=None):
    """Compare the two recommenders by how highly they rank real friendships.

    For each trial:

    1. Randomly choose an existing friendship (F1, F2).
    2. Remove it from the graph.
    3. Compute both recommendation lists for F1 and for F2.
    4. Record the 1-based rank of F2 in F1's lists and of F1 in F2's lists.
       The pair is discarded if either user is left without friends or if
       either user is missing from any of the four lists.
    5. Put the friendship back, even if a recommender raised.

    The average rank per method is printed together with a verdict; a lower
    average rank means the removed friend was recommended earlier.

    Parameters
    ----------
    graph : networkx graph
        Friendship graph.  It is modified during each trial and restored
        afterwards.
    num_trials : int, optional
        Number of friendships to sample (default 100).
    seed : optional
        When given, sampling uses a private ``random.Random(seed)`` so the
        run is repeatable; otherwise the shared ``random`` module state is
        used.

    Returns
    -------
    None
        Results are written to standard output.
    """
    chooser = random if seed is None else random.Random(seed)

    # Every trial restores the edge it removed, so the set of edges is the
    # same at the start of each trial and the list can be built once.
    # list() also copes with graph.edges() returning a view rather than a list.
    all_edges = list(graph.edges())
    if not all_edges:
        print("No friendships in the graph, nothing to evaluate")
        return

    number_index = []
    influence_index = []

    for _ in range(num_trials):
        friendship_chosen = chooser.choice(all_edges)
        friend1 = friendship_chosen[0]
        friend2 = friendship_chosen[1]

        graph.remove_edge(friend1, friend2)
        try:
            # A user left without friends cannot receive recommendations.
            # degree() returns an int for a single node regardless of whether
            # neighbors() yields a list or an iterator.
            if graph.degree(friend1) == 0 or graph.degree(friend2) == 0:
                continue

            f1_rec_number = recommend_by_number_of_common_friends(graph, friend1)
            f2_rec_number = recommend_by_number_of_common_friends(graph, friend2)
            f1_rec_influence = recommend_by_influence(graph, friend1)
            f2_rec_influence = recommend_by_influence(graph, friend2)

            # Discard the pair unless each friend appears in all four lists.
            if (friend2 not in f1_rec_number
                    or friend1 not in f2_rec_number
                    or friend2 not in f1_rec_influence
                    or friend1 not in f2_rec_influence):
                continue

            # Ranks start counting at 1, not 0.
            number_index.append(f1_rec_number.index(friend2) + 1)
            number_index.append(f2_rec_number.index(friend1) + 1)
            influence_index.append(f1_rec_influence.index(friend2) + 1)
            influence_index.append(f2_rec_influence.index(friend1) + 1)
        finally:
            # Always restore the friendship so the caller's graph is intact.
            graph.add_edge(friend1, friend2)

    # Both lists grow together, so checking one of them is enough.
    if not number_index:
        print("No valid friend pairs were sampled, cannot compute average ranks")
        return

    avg_influence = sum(influence_index) / len(influence_index)
    print("Average rank of influence method:", avg_influence)

    avg_number = sum(number_index) / len(number_index)
    print ("Average rank of number of friends in common method:", avg_number)

    # compare two methods
    if avg_influence < avg_number:
        print ("recommend by influence is better")
    else:
        print ("recommend by number of friends in common method is better")

In [20]:
# Run the comparison on the full graph.  Friendships are sampled with the
# `random` module and no seed is set in the cells shown here, so the printed
# averages are expected to differ from run to run.
evaluate_recommendation(facebook)

Average rank of influence method: 255.6043956043956
Average rank of number of friends in common method: 358.7967032967033
recommend by influence is better
