### facebook-sdk API reference: https://facebook-sdk.readthedocs.io/en/latest/api.html
### Get an access token from the Graph API Explorer: https://developers.facebook.com/tools/explorer

In [1]:
!pip install facebook-sdk



In [2]:
import os
from collections import defaultdict as ddict

import facebook as fb
import pandas as pd
import requests as req

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [3]:
def getCats(token):
    """Group the pages liked by the token's owner by page category.

    Queries ``me/likes`` through the Graph API (version 2.7) and keeps
    following the ``paging.next`` link of each response until a response
    has none.

    Args:
        token: user access token.

    Returns:
        dict mapping each category found to the list of IDs of the pages in it:
        ``{c1: [id1, id2, id3, ...], c2: [id4, id5], c3: [id6], ...}``
    """
    graph = fb.GraphAPI(access_token=token, version='2.7')
    resource = graph.get_object("me/likes?fields=name,category")
    cats = ddict(list)
    while True:
        for page in resource['data']:
            cats[page['category']].append(page['id'])
        # A response without ['paging']['next'] is the last one.
        try:
            nextUrl = resource['paging']['next']
        except KeyError:
            print("Finished getting pages")
            break
        resource = req.get(nextUrl).json()
    # Return a plain dict so that looking up an unknown category raises
    # KeyError instead of silently creating an empty entry.
    return dict(cats)

In [4]:
def getPagesList(user, token):
    """Get the pages that a given user liked.

    Queries ``<user>/likes`` through the Graph API (version 2.7) and keeps
    following the ``paging.next`` link of each response until a response
    has none.

    Args:
        user: id of a user (a string; it is concatenated into the request path).
        token: access token.

    Returns:
        list of page IDs: ``[page1, page2, page3, ...]``
    """
    graph = fb.GraphAPI(access_token=token, version='2.7')
    resource = graph.get_object(user+"/likes?fields=name")
    pages = []
    while True:
        pages.extend(page['id'] for page in resource['data'])
        # A response without ['paging']['next'] is the last one.
        try:
            nextUrl = resource['paging']['next']
        except KeyError:
            print("Finished getting pages from user " + user)
            break
        resource = req.get(nextUrl).json()
    return pages


def getPagesListFriends(user, token):
    """Get the pages liked by a user, read from the ``likes`` field of the user object.

    Unlike ``getPagesList`` this requests ``<user>?fields=likes{category,name}``
    and is best effort: when that first request fails, or the response has no
    ``likes`` entry, an empty list is returned instead of raising.

    Args:
        user: id of a user (a string; it is concatenated into the request path).
        token: access token.

    Returns:
        list of page IDs, possibly empty.
    """
    try:
        graph = fb.GraphAPI(access_token=token, version='2.7')
        resource = graph.get_object(user+"?fields=likes{category,name}")["likes"]
    except Exception:
        # Deliberately broad (API errors, missing "likes" key, network errors),
        # but no longer a bare except: KeyboardInterrupt and SystemExit must
        # still be able to stop a long run over many friends.
        return []
    pages = []
    while True:
        pages.extend(page['id'] for page in resource['data'])
        # A response without ['paging']['next'] is the last one.
        try:
            nextUrl = resource['paging']['next']
        except KeyError:
            print("Finished getting pages from user " + user)
            break
        resource = req.get(nextUrl).json()
    return pages

In [5]:
def classify(cats):
    """Score every page by the relative size of the category it belongs to.

    Pages of the category with the most pages get 1.0; pages of smaller
    categories get ``len(category) / len(biggest category)``.

    Args:
        cats: dictionary of categories, each with its own list of pages
            ``{c1: [page1, page2, page3, ...], c2: [page4, page5], c3: [page6], ...}``

    Returns:
        dictionary of pages, each with its own classification
        (0.0 < classification <= 1.0): ``{page1: x, page2: x, page4: y, page6: z}``.
        Empty when ``cats`` holds no pages at all.
    """
    # A defaultdict without a factory behaves like a plain dict (missing keys
    # raise KeyError); the type is kept for existing callers.
    classification = ddict()

    # Explicit loop instead of the max() built-in: the notebook runs
    # `%pylab inline`, which star-imports numpy into the namespace and may
    # shadow built-in reducers.
    biggestCatLen = 0
    for pages in cats.values():
        if len(pages) > biggestCatLen:
            biggestCatLen = len(pages)

    if biggestCatLen == 0:
        # No categories, or only empty ones: nothing to score, and the
        # division below would raise ZeroDivisionError.
        return classification

    for pages in cats.values():
        score = len(pages) / biggestCatLen
        for page in pages:
            classification[page] = score

    return classification

In [6]:
def normalize(cats):
    """Rank categories by how many pages they hold and weight them by rank.

    Args:
        cats: dictionary of categories, each with its own list of pages
            ``{c1: [page1, page2, page3, ...], c2: [page4, page5], ...}``

    Returns:
        pandas.DataFrame with one row per category, sorted by ``frequency``
        (descending) and then ``name`` (ascending), with the columns:
            name       category name
            frequency  number of pages in the category
            normalize  frequency divided by the largest frequency
            weight     ``normalize`` multiplied by the weight of the row's rank
    """
    # Weight per rank: the 10 largest categories get 25 down to 1; every
    # category after the 10th reuses the last weight.
    weights = [25, 18, 15, 12, 10, 8, 6, 4, 2, 1]

    normalization = pd.DataFrame(data={
        'name': list(cats.keys()),
        'frequency': [len(pages) for pages in cats.values()],
    })
    normalization["normalize"] = normalization["frequency"] / normalization["frequency"].max()
    normalization = normalization.sort_values(["frequency", "name"], ascending=[False, True])

    # Assign the whole column at once. Writing into the rows yielded by
    # iterrows() only changes a copy of each row, so the returned frame kept
    # its initial weight of 0.0 for every category.
    lastRank = len(weights) - 1
    rankWeights = [weights[rank] if rank < lastRank else weights[lastRank]
                   for rank in range(len(normalization))]
    normalization["weight"] = normalization["normalize"] * rankWeights
    return normalization

In [7]:
def similarityWith(user, classification, token):
    """Add up the classification of every page liked by ``user``.

    The pages come from ``getPagesListFriends(user, token)``. Pages without an
    entry in ``classification`` contribute nothing; a page listed more than
    once is counted each time.

    Args:
        user: id of the user to compare against.
        classification: dictionary of pages, each with its own classification.
        token: access token.

    Returns:
        the accumulated classification, 0.0 when no page matches.
    """
    total = 0.0
    for liked in getPagesListFriends(user, token):
        if liked not in classification:
            continue
        total = total + classification[liked]
    return total

In [8]:
def getFriends(user, token):
    """Fetch the friends of ``user`` through the Graph API (version 2.7).

    Every response of ``/<user>/friends`` is consumed, following
    ``paging.next`` until a response does not carry one.

    Args:
        user: id of a user (a string; it is concatenated into the request path).
        token: access token.

    Returns:
        dict ``{id1: friend_name1, id2: friend_name2}``
    """
    api = fb.GraphAPI(access_token=token, version='2.7')
    response = api.get_object("/" + user + "/friends")
    friends = {}
    morePages = True
    while morePages:
        friends.update((entry['id'], entry['name']) for entry in response['data'])
        try:
            # Raises KeyError on the last response, which has no next link.
            response = req.get(response['paging']['next']).json()
        except KeyError:
            print("Finished getting friends from user " + user)
            morePages = False
    return friends

In [9]:
def getUsersDict(user, token, maxUsers=400):
    """Collect users reachable through the friends of ``user`` who are not direct friends.

    First every friend of every direct friend is collected. While fewer than
    ``maxUsers`` users are known, the friends of the users found so far are
    added as well, in discovery order.

    Args:
        user: id of the starting user.
        token: access token.
        maxUsers: stop expanding once this many users are known. The check
            runs before each expansion step, so the result can end up larger.

    Returns:
        dict ``{id: name}`` of the users found; direct friends are excluded.
    """
    # NOTE(review): `user` itself is not filtered out; presumably it shows up
    # in the friend lists of its own friends - confirm whether that matters.
    friends = getFriends(user, token)
    ffriends = dict()
    discovered = []  # ids in discovery order, drives the expansion below

    def addNew(candidates):
        # Keep only users that are neither known already nor direct friends.
        for idUser, name in candidates.items():
            if idUser not in ffriends and idUser not in friends:
                ffriends[idUser] = name
                discovered.append(idUser)

    for idFriend in friends:
        addNew(getFriends(idFriend, token))

    count = 0
    while len(discovered) < maxUsers and count < len(discovered):
        # Expand with the friends of the user being visited (previously the
        # stale friend list left over from the loop above was iterated).
        addNew(getFriends(discovered[count], token))
        count += 1
    return ffriends

In [10]:
#START
# The access token is a credential, so it is read from the environment instead
# of being stored in the notebook. Generate one at
# https://developers.facebook.com/tools/explorer and export it before starting
# Jupyter:  export FACEBOOK_ACCESS_TOKEN=<token>
token = os.environ.get("FACEBOOK_ACCESS_TOKEN", "")
if not token:
    print("FACEBOOK_ACCESS_TOKEN is not set; the Graph API calls below need it.")

#ID of "Pitagoras"
otherPersonID = "10208935375967359"
raiID = "739425496128479"
anaID = "765831436837419"

In [11]:
# Fetch the token owner's friends and liked pages, then turn the liked-page
# categories into per-page scores (pages of the largest category score 1.0).
# Every call except classify() issues Graph API requests, so `token` must be valid.
myfriends = getFriends('me', token)
cats = getCats(token)
classf = classify(cats)

# Pages liked by one specific user, fetched through the <id>/likes edge.
# NOTE(review): `otherUserPages` is not read by any later cell visible here.
otherUserPages = getPagesList(otherPersonID, token)

Finished getting friends from user me
Finished getting pages
Finished getting pages from user 10208935375967359


In [20]:
def calcSimilarityAndAddTo(key, classf, token, similarityDict):
    """Score one user and record the score when it is positive.

    Args:
        key: id of the user to score (a string; it is concatenated into the
            printed message).
        classf: dictionary of pages, each with its own classification.
        token: access token.
        similarityDict: dict updated in place with ``key -> score``; left
            untouched when the score is not greater than zero.
    """
    score = similarityWith(key, classf, token)
    if not (score > 0):
        return
    similarityDict[key] = score
    print(key + " similarity with me is " + str(score))

In [21]:
# Score at most `limit` friends: every friend costs at least one Graph API
# request. Only friends with a positive score end up in `similarityDict`.
limit = 5
similarityDict = dict()
for count, key in enumerate(myfriends):
    # `>=` so that exactly `limit` friends are processed (`>` let one more through).
    if count >= limit:
        break
    calcSimilarityAndAddTo(key, classf, token, similarityDict)
        

Finished getting pages from user 10208935375967359
10208935375967359 similarity with me is 1.1578947368421053
Finished getting pages from user 862034200484130
862034200484130 similarity with me is 2.810526315789474
Finished getting pages from user 998100796886441
998100796886441 similarity with me is 10.505263157894733
Finished getting pages from user 967446623299200
967446623299200 similarity with me is 1.7578947368421054
Finished getting pages from user 773541116034720
773541116034720 similarity with me is 4.821052631578947


In [22]:
# Rank the scored friends, most similar first. `similarityDict` maps
# user id -> similarity score; pandas is imported in the notebook's import cell.
s = pd.Series(similarityDict, name='similarity')
s.index.name = 'user-id'
df_similarity = s.to_frame()
print(df_similarity.sort_values(["similarity"], ascending=[False]))

{'10208935375967359': 1.1578947368421053, '862034200484130': 2.810526315789474, '998100796886441': 10.505263157894733, '967446623299200': 1.7578947368421054, '773541116034720': 4.821052631578947}
user-id
10208935375967359     1.157895
773541116034720       4.821053
862034200484130       2.810526
967446623299200       1.757895
998100796886441      10.505263
Name: similarity, dtype: float64
                   similarity
user-id                      
998100796886441     10.505263
773541116034720      4.821053
862034200484130      2.810526
967446623299200      1.757895
10208935375967359    1.157895
