### API >> https://facebook-sdk.readthedocs.io/en/latest/api.html
### Get access token >> https://developers.facebook.com/tools/explorer

In [218]:
#!pip install facebook-sdk

In [219]:
from collections import defaultdict as ddict
import facebook as fb
import requests as req
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [220]:
#Gets the categories of pages liked by the user
#   token       user access token
#   return      dictionary of categories found, each with it's own array of pages
#       {c1 : [page1, page2, page3...], c2 : [page4, page5], c3 : [page6]...}
def getCats(token):
    cats = ddict(str)
    graph = fb.GraphAPI(access_token=token, version='2.7')
    resource = graph.get_object("me/likes?fields=name,category")
    #print(resource)
    cats = dict()
    while(True):
        for page in resource['data']:
            cat = page['category']
            if not(cat in cats):
                cats[cat] = []
            cats[cat].append(page['id'])
        # Attempt to make a request to the next page of data, if it exists.
        try:
            resource=req.get(resource['paging']['next']).json()
        except KeyError:
            print("Finished getting pages")
            # When there are no more pages (['paging']['next']), break from the
            # loop and end the script.
            break
    return cats

In [221]:
#Get the pages that a given user liked
#   user    id of a user
#   token   access token
#   return  array of page IDs
#       [page1, page2, page3...]
def getPagesList(user, token):
    pages = list()
    graph = fb.GraphAPI(access_token=token, version='2.7')
    resource = graph.get_object(user+"/likes?fields=name")
    #print(resource)
    cats = dict()
    while(True):
        for page in resource['data']:
            pages.append(page['id'])
        # Attempt to make a request to the next page of data, if it exists.
        try:
            resource=req.get(resource['paging']['next']).json()
        except KeyError:
            print("Finished getting pages from user " + user)
            # When there are no more pages (['paging']['next']), break from the
            # loop and end the script.
            break
    return pages


def getPagesListFriends(user, token):
    pages = list()
    try:
        graph = fb.GraphAPI(access_token=token, version='2.7')
        resource = graph.get_object(user+"?fields=likes{category,name}")["likes"]
    except:
        return pages
    cats = dict()
    while(True):
        for page in resource['data']:
            pages.append(page['id'])
        # Attempt to make a request to the next page of data, if it exists.
        try:
            resource=req.get(resource['paging']['next']).json()
        except KeyError:
            print("Finished getting pages from user " + user)
            # When there are no more pages (['paging']['next']), break from the
            # loop and end the script.
            break
    return pages

In [222]:
#Classifies pages by categories, the category with more pages gets 1.0
#and the smaller categories get smaller and smaller classifications...
#   cats        dictionary of categories found, each with it's own array of pages
#       {c1 : [page1, page2, page3...], c2 : [page4, page5], c3 : [page6]...}
#   return      dictionary of pages, each with it's own classification (0.0 < classification <= 1.0)
#       {page1 : x, page2 : x, page4 : y, page6 : z}
def classify(cats):
    classification = ddict()
    biggestCat = ""
    biggestCatLen = 0
    for cat, pages in cats.items():
        if(len(pages) > biggestCatLen):
            biggestCat = cat
            biggestCatLen = len(pages)
    for cat, pages in cats.items():
        classify = len(pages)/biggestCatLen
        for i in pages:
            classification[i] = classify

    return classification

In [251]:
def normalize(cats):
    listKey = list()
    listValue = list()
    listZero = list()
    for key, value in cats.items():
        listKey.append(key)
        listValue.append(len(value))
        listZero.append(0.0)
    
    d = {'name': listKey, 'frequency': listValue, 'normalize': listZero, 'weight': listZero}
    normalization = pd.DataFrame(data=d)
    normalization["normalize"] = (normalization["frequency"]/normalization["frequency"].max())
    normalization = normalization.sort_values(["frequency", "name"], ascending=[False, True])
    
    weights = [25, 18, 15, 12, 10, 8, 6, 4, 2, 1]
    cont = 0
    for row, value in normalization.iterrows():
        if(cont < 10):
            value["weight"] = weights[cont] * value["normalize"]
        else:
            value["weight"] = weights[9] * value["normalize"]
        cont +=1
    
    return normalization

In [224]:
def similarityWith(user, classification, token):
    otherUserPages = getPagesListFriends(user, token)
    rate = 0.0
    for page in otherUserPages:
        if page in classification:
            rate += classification[page]
    return rate

In [225]:
# return dict { id1 : friend_name1, id2 : friend_name2 }
def getFriends(user, token):
    graph = fb.GraphAPI(access_token=token, version='2.7')
    resource = graph.get_object("/" + user + "/friends")
    friends = dict()
    while(True):
        for friend in resource['data']:
            friends[friend['id']] = friend['name']
            # Attempt to make a request to the next page of data, if it exists.
        try:
            resource=req.get(resource['paging']['next']).json()
        except KeyError:
            print("Finished getting friends from user " + user)
            # When there are no more pages (['paging']['next']), break from the
            # loop and end the script.
            break

    return friends

In [226]:
def getUsersDict(user, token):
    friends = getFriends(user, token)
    ffriends = dict() 
    listFFriends = []
    for idFriend in friends: 
        friendsOfFriends = getFriends(idFriend, token)
        for idUser, name in friendsOfFriends.items():
            if not idUser in ffriends and not idUser in friends:
                ffriends[idUser] = name
                listFFriends.append([idUser, name])
    
    count = 0
    while len(listFFriends) < 400 and len(listFFriends) > count:
        curruentID = listFFriends[count][0]
        friendsCurrentID = getFriends(curruentID, token)
        for idUser, name in friendsOfFriends.items():
            if not idUser in ffriends:
                ffriends[idUser] = name
                listFFriends.append([idUser, name])
        count+=1
    return ffriends

In [227]:
#START
#this is my token (Raí)
token = "EAACEdEose0cBAEZBjzaEw40OQVFEHhcB66SH0W2HH6qNcst0Yy91CeRcVlG3TLz0XXFjkaIQsFeHliBssImPimHbxlZCiZCQbQqQruKgukPdHBeqI61CJ3y1IEWk9AEbOZCL6BEkOraPQnyhV6MnWY4H4ZC5UAWBHFrls4keCnHeWsZBjMhbNnSiNeHASs4hsZD"
 
#ID of "Pitagoras"
otherPersonID = "10208935375967359"
raiID = "739425496128479"
anaID = "765831436837419"

In [228]:
#users = getUsersDict('me', token)
myfriends = getFriends('me', token)
cats = getCats(token)
classf = classify(cats)

otherUserPages = getPagesList(otherPersonID, token)
#similarity = similarityWith(otherPersonID, classf, token)
#print("Similarity with " + otherPersonID + ": " + str(similarity))
#example print classifications
#count = 0
#for page, classification in classf.items():
#    print(page + ": " + str(classification))
#    count = count + 1
#    if(count > 20):
#        break

Finished getting friends from user me
Finished getting pages
Finished getting pages from user 10208935375967359


In [252]:
listKey = list()
listValue = list()
listSimilarity = list()
for key, value in myfriends.items():
    similarity = similarityWith(key, classf, token)
    if(similarity > 0):
        listKey.append(key)
        listValue.append(value)
        listSimilarity.append(similarity)
        listSimilarity.append([key, value, similarity])
        
d = {'id': listKey, 'name': listValue, 'similarity': listSimilarity}
df_similarity = pd.DataFrame(data=d)
print (df_similarity.sort_values(["similarity"], ascending=[False]))

Finished getting pages from user 10153358808842139
Finished getting pages from user 10205273663910653
Finished getting pages from user 1838181531
Finished getting pages from user 868983753130884
Finished getting pages from user 852327664797650
Finished getting pages from user 967446623299200
Finished getting pages from user 809635275747651
Finished getting pages from user 100001792916486
Finished getting pages from user 677240555682413
Finished getting pages from user 746734585401725
Finished getting pages from user 641449889264776
Finished getting pages from user 765831436837419
Finished getting pages from user 535512013227845
Finished getting pages from user 784485288333067
Finished getting pages from user 819788451470857
Finished getting pages from user 644347655699194
Finished getting pages from user 524429947699837
Finished getting pages from user 270856679768964


ConnectionError: HTTPSConnectionPool(host='graph.facebook.com', port=443): Max retries exceeded with url: /v2.7/1520392488179871/likes?access_token=EAACEdEose0cBAEZBjzaEw40OQVFEHhcB66SH0W2HH6qNcst0Yy91CeRcVlG3TLz0XXFjkaIQsFeHliBssImPimHbxlZCiZCQbQqQruKgukPdHBeqI61CJ3y1IEWk9AEbOZCL6BEkOraPQnyhV6MnWY4H4ZC5UAWBHFrls4keCnHeWsZBjMhbNnSiNeHASs4hsZD&fields=category%2Cname&limit=25&after=Mjg4MDE4MDU4NDA1 (Caused by NewConnectionError('<requests.packages.urllib3.connection.VerifiedHTTPSConnection object at 0x000001C9D5B3A5C0>: Failed to establish a new connection: [WinError 10060] Uma tentativa de conexão falhou porque o componente conectado não respondeu\r\ncorretamente após um período de tempo ou a conexão estabelecida falhou\r\nporque o host conectado não respondeu',))