### API >> https://facebook-sdk.readthedocs.io/en/latest/api.html
### Get access token >> https://developers.facebook.com/tools/explorer

In [1]:
!pip install facebook-sdk



In [2]:
from collections import defaultdict as ddict
import facebook as fb
import requests as req
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [3]:
#Gets the categories of pages liked by the user
#   token       user access token
#   return      dictionary of categories found, each with it's own array of pages
#       {c1 : [page1, page2, page3...], c2 : [page4, page5], c3 : [page6]...}
def getCats(token):
    cats = ddict(str)
    graph = fb.GraphAPI(access_token=token, version='2.7')
    resource = graph.get_object("me/likes?fields=name,category")
    #print(resource)
    cats = dict()
    while(True):
        for page in resource['data']:
            cat = page['category']
            if not(cat in cats):
                cats[cat] = []
            cats[cat].append(page['id'])
        # Attempt to make a request to the next page of data, if it exists.
        try:
            resource=req.get(resource['paging']['next']).json()
        except KeyError:
            print("Finished getting pages")
            # When there are no more pages (['paging']['next']), break from the
            # loop and end the script.
            break
    return cats

In [4]:
#Get the pages that a given user liked
#   user    id of a user
#   token   access token
#   return  array of page IDs
#       [page1, page2, page3...]
def getPagesList(user, token):
    pages = list()
    graph = fb.GraphAPI(access_token=token, version='2.7')
    resource = graph.get_object(user+"/likes?fields=name")
    #print(resource)
    cats = dict()
    while(True):
        for page in resource['data']:
            pages.append(page['id'])
        # Attempt to make a request to the next page of data, if it exists.
        try:
            resource=req.get(resource['paging']['next']).json()
        except KeyError:
            print("Finished getting pages from user " + user)
            # When there are no more pages (['paging']['next']), break from the
            # loop and end the script.
            break
    return pages


def getPagesListFriends(user, token):
    pages = list()
    try:
        graph = fb.GraphAPI(access_token=token, version='2.7')
        resource = graph.get_object(user+"?fields=likes{category,name}")["likes"]
    except:
        return pages
    cats = dict()
    while(True):
        for page in resource['data']:
            pages.append(page['id'])
        # Attempt to make a request to the next page of data, if it exists.
        try:
            resource=req.get(resource['paging']['next']).json()
        except KeyError:
            print("Finished getting pages from user " + user)
            # When there are no more pages (['paging']['next']), break from the
            # loop and end the script.
            break
    return pages

In [5]:
#Classifies pages by categories, the category with more pages gets 1.0
#and the smaller categories get smaller and smaller classifications...
#   cats        dictionary of categories found, each with it's own array of pages
#       {c1 : [page1, page2, page3...], c2 : [page4, page5], c3 : [page6]...}
#   return      dictionary of pages, each with it's own classification (0.0 < classification <= 1.0)
#       {page1 : x, page2 : x, page4 : y, page6 : z}
def classify(cats):
    classification = ddict()
    biggestCat = ""
    biggestCatLen = 0
    for cat, pages in cats.items():
        if(len(pages) > biggestCatLen):
            biggestCat = cat
            biggestCatLen = len(pages)
    for cat, pages in cats.items():
        classify = len(pages)/biggestCatLen
        for i in pages:
            classification[i] = classify

    return classification

In [6]:
def normalize(cats):
    listKey = list()
    listValue = list()
    listZero = list()
    for key, value in cats.items():
        listKey.append(key)
        listValue.append(len(value))
        listZero.append(0.0)
    
    d = {'name': listKey, 'frequency': listValue, 'normalize': listZero, 'weight': listZero}
    normalization = pd.DataFrame(data=d)
    normalization["normalize"] = (normalization["frequency"]/normalization["frequency"].max())
    normalization = normalization.sort_values(["frequency", "name"], ascending=[False, True])
    
    weights = [25, 18, 15, 12, 10, 8, 6, 4, 2, 1]
    cont = 0
    for row, value in normalization.iterrows():
        if(cont < 10):
            value["weight"] = weights[cont] * value["normalize"]
        else:
            value["weight"] = weights[9] * value["normalize"]
        cont +=1
    
    return normalization

In [7]:
def similarityWith(user, classification, token):
    otherUserPages = getPagesListFriends(user, token)
    rate = 0.0
    for page in otherUserPages:
        if page in classification:
            rate += classification[page]
    return rate

In [8]:
# return dict { id1 : friend_name1, id2 : friend_name2 }
def getFriends(user, token):
    graph = fb.GraphAPI(access_token=token, version='2.7')
    resource = graph.get_object("/" + user + "/friends")
    friends = dict()
    while(True):
        for friend in resource['data']:
            friends[friend['id']] = friend['name']
            # Attempt to make a request to the next page of data, if it exists.
        try:
            resource=req.get(resource['paging']['next']).json()
        except KeyError:
            print("Finished getting friends from user " + user)
            # When there are no more pages (['paging']['next']), break from the
            # loop and end the script.
            break

    return friends

In [9]:
def getUsersDict(user, token):
    friends = getFriends(user, token)
    ffriends = dict() 
    listFFriends = []
    for idFriend in friends: 
        friendsOfFriends = getFriends(idFriend, token)
        for idUser, name in friendsOfFriends.items():
            if not idUser in ffriends and not idUser in friends:
                ffriends[idUser] = name
                listFFriends.append([idUser, name])
    
    count = 0
    while len(listFFriends) < 400 and len(listFFriends) > count:
        curruentID = listFFriends[count][0]
        friendsCurrentID = getFriends(curruentID, token)
        for idUser, name in friendsOfFriends.items():
            if not idUser in ffriends:
                ffriends[idUser] = name
                listFFriends.append([idUser, name])
        count+=1
    return ffriends

In [10]:
#START
#this is my token (Pitágoras)
token = "EAACEdEose0cBABQrzTCSD1uQFNevppuRxaGVTPL49CHb1MhMWgMty8kaDZCFNOKZBCtvNa7hzpzCIyvnA4JtLZBF9SqWsyMJ1d8CyQXqBaqZB8hwUOyJvhCoZBLBjSm1yLJBWHZBrd6ywTZAnjF4ZBHn7knuOk5mafTe2K4QoHskakjm7t2yMbfYSXYAi8E0a8kiLG4OYZB0cVSYDKixXERD2Pj9wXBPmFXkvaseQqtUyPQZDZD"
 
#ID of "Pitagoras"
otherPersonID = "10208935375967359"
raiID = "739425496128479"
anaID = "765831436837419"

In [30]:
users = getUsersDict('me', token)
#myfriends = getFriends('me', token)
cats = getCats(token)
classf = classify(cats)

otherUserPages = getPagesList(otherPersonID, token)
#similarity = similarityWith(otherPersonID, classf, token)
#print("Similarity with " + otherPersonID + ": " + str(similarity))
#example print classifications
#count = 0
#for page, classification in classf.items():
#    print(page + ": " + str(classification))
#    count = count + 1
#    if(count > 20):
#        break

Finished getting friends from user me
Finished getting friends from user 10208935375967359
Finished getting friends from user 862034200484130
Finished getting friends from user 998100796886441
Finished getting friends from user 100000888984231
Finished getting friends from user 967446623299200
Finished getting friends from user 773541116034720
Finished getting friends from user 739425496128479
Finished getting friends from user 100002211536120
Finished getting friends from user 765831436837419
Finished getting friends from user 100002541369604
Finished getting friends from user 653306944773428
Finished getting friends from user 607827396001201
Finished getting friends from user 644347655699194
Finished getting friends from user 1032134756839013
Finished getting friends from user 808400389216109
Finished getting friends from user 948685731934130
Finished getting friends from user 161313964255674
Finished getting friends from user 10152391386011939
Finished getting friends from user 1020

In [50]:
def calcSimilarityAndAddTo(key, classf, token, similarityDict):
    ratio = similarityWith(key, classf, token)
    #if(ratio > 0.000):
    similarityDict[key] = ratio
    return ratio

In [51]:
import threading
lock = threading.Lock()

def simCalcFunction(key, classf, token, similarityDict, threadsRunning):
    lock.acquire()
    threadsRunning = threadsRunning + 1
    lock.release()
    
    ratio = calcSimilarityAndAddTo(key, classf, token, similarityDict)
    
    lock.acquire()
    print(key + " similarity with me is " + str(ratio))
    threadsRunning = threadsRunning - 1
    lock.release()

In [52]:
#listKey = list()
#listValue = list()
#listSimilarity = list()
#selectedFriends = dict()
count = 0
limit = 32
similarityDict = dict()
threadsRunning = 0
threads = list()
for key, value in users.items():
    while(threadsRunning > limit):
        i = 1
        #do nothing
    t = threading.Thread(target=simCalcFunction, args = (key, classf, token, similarityDict, threadsRunning))
    t.daemon = False
    threads.append(t)
    t.start()
    print("started thread for user " + key)
    #simCalcFunction(key, classf, token, similarityDict, threadsRunning)
    ##if(similarity > 0):
    ##    similarityDict[key] = similarity
    ##    listKey.append(key)
    ##    listValue.append(value)
    ##    selectedFriends[key] = value
    ##    listSimilarity.append(similarity)
    ##    listSimilarity.append([key, value, similarity])
for x in threads:
     x.join()

started thread for user 1032134756839013
started thread for user 808400389216109
started thread for user 948685731934130
started thread for user 161313964255674
started thread for user 10152391386011939
started thread for user 10205830689499325
started thread for user 10204215934936399
started thread for user 10203715278547595
started thread for user 823875290969157
started thread for user 879262995430741
started thread for user 786112898127224
started thread for user 645217592223118
started thread for user 869985306428419
started thread for user 687027984737272
started thread for user 709968795780958
started thread for user 742590075854260
started thread for user 554487874679139
started thread for user 1595959920660890
started thread for user 189174214832382
started thread for user 10152690791431219
started thread for user 10152491973808526
started thread for user 10152929348914074
started thread for user 10153287821798098
started thread for user 10152850405356346
started thread for u

In [53]:
import pandas as pd
#d = {'id': listKey, 'name': listValue, 'similarity': listSimilarity}
#df_similarity = pd.DataFrame(data=d)
print(similarityDict)
#print(users)
s = pd.Series(similarityDict, name='similarity')
s.index.name = 'user-id'
s.reset_index()
#print(s)
df_similarity = s.to_frame()
print (df_similarity.sort_values(["similarity"], ascending=[False]))

{'948685731934130': 0.0, '808400389216109': 0.0, '10152699573418381': 0.0, '786112898127224': 0.0, '10152440587547448': 0.0, '823875290969157': 0.0, '869985306428419': 0.0, '687027984737272': 0.0, '10152491973808526': 0.0, '10152690791431219': 0.0, '10203569989306409': 0.0, '10152392307367522': 0.0, '1595959920660890': 0.0, '1032134756839013': 0.0, '10204215934936399': 0.0, '645217592223118': 0.0, '10152391386011939': 0.0, '10152850405356346': 0.0, '10202016009440875': 0.0, '798472416878669': 0.0, '985083188174764': 0.0, '697895010252633': 0.0, '974987235851341': 0.0, '859075384113432': 0.0, '999776340039111': 0.0, '735887446470444': 0.0, '799581100080013': 0.0, '1147509171956957': 0.0, '760641920650129': 0.0, '10153358808842139': 0.0, '872331799478706': 0.0, '823947500984627': 0.0, '554487874679139': 0.0, '649975765070515': 0.0, '774723955921387': 0.0, '10152868321804769': 0.0, '789556387771606': 0.0, '10203530867887851': 0.0, '809635275747651': 0.0, '913793375346781': 0.0, '897856883