In [1]:
# Mount Google Drive inside the Colab runtime; the dataset and output paths
# used by the later cells are all located under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install cdlib
import sys
!{sys.executable} -m pip install python-igraph
!{sys.executable} -m pip install leidenalg

Collecting cdlib
[?25l  Downloading https://files.pythonhosted.org/packages/e7/df/2c52a76f23e6801a12f0b67ba1523a167f808629dbb0feb7bf491efb1925/cdlib-0.2.0-py3-none-any.whl (152kB)
[K     |██▏                             | 10kB 14.2MB/s eta 0:00:01[K     |████▎                           | 20kB 20.3MB/s eta 0:00:01[K     |██████▌                         | 30kB 15.5MB/s eta 0:00:01[K     |████████▋                       | 40kB 14.0MB/s eta 0:00:01[K     |██████████▊                     | 51kB 14.2MB/s eta 0:00:01[K     |█████████████                   | 61kB 12.3MB/s eta 0:00:01[K     |███████████████                 | 71kB 13.2MB/s eta 0:00:01[K     |█████████████████▎              | 81kB 11.0MB/s eta 0:00:01[K     |███████████████████▍            | 92kB 10.5MB/s eta 0:00:01[K     |█████████████████████▌          | 102kB 10.7MB/s eta 0:00:01[K     |███████████████████████▊        | 112kB 10.7MB/s eta 0:00:01[K     |█████████████████████████▉      | 122kB 10.7MB/s

In [3]:
import pandas as pd
import networkx as nx
from networkx.algorithms.community.centrality import girvan_newman
from cdlib import algorithms
import numpy as np
import pickle
from networkx.algorithms.link_analysis.pagerank_alg import pagerank
from tqdm.autonotebook import tqdm
import pickle
from PIL import Image
import requests
from io import BytesIO

  


In [4]:
# Root of the mounted Drive and the project folder that holds the datasets.
path = "/content/drive/My Drive/"
project_name = "2_TwitterFollowGraph"
# Edge list of the Facebook page graph; all of its columns are passed to
# createGraph below to build the edges.
dataframe = pd.read_csv(f"{path}{project_name}/Datasets/Facebook/musae_facebook_edges.csv")
columns = dataframe.columns.tolist()

In [5]:
def createGraph(dataframe,columns):
  """Build a networkx Graph from the selected columns of an edge table.

  Args:
    dataframe: pandas DataFrame holding one edge per row.
    columns: list of column names to read; each row of these columns is
      handed to ``add_edges_from`` as one edge.

  Returns:
    The populated ``nx.Graph``.
  """
  edge_rows = dataframe[columns].values.tolist()
  network = nx.Graph()
  network.add_edges_from(edge_rows)
  return network

In [6]:
# Build the page graph from every column of the edge table.
graph = createGraph(dataframe=dataframe, columns=columns)

In [None]:
def sortedCommunities(communities):
  """Return the communities as a new list ordered from largest to smallest.

  Args:
    communities: iterable of sized collections (one per community).

  Returns:
    A new list; communities of equal size keep their original relative order.
  """
  ordered = list(communities)
  ordered.sort(key=len, reverse=True)
  return ordered

In [None]:
def topDegreeNodeCommunities(communities,graph,size):
  """Keep only the highest-degree members of every community.

  Args:
    communities: iterable of node collections.
    graph: object whose ``degree()`` result can be indexed by node.
    size: maximum number of nodes kept per community.

  Returns:
    A list with one entry per community: its nodes sorted by descending
    degree and truncated to the first ``size`` nodes.
  """
  node_degree = graph.degree()
  return [
    sorted(members, key=lambda node: node_degree[node], reverse=True)[:size]
    for members in communities
  ]

In [None]:
def generateCommunities(graph,method,size=30):
  """Detect communities and return the top-degree nodes of each one.

  Args:
    graph: graph passed to the selected cdlib algorithm and used for the
      degree ranking.
    method: one of "leiden", "walktrap" or "surprise_communities".
    size: maximum number of nodes kept per community (defaults to 30, the
      value that was previously hard-coded).

  Returns:
    A list of communities ordered from largest to smallest, each reduced to
    its ``size`` highest-degree nodes.

  Raises:
    ValueError: if ``method`` is not one of the supported names. Previously
      this case failed later with an UnboundLocalError.
  """
  if method == "leiden":
    communities = algorithms.leiden(graph)
  elif method == "walktrap":
    communities = algorithms.walktrap(graph)
  elif method == "surprise_communities":
    communities = algorithms.surprise_communities(graph)
  else:
    raise ValueError(
      "Unknown community detection method {!r}; expected 'leiden', "
      "'walktrap' or 'surprise_communities'".format(method)
    )
  communitiesSorted = sortedCommunities(communities.communities)
  topNodeCommunities = topDegreeNodeCommunities(communitiesSorted,graph,size)
  return topNodeCommunities

In [7]:
# Per-page metadata; the lookup helpers below read the id, page_name,
# page_type and facebook_id columns from this frame.
community_features_df = pd.read_csv(f"{path}{project_name}/Datasets/Facebook/musae_facebook_target.csv")
community_features_df.head()

Unnamed: 0,id,facebook_id,page_name,page_type
0,0,145647315578475,The Voice of China 中国好声音,tvshow
1,1,191483281412,U.S. Consulate General Mumbai,government
2,2,144761358898518,ESET,company
3,3,568700043198473,Consulate General of Switzerland in Montreal,government
4,4,1408935539376139,Mark Bailey MP - Labor for Miller,politician


In [None]:
def filter_communities(communities_list,graph):
  """Replace node ids with readable page descriptions, per community.

  Looks every node id up in the notebook-level ``community_features_df``.

  Args:
    communities_list: iterable of communities, each an iterable of node ids.
    graph: object whose ``degree(node_id)`` gives the node's degree.

  Returns:
    A list of communities, each a list of
    ``(page_name, page_type, degree)`` tuples. Communities with fewer than
    two members are dropped.
  """
  described_communities = []
  for member_ids in communities_list:
    members = []
    for member_id in member_ids:
      row = community_features_df[community_features_df["id"] == member_id]
      page_name = row["page_name"].values[0]
      page_type = row["page_type"].values[0]
      members.append((page_name, page_type, graph.degree(member_id)))
    if len(members) <= 1:
      # Singleton (or empty) communities are not reported.
      continue
    described_communities.append(members)
  return described_communities

In [8]:
# Result containers that are filled by the cells below and pickled at the end:
# communities_dict is keyed by community-detection method name,
# page_rank_dict holds the PageRank ranking under the "pagerank" key.
communities_dict = dict()
page_rank_dict = dict()

In [None]:
# Leiden community algorithm: detect communities, keep the top-degree nodes of
# each, attach page metadata and store the result under the "leiden" key.
communities_list = generateCommunities(graph=graph, method="leiden")
final_communities_list = filter_communities(communities_list=communities_list, graph=graph)
communities_dict["leiden"] = final_communities_list
print(final_communities_list)

[[('The White House', 'government', 678), ('Joachim Herrmann', 'politician', 320), ('National Oceanic and Atmospheric Administration (NOAA)', 'government', 288), ('Marine Corps Recruiting', 'government', 257), ('U.S. Army SHARP', 'government', 256), ('Martin Schulz', 'politician', 236), ('The National Guard', 'government', 236), ('Arno Klare MdB', 'politician', 226), ('Angela Merkel', 'politician', 217), ('Resolute Support Mission', 'government', 201), ('Soldier for Life', 'government', 194), ('U.S. Mission to the United Nations', 'government', 193), ('NOAA Digital Coast', 'government', 188), ('Army OneSource', 'government', 182), ('NOAA NWS Storm Prediction Center', 'government', 177), ('Marie-Claude Bibeau', 'politician', 174), ('USA.gov', 'government', 166), ('European Commission - Development & Cooperation - EuropeAid', 'government', 162), ('European Committee of the Regions', 'government', 157), ('Norfolk District, U.S. Army Corps of Engineers', 'government', 156), ('Army Study Pr

In [None]:
# Surprise communities algorithm: detect communities, keep the top-degree nodes
# of each, attach page metadata and store the result under the
# "surprise_communities" key.
communities_list = generateCommunities(graph=graph, method="surprise_communities")
final_communities_list = filter_communities(communities_list=communities_list, graph=graph)
communities_dict["surprise_communities"] = final_communities_list
print(final_communities_list)

[[('150e anniversaire de la Confédération du Canada', 'government', 38), ('Ramesh Sangha, MP', 'politician', 14)]]


In [None]:
# Walktrap communities algorithm: detect communities, keep the top-degree nodes
# of each, attach page metadata and store the result under the "walktrap" key.
communities_list = generateCommunities(graph=graph, method="walktrap")
final_communities_list = filter_communities(communities_list=communities_list, graph=graph)
communities_dict["walktrap"] = final_communities_list
print(final_communities_list)



In [9]:
def rank_on_page_rank(graph):
  """Rank the nodes of ``graph`` by their PageRank score.

  Args:
    graph: graph handed to networkx's ``pagerank``.

  Returns:
    A list of ``(node, score)`` pairs ordered from highest to lowest score.
  """
  scores = dict(pagerank(graph))
  ranked = list(scores.items())
  ranked.sort(key=lambda pair: pair[1], reverse=True)
  return ranked

In [10]:
# (node, score) pairs for the whole graph, highest PageRank first.
pagerank_result = rank_on_page_rank(graph=graph)

In [11]:
def filter_pagerank(pagerank_result,size,graph):
  """Describe the first ``size`` entries of a PageRank ranking.

  Looks every node id up in the notebook-level ``community_features_df``.

  Args:
    pagerank_result: sequence whose entries start with a node id
      (e.g. ``(node, score)`` pairs), already in ranking order.
    size: number of leading entries to describe.
    graph: not used by this function; kept so existing calls keep working.

  Returns:
    A list of ``(page_name, page_type, facebook_id)`` tuples in the same
    order as the input.
  """
  def describe(node_id):
    row = community_features_df[community_features_df["id"] == node_id]
    return (
      row["page_name"].values[0],
      row["page_type"].values[0],
      row["facebook_id"].values[0],
    )

  return [describe(entry[0]) for entry in pagerank_result[:size]]

In [12]:
# Describe the 15 highest-ranked pages (working on a copy of the ranking) and
# store them under the "pagerank" key for pickling.
final_pagerank_result = filter_pagerank(list(pagerank_result), 15, graph)
page_rank_dict["pagerank"] = final_pagerank_result

In [13]:
# Display the top PageRank pages as (page_name, page_type, facebook_id) tuples.
final_pagerank_result

[('Facebook', 'company', 20531316728),
 ('Sir Peter Bottomley MP', 'politician', 293136030810246),
 ('Harish Rawat', 'politician', 271420283032553),
 ('The White House', 'government', 1191441824276882),
 ('The Obama White House', 'government', 63811549237),
 ('Barack Obama', 'politician', 6815841748),
 ('Joachim Herrmann', 'politician', 741724592531706),
 ('U.S. Army', 'government', 44053938557),
 ('European Parliament', 'government', 178362315106),
 ('Manfred Weber', 'politician', 456082177763485),
 ('U.S. Army Chaplain Corps', 'government', 404391086302925),
 ('FEMA Federal Emergency Management Agency', 'government', 78922439964),
 ('Home & Family', 'tvshow', 344485392298301),
 ('European Commission', 'government', 107898832590939),
 ('U.S. Department of State', 'government', 15877306073)]

In [None]:
# Persist the detected communities. The context manager closes the file even
# if pickling raises, which the manual open()/close() pair did not guarantee.
with open(path+project_name+"/PickleFiles/fbcommunities.pkl","wb") as pickle_file:
  pickle.dump(communities_dict, pickle_file)

In [14]:
# Persist the PageRank ranking. The context manager closes the file even if
# pickling raises, which the manual open()/close() pair did not guarantee.
with open(path+project_name+"/PickleFiles/fbpagerank.pkl","wb") as pickle_file:
  pickle.dump(page_rank_dict, pickle_file)

In [15]:
def profile_pic_fetcher(facebook_id, timeout=10):
  """Download the large profile picture for a Facebook id.

  Args:
    facebook_id: id interpolated into the graph.facebook.com picture URL.
    timeout: seconds to wait for the HTTP request before giving up, so a
      stalled connection cannot hang the notebook.

  Returns:
    The opened PIL image, or None if the request or the image decoding
    failed (the failure is reported on stdout).
  """
  try:
    profile_pic_url="https://graph.facebook.com/{}/picture?type=large".format(facebook_id)
    response = requests.get(profile_pic_url, timeout=timeout)
    # Surface HTTP errors directly instead of as an image-decoding failure.
    response.raise_for_status()
    profile_pic = Image.open(BytesIO(response.content))
    return profile_pic
  except Exception as error:
    # Best-effort fetch: report what went wrong and let the caller decide.
    print("Could not fetch profile picture for {}: {}".format(facebook_id, error))
    return None

In [16]:
# Save the profile picture of every top-ranked page, named after the page.
# Each entry is (page_name, page_type, facebook_id).
for pagerankResult in final_pagerank_result:
  profile_pic=profile_pic_fetcher(pagerankResult[2])
  if profile_pic is None:
    # The fetcher returns None when the download fails; skip that page
    # instead of crashing on None.save(...).
    continue
  profile_pic.save(path+project_name+"/PageRankImages/Facebook/{}.png".format(pagerankResult[0]))