### This dataset consists of 'circles' (or 'friends lists') from Facebook. Facebook data was collected from survey participants using this Facebook app. The dataset includes node features (profiles), circles, and ego networks.

http://snap.stanford.edu/data/egonets-Facebook.html

In [1]:
#setup
from graph_tool.all import *
import numpy as np
import math

file_path = "/home/jawa/Datasets/facebook_combined.txt"

In [8]:
# initialization
g = Graph(directed=False)

neighbours = {}
vertex_id = {}

with open(file_path, encoding="UTF-8") as f:
    for line in f:
        if line.startswith("#"):
            continue
        l = line.replace("\n", "").split(" ")
        a = l[0]
        b = l[1]
        if a not in neighbours:
            neighbours[a] = []
        if b not in neighbours:
            neighbours[b] = []
        # see if we have been here before
        if a in neighbours[b]:
            continue
        if b in neighbours[a]:
            continue
        # add to our check lists
        neighbours[a].append(b)
        neighbours[b].append(a)
        # get the vertexes
        if a not in vertex_id:
            v_a = g.add_vertex()
            vertex_id[a] = g.vertex_index[v_a]
        else:
            v_a = g.vertex(vertex_id[a])
        if b not in vertex_id:
            v_b = g.add_vertex()
            vertex_id[b] = g.vertex_index[v_b]
        else:
            v_b = g.vertex(vertex_id[b])
        # add the edge
        g.add_edge(v_a, v_b)
    

In [9]:
# degree stats
degrre_data = {}
degrre_data["out"] = []
degrre_data["in"] = []
counter = 0
for v in g.vertices():
        counter += 1
        degrre_data["out"].append(v.out_degree())
        degrre_data["in"].append(v.in_degree())
    
max_out = max(degrre_data["out"])
min_out = min(degrre_data["out"])
average_out = np.average(degrre_data["out"])
sd_out = np.std(degrre_data["out"])
print("Out degree stats -> max value = " + str(max_out) + " / min value = " + str(min_out) + " / average = " + str(average_out) + " / SV = " + str(sd_out))

Out degree stats -> max value = 1045 / min value = 1 / average = 43.6910126269 / SV = 52.4141155674


In [10]:
# distance stats
#shortest_distance = graph_tool.topology.shortest_distance(g)
#global_avg = []
#for dist in shortest_distance:
#    global_avg.append(np.average(dist))
pseudo_diameter = graph_tool.topology.pseudo_diameter(g)

# there are non connect components in the graph, so we cant just calculate the avverage shortest distance
#print("Shortest Distance Average = " + str(np.average(global_avg)) + " / Diameter = " + str(pseudo_diameter))
print("Diameter = " + str(pseudo_diameter))

Diameter = (8.0, (<Vertex object with index '1946' at 0x7f95c890a6d8>, <Vertex object with index '4035' at 0x7f95c890a840>))


In [11]:
# clustering stats
global_clustering = graph_tool.clustering.global_clustering(g)

print ("Clustering Average = " + str(np.average(global_clustering)) + " / " + "Clustering SV = " + str(np.std(global_clustering)))

Clustering Average = 0.278343743128 / Clustering SV = 0.240830534415


In [12]:
# connected componet stats
comp, hist = label_components(g)

labels = list(set(comp.a))
largest = label_largest_component(g)
l = GraphView(g, vfilt=largest)

print("Connected components = " + str(len(labels)) + " / Edges in Largest component = " + str(l.num_edges()) + " / Vertices in Largest component = " + str(l.num_vertices()))

Connected components = 1 / Edges in Largest component = 88234 / Vertices in Largest component = 4039


In [13]:
# vertex closeness stats
vertex_closeness = closeness(g)
c_list = []
for vc in vertex_closeness.a:
    if not math.isnan(vc):
        c_list.append(vc)
        
print("Closeness stats -> max value = " + str(max(c_list)) + " / min value = " + str(min(c_list)) + " / average = " + str(np.average(c_list)) + " / SV = " + str(np.std(c_list)))

Closeness stats -> max value = 0.459699453552 / min value = 0.178254535823 / average = 0.276167763567 / SV = 0.0361196038432


In [14]:
# betweenness stats
vertex_betweenness, edge_betweenness = betweenness(g)
v_b = list(vertex_betweenness)
print("Betweenness stats -> max value = " + str(max(v_b)) + " / min value = " + str(min(v_b)) + " / average = " + str(np.average(v_b)) + " / SV = " + str(np.std(v_b)))

Betweenness stats -> max value = 0.4805180785560148 / min value = 0.0 / average = 0.000666957356873 / SV = 0.011644899732
