# 0. Configuration

In [1]:
data_dir = 'data/'
graph_filename = 'deezer_europe_edges.csv'

graph_path = f'{data_dir}/{graph_filename}'
sep = ','
directed = False
header = True

In [2]:
import time

# 1. Graph processing with standard Python functions/data structures

In [3]:
from collections import defaultdict

### Data loading
We will load the graph adopting an **adjacency list** representation.

Specifically, we will use a **dictionary** where the key is a vertex identifier, and the value is the set of vertex identifiers of all the neighbors of the key vertex.

In [4]:
def load_graph(graph_path,sep,directed,header):
    with open(graph_path) as f:
        start = 1 if header else 0
        vertex2neighbors = defaultdict(set)
        for line in f.readlines()[start:]:
            tokens = line.split(sep)
            vertex1 = int(tokens[0])
            vertex2 = int(tokens[1])
            vertex2neighbors[vertex1].add(vertex2)
            if not directed:
                vertex2neighbors[vertex2].add(vertex1) #for undirected graphs, every edge is stored twice; can we do better?
    return vertex2neighbors

In [5]:
start = time.time()
graph = load_graph(graph_path,sep,directed,header)
end = time.time()
runtime = int(round((end-start)*1000))
print("Loading time: " + str(runtime) + " ms")

Loading time: 115 ms


### Playing with the graph

In [6]:
def number_of_vertices(graph):
    return len(graph)

def number_of_edges(graph,directed):
    denominator = 1 if directed else 2
    return sum([len(graph[u]) for u in graph.keys()])/denominator # '/2' is needed for undirected graphs because every edge is stored twice

def neighborhood(graph,vertex):
    return sorted(list(graph[vertex]))

def degree(graph,vertex):
    return len(graph[vertex])

def min_degree(graph):
    return min(degree(graph,u) for u in graph.keys())

def max_degree(graph):
    return max(degree(graph,u) for u in graph.keys())

In [7]:
print('Number of vertices: ' + str(number_of_vertices(graph)))
print('Number of edges: ' + str(int(number_of_edges(graph,directed))))

target_vertex = 0
print('Neighborhood of vertex ' + str(target_vertex) + ': ' + str(neighborhood(graph,target_vertex)))
print('Degree of vertex ' + str(target_vertex) + ': ' + str(degree(graph,target_vertex)))

print('Minimum degree: ' + str(min_degree(graph)))
print('Maximum degree: ' + str(max_degree(graph)))

Number of vertices: 28281
Number of edges: 92752
Neighborhood of vertex 0: [3001, 12029, 14145, 14270, 14581, 16976, 25564]
Degree of vertex 0: 7
Minimum degree: 1
Maximum degree: 172


In [8]:
n = number_of_vertices(graph)
m = number_of_edges(graph,directed)
print('Average degree: ' + str(m/n))

all_possible_edges = n*(n-1)/2
print('Edge density: ' + str(m/all_possible_edges))

Average degree: 3.279657720731233
Edge density: 0.00023194184729358083


In [9]:
def is_a_clique(graph,vertices):
    for u in vertices:
        for v in vertices:
            if u != v and v not in graph[u]:
                return False
    return True

In [10]:
vertex_set1 = {0,3001}
vertex_set2 = {0,1}
vertex_set3 = {1,17,34}
vertex_set4 = {1,17,34,190,5011}

print("Is vertex set " + str(vertex_set1) + " a clique? " + str(is_a_clique(graph,vertex_set1)))
print("Is vertex set " + str(vertex_set2) + " a clique? " + str(is_a_clique(graph,vertex_set2)))
print("Is vertex set " + str(vertex_set3) + " a clique? " + str(is_a_clique(graph,vertex_set3)))
print("Is vertex set " + str(vertex_set4) + " a clique? " + str(is_a_clique(graph,vertex_set4)))

Is vertex set {0, 3001} a clique? True
Is vertex set {0, 1} a clique? False
Is vertex set {1, 34, 17} a clique? True
Is vertex set {1, 34, 17, 5011, 190} a clique? False


# 2. Graph processing with [`NetworkX`](https://networkx.org/)

In [11]:
import networkx as nx

### Data loading

In [12]:
def load_graph_nx(graph_path,sep,directed,header):
    graph_type = nx.DiGraph if directed else nx.Graph
    with open(graph_path, 'rb') as f:
        if header:
            next(f, '') # skip header line
        G = nx.read_adjlist(f, delimiter=sep, create_using=graph_type, nodetype=int)
    return G

In [13]:
start = time.time()
graph_nx = load_graph_nx(graph_path,sep,directed,header)
end = time.time()
runtime = int(round((end-start)*1000))
print("Loading time: " + str(runtime) + " ms")

Loading time: 360 ms


### Playing with the graph

In [14]:
def number_of_vertices_nx(graph):
    return graph.number_of_nodes()

def number_of_edges_nx(graph):
    return graph.number_of_edges()

def neighborhood_nx(graph,vertex):
    return sorted([u for u in graph[vertex]])

def degree_nx(graph,vertex):
    return graph.degree[vertex]

def min_degree_nx(graph):
    return min(degree_nx(graph,u) for u in graph.nodes)

def max_degree_nx(graph):
    return max(degree_nx(graph,u) for u in graph.nodes)

In [15]:
print('Number of vertices: ' + str(number_of_vertices_nx(graph_nx)))
print('Number of edges: ' + str(int(number_of_edges_nx(graph_nx))))

target_vertex = 0
print('Neighborhood of vertex ' + str(target_vertex) + ': ' + str(neighborhood_nx(graph_nx,target_vertex)))
print('Degree of vertex ' + str(target_vertex) + ': ' + str(degree_nx(graph_nx,target_vertex)))

print('Minimum degree: ' + str(min_degree_nx(graph_nx)))
print('Maximum degree: ' + str(max_degree_nx(graph_nx)))

Number of vertices: 28281
Number of edges: 92752
Neighborhood of vertex 0: [3001, 12029, 14145, 14270, 14581, 16976, 25564]
Degree of vertex 0: 7
Minimum degree: 1
Maximum degree: 172


In [16]:
target_vertex = 0
c = nx.clustering(graph_nx,target_vertex)
print('Local clustering coefficient of vertex ' + str(target_vertex) + ': ' + str(c))

avgc = nx.average_clustering(graph_nx)
print('Network average clustering coefficient: ' + str(avgc))

Local clustering coefficient of vertex 0: 0.14285714285714285
Network average clustering coefficient: 0.1411598733316564


In [17]:
print('Is the graph connected? ' + str(nx.is_connected(graph_nx)))
print('Number of connected components: ' + str(nx.number_connected_components(graph_nx)))

Is the graph connected? True
Number of connected components: 1


In [18]:
target_vertex = 0
print('Eccentricity of vertex ' + str(target_vertex) + ': ' + str(nx.eccentricity(graph_nx,v=target_vertex)))
#print('Diameter: ' + str(nx.diameter(graph_nx))) # BE CAREFUL: this may be very slow even for moderately large graphs
#print('Radius: ' + str(nx.radius(graph_nx))) # BE CAREFUL: this may be very slow even for moderately large graphs

Eccentricity of vertex 0: 15


In [19]:
target_vertex1 = 0
target_vertex2 = 12300
print('Shortest path between vertex ' + str(target_vertex1) + ' and vertex ' + str(target_vertex2) + ': ' + str(nx.shortest_path(graph_nx,source=target_vertex1,target=target_vertex2)))

Shortest path between vertex 0 and vertex 12300: [0, 14270, 6131, 7361, 2832, 18679, 12300]
