In [1]:
import igraph as ig
from igraph import Graph

import pandas as pd
import numpy as np
import pickle

#https://igraph.org/python/doc/api/igraph.Graph.html

In [2]:
# Function to generate vertices and edges from a dancer output file
def read_dancer_file(filename):
    """Parse a dancer output file into a vertex table and an edge table.

    Layout expected by this parser (as implied by the slicing below):
    the first line is skipped, the following lines up to the first line
    that is exactly ``#`` describe vertices as ``;``-separated fields
    (field 0 is the vertex id, field 2 the community), and every line
    after the ``# Edges`` marker is a ``;``-separated pair of vertex ids.
    Blank lines inside either section are ignored.

    Parameters
    ----------
    filename : str or path-like
        Path of the file to read.

    Returns
    -------
    vertices : pandas.DataFrame
        Columns ``['vertex', 'community']``, one row per vertex line.
    edges : pandas.DataFrame
        Columns ``['from', 'to']``. The two endpoints of each row are sorted
        ascending and duplicated rows are dropped, so ``a;b`` and ``b;a``
        collapse into one undirected edge. The index is the one left by
        ``drop_duplicates`` (not renumbered).

    Raises
    ------
    ValueError
        If the ``#`` or ``# Edges`` marker line is missing, or a field that
        should be an integer is not.
    IndexError
        If a vertex line has fewer than three fields.
    """
    # Read file line by line, dropping surrounding whitespace / newlines
    with open(filename) as f:
        content = [line.strip() for line in f]

    # Get vertices: everything between the first line and the lone '#'
    vertex_rows = []
    for line in content[1:content.index('#')]:
        if not line:
            # Tolerate blank lines instead of failing on split(';')[2]
            continue
        fields = line.split(';')
        vertex_rows.append([int(fields[0]), int(fields[2])])
    vertices = pd.DataFrame(data=vertex_rows, columns=['vertex', 'community'])

    # Get edges: everything after the '# Edges' marker
    edge_rows = [
        sorted(int(x) for x in line.split(';'))
        for line in content[content.index('# Edges') + 1:]
        if line  # trailing blank lines would otherwise hit int('')
    ]
    edges = pd.DataFrame(data=edge_rows, columns=['from', 'to'])
    # Remove repeated edges (to make undirected)
    edges = edges.drop_duplicates()

    return vertices, edges

In [33]:
# Folder (relative to the notebook) that the dataset paths below are appended to
datasets_location = 'datasets/'
# Accumulator for the dataset records appended by the following cells;
# re-running this cell starts again from an empty list
datasets = list()

In [34]:
# Dancer 01
dancer_01 = 'dancer_01/dancer_01.graph'
dataset_name = 'Dancer 01'

# Read the vertex/edge tables and build an undirected graph from them
# NOTE(review): recent python-igraph releases give Graph.DataFrame a `use_vids`
# argument that changes how the edge columns are matched to vertices -
# TODO confirm the behaviour against the installed version.
vertices, edges = read_dancer_file(datasets_location + dancer_01)
g = Graph.DataFrame(edges=edges, directed=False, vertices=vertices)

# Keep the cell re-runnable: drop the first record already registered under
# this name (if any) before adding the fresh one
for position, record in enumerate(datasets):
    if record['name'] == dataset_name:
        del datasets[position]
        break

# Register the dataset
datasets.append({
    'name': dataset_name,
    'vertices': vertices,
    'edges': edges,
    'graph': g,
})

In [35]:
# Dancer 03
dancer_03 = 'dancer_03/dancer_03.graph'
dataset_name = 'Dancer 03'

# Read the vertex/edge tables and build an undirected graph from them
# NOTE(review): recent python-igraph releases give Graph.DataFrame a `use_vids`
# argument that changes how the edge columns are matched to vertices -
# TODO confirm the behaviour against the installed version.
vertices, edges = read_dancer_file(datasets_location + dancer_03)
g = Graph.DataFrame(edges=edges, directed=False, vertices=vertices)

# Keep the cell re-runnable: drop the first record already registered under
# this name (if any) before adding the fresh one
for position, record in enumerate(datasets):
    if record['name'] == dataset_name:
        del datasets[position]
        break

# Register the dataset
datasets.append({
    'name': dataset_name,
    'vertices': vertices,
    'edges': edges,
    'graph': g,
})

In [66]:
# Cora
#https://paperswithcode.com/dataset/cora

cities_file = 'cora/cities.csv'
labels_file = 'cora/labels.csv'
dataset_name = 'Cora'

cities = pd.read_csv(datasets_location + cities_file)
labels = pd.read_csv(datasets_location + labels_file)

# Vertex table: the 'paper' and 'numeric' columns, renamed to the
# vertex/community column names used by the other datasets
vertices = labels[['paper', 'numeric']].rename(
    columns={'paper': 'vertex', 'numeric': 'community'}
)

# Edge table: 'V1'/'V2' renamed to 'from'/'to'
edges = cities.rename(columns={'V1': 'from', 'V2': 'to'})

# Build the undirected graph, then run simplify() with its default arguments.
# NOTE(review): only the graph goes through simplify(); the `edges` frame stored
# below is still the raw table read from the CSV.
# NOTE(review): recent python-igraph releases give Graph.DataFrame a `use_vids`
# argument that changes how the edge columns are matched to vertices -
# TODO confirm the behaviour against the installed version.
g = Graph.DataFrame(edges=edges, directed=False, vertices=vertices).simplify()

# Keep the cell re-runnable: drop the first record already registered under
# this name (if any) before adding the fresh one
for position, record in enumerate(datasets):
    if record['name'] == dataset_name:
        del datasets[position]
        break

# Register the dataset
datasets.append({
    'name': dataset_name,
    'vertices': vertices,
    'edges': edges,
    'graph': g,
})

In [67]:
# EU Mails
#http://snap.stanford.edu/data/email-Eu-core.html

edges_file = 'email_eu/email-Eu-core.txt'
labels_file = 'email_eu/email-Eu-core-department-labels.txt'
dataset_name = 'Email EU'

# Both files are read as space-separated tables without a header row, so the
# columns are addressed by position (0, 1) before being renamed
edges = pd.read_csv(datasets_location + edges_file, sep=' ', header=None).rename(
    columns={0: 'from', 1: 'to'}
)
vertices = pd.read_csv(datasets_location + labels_file, sep=' ', header=None)[[0, 1]].rename(
    columns={0: 'vertex', 1: 'community'}
)

# Build the undirected graph, then run simplify() with its default arguments.
# NOTE(review): only the graph goes through simplify(); the `edges` frame stored
# below is still the raw table read from the file.
# NOTE(review): recent python-igraph releases give Graph.DataFrame a `use_vids`
# argument that changes how the edge columns are matched to vertices -
# TODO confirm the behaviour against the installed version.
g = Graph.DataFrame(edges=edges, directed=False, vertices=vertices).simplify()

# Keep the cell re-runnable: drop the first record already registered under
# this name (if any) before adding the fresh one
for position, record in enumerate(datasets):
    if record['name'] == dataset_name:
        del datasets[position]
        break

# Register the dataset
datasets.append({
    'name': dataset_name,
    'vertices': vertices,
    'edges': edges,
    'graph': g,
})

In [68]:
# Sanity check: number of dataset records registered by the cells above
len(datasets)

4

In [69]:
# Outputting serialized datasets
filename = 'datasets.data'

# Writing file: the `with` block closes the handle even if pickle.dump raises,
# so a failed dump cannot leave the file open
with open(filename, 'wb') as outfile:
    pickle.dump(datasets, outfile)