In [5]:
import igraph as ig
import scipy.sparse as sp
import numpy as np
import sknetwork as sn

## Format GML data as sparse adjacency

In [5]:
# Load the eu-core graph from its GML file.
# NOTE(review): the other GML graphs in this notebook are passed through
# as_undirected() after loading; this one is not — confirm the file is
# already undirected.
g = ig.Graph.Read_GML("data/eu-core.gml")

In [10]:
# Sparse adjacency matrix of the graph, cast to boolean entries.
adjacency = g.get_adjacency_sparse().astype("bool")
adjacency

<Compressed Sparse Row sparse matrix of dtype 'bool'
	with 32770 stored elements and shape (1005, 1005)>

In [13]:
# Write the boolean adjacency matrix to disk as a sparse .npz file.
sp.save_npz(file="data/eu-core_adjacency.npz", matrix=adjacency)

In [24]:
# One int32 label per vertex, taken from the "gt" vertex attribute
# (presumably the ground-truth community id — confirm against the data source).
labels = np.asarray(g.vs["gt"], dtype="int32")
labels

array([ 1,  1, 25, ...,  4, 14,  9], shape=(1005,), dtype=int32)

In [25]:
# Write the label vector to disk as a .npy file.
np.save(file="data/eu-core_labels.npy", arr=labels)

In [27]:
# Load the "as" graph, convert it to an undirected graph, and show its vertex count.
g = ig.Graph.Read_GML("data/as.gml").as_undirected()
g.vcount()

23752

In [28]:
# Boolean sparse adjacency matrix of the undirected "as" graph.
adjacency = g.get_adjacency_sparse().astype("bool")
adjacency

<Compressed Sparse Row sparse matrix of dtype 'bool'
	with 116832 stored elements and shape (23752, 23752)>

In [29]:
# Write the "as" adjacency matrix to disk as a sparse .npz file.
sp.save_npz(file="data/as_adjacency.npz", matrix=adjacency)

In [31]:
# Re-index the raw "gt" values as consecutive integers 0..k-1, where a value's
# new id is its position among the sorted distinct values.
vals = np.unique(g.vs["gt"])
id_map = dict(zip(vals, range(len(vals))))
labels = np.array([id_map[raw_value] for raw_value in g.vs["gt"]])
labels

array([154, 154,   1, ...,  57,   1,   1], shape=(23752,))

In [34]:
# Write the re-indexed "as" labels to disk as a .npy file.
np.save(file="data/as_labels.npy", arr=labels)

In [36]:
# Load the full Cora graph, convert it to an undirected graph, and show its vertex count.
g = ig.Graph.Read_GML("data/cora_full.gml").as_undirected()
g.vcount()

23166

In [37]:
# Boolean sparse adjacency matrix of the undirected Cora graph.
adjacency = g.get_adjacency_sparse().astype("bool")
adjacency

<Compressed Sparse Row sparse matrix of dtype 'bool'
	with 178314 stored elements and shape (23166, 23166)>

In [38]:
# Write the Cora adjacency matrix to disk as a sparse .npz file.
sp.save_npz(file="data/cora_adjacency.npz", matrix=adjacency)

In [39]:
# Re-index the raw "gt" values as consecutive integers 0..k-1, where a value's
# new id is its position among the sorted distinct values.
vals = np.unique(g.vs["gt"])
id_map = dict(zip(vals, range(len(vals))))
labels = np.array([id_map[raw_value] for raw_value in g.vs["gt"]])
labels

array([ 0,  0,  0, ..., 57, 15, 45], shape=(23166,))

In [42]:
# Write the re-indexed Cora labels to disk as a .npy file.
np.save(file="data/cora_labels.npy", arr=labels)

## Format SNAP data as sparse matrices

In [6]:
# Parse the Amazon SNAP edge list, treating lines that start with '#' as comments.
# NOTE(review): reindex=True presumably relabels nodes to 0..n-1 and keeps the
# original ids in graph["names"] — confirm against the scikit-network docs.
graph = sn.data.from_csv(
    "data/com-amazon.ungraph.txt",
    comments="#",
    reindex=True,
)
adjacency = graph["adjacency"]
adjacency

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 1851744 stored elements and shape (334863, 334863)>

In [None]:
# Write the Amazon adjacency matrix to disk as a sparse .npz file.
# The file name previously read "amazon_adjacnecy.npz" (typo); it now follows
# the "<dataset>_adjacency.npz" pattern used for the other datasets. Any code
# that loads the misspelled name must be updated to match.
sp.save_npz("data/amazon_adjacency.npz", adjacency)

In [9]:
# Build a boolean (community x node) membership matrix from the community
# file: row c has True in the column of every node id listed on line c.
with open("data/com-amazon.top5000.cmty.txt", "r") as f:
    # split() on whitespace keeps the last id intact even when the final line
    # has no trailing newline (slicing with line[:-1] would cut off a digit).
    communities = [line.split() for line in f]

# Lookup table from original node id to its position in graph["names"], built
# once. Calling np.argmax(graph["names"] == node_id) per member rescans the
# whole array each time and silently yields column 0 for ids that are absent.
name_to_index = {}
for position, name in enumerate(graph["names"]):
    # setdefault keeps the first position of an id, matching argmax semantics.
    name_to_index.setdefault(int(name), position)

labels = sp.dok_matrix((len(communities), adjacency.shape[0]), dtype="bool")
for c, nodes in enumerate(communities):
    for n in nodes:
        node_id = int(n)
        if node_id not in name_to_index:
            # Fail loudly instead of marking the wrong node.
            raise KeyError(
                f"community line {c} references node {node_id}, "
                'which is not in graph["names"]'
            )
        labels[c, name_to_index[node_id]] = True
labels = labels.tocsr()
labels

<Compressed Sparse Row sparse matrix of dtype 'bool'
	with 67462 stored elements and shape (5000, 334863)>

In [11]:
# Write the Amazon community membership matrix to disk as a sparse .npz file.
sp.save_npz(file="data/amazon_labels2.npz", matrix=labels)

In [12]:
# Parse the DBLP SNAP edge list, treating lines that start with '#' as comments.
# NOTE(review): reindex=True presumably relabels nodes to 0..n-1 and keeps the
# original ids in graph["names"] — confirm against the scikit-network docs.
graph = sn.data.from_csv(
    "data/com-dblp.ungraph.txt",
    comments="#",
    reindex=True,
)
adjacency = graph["adjacency"]
adjacency

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 2099732 stored elements and shape (317080, 317080)>

In [None]:
# Write the DBLP adjacency matrix to disk as a sparse .npz file.
# The file name previously read "dblp_adjacnecy.npz" (typo); it now follows
# the "<dataset>_adjacency.npz" pattern used for the other datasets. Any code
# that loads the misspelled name must be updated to match.
sp.save_npz("data/dblp_adjacency.npz", adjacency)

In [13]:
# Build a boolean (community x node) membership matrix from the community
# file: row c has True in the column of every node id listed on line c.
with open("data/com-dblp.top5000.cmty.txt", "r") as f:
    # split() on whitespace keeps the last id intact even when the final line
    # has no trailing newline (slicing with line[:-1] would cut off a digit).
    communities = [line.split() for line in f]

# Lookup table from original node id to its position in graph["names"], built
# once. Calling np.argmax(graph["names"] == node_id) per member rescans the
# whole array each time and silently yields column 0 for ids that are absent.
name_to_index = {}
for position, name in enumerate(graph["names"]):
    # setdefault keeps the first position of an id, matching argmax semantics.
    name_to_index.setdefault(int(name), position)

labels = sp.dok_matrix((len(communities), adjacency.shape[0]), dtype="bool")
for c, nodes in enumerate(communities):
    for n in nodes:
        node_id = int(n)
        if node_id not in name_to_index:
            # Fail loudly instead of marking the wrong node.
            raise KeyError(
                f"community line {c} references node {node_id}, "
                'which is not in graph["names"]'
            )
        labels[c, name_to_index[node_id]] = True
labels = labels.tocsr()
labels

<Compressed Sparse Row sparse matrix of dtype 'bool'
	with 112228 stored elements and shape (5000, 317080)>

In [14]:
# Write the DBLP community membership matrix to disk as a sparse .npz file.
sp.save_npz(file="data/dblp_labels2.npz", matrix=labels)