In [1]:
# third-party dependencies: numpy/pandas (data), matplotlib (plotting), networkx (graphs)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx

In [2]:
# locations of the BlogCatalog CSV files, all relative to one base directory
dataset_path = 'data/BlogCatalog-dataset/data/'
(
    friend_edges_csv_path,  # blogger <-> blogger friendships
    group_edges_csv_path,   # blogger -> group memberships
    groups_csv_path,        # group ids
    bloggers_csv_path,      # blogger ids
) = (
    dataset_path + csv_name
    for csv_name in ('edges.csv', 'group-edges.csv', 'groups.csv', 'nodes.csv')
)

In [3]:
# load the headerless CSV files into dataframes; id columns are parsed as str
# edge files: columns 0 and 1 both hold ids
friend_edges_df, group_edges_df = (
    pd.read_csv(edge_csv, sep=',', header=None, dtype={0: str, 1: str})
    for edge_csv in (friend_edges_csv_path, group_edges_csv_path)
)
# node files: only column 0 is forced to str
groups_df, bloggers_df = (
    pd.read_csv(node_csv, sep=',', header=None, dtype={0: str})
    for node_csv in (groups_csv_path, bloggers_csv_path)
)

In [4]:
# give bloggers and groups unique node-ids: 'b<id>' for bloggers, 'g<id>' for groups
def _with_prefix(ids, prefix):
    """Return the string Series `ids` with `prefix` prepended to each value.

    Values that already start with `prefix` are left untouched, so re-running
    this cell does not stack prefixes ('bb1', 'bbb1', ...) on the in-place
    updated frames. Missing values stay missing.

    NOTE(review): assumes raw ids never start with the prefix letter themselves
    (e.g. purely numeric ids) -- confirm against the dataset.
    """
    already_prefixed = ids.str.startswith(prefix, na=False)
    return ids.where(already_prefixed, prefix + ids)


bloggers_df[0] = _with_prefix(bloggers_df[0], 'b')
# every column of the friendship frame holds a blogger id
friend_edges_df = friend_edges_df.apply(_with_prefix, prefix='b')
groups_df[0] = _with_prefix(groups_df[0], 'g')
# membership edges: column 0 is the blogger, column 1 is the group
group_edges_df[0] = _with_prefix(group_edges_df[0], 'b')
group_edges_df[1] = _with_prefix(group_edges_df[1], 'g')

In [5]:
# define networkx graph
# nx.Graph is undirected and stores at most one edge per node pair, so adding
# the same pair again only updates that edge's attributes
blog_catalog_graph = nx.Graph()

In [6]:
# label constants used when building the graph
# edge labels: blogger-group membership and blogger-blogger friendship
IS_MEMBER_OF, IS_FRIEND_WITH = 'is_member_of', 'is_friend_with'
# node type labels
BLOGGER, GROUP = 'blogger', 'group'

In [7]:
# add blogger nodes, then group nodes, to the graph
# each node is tagged with its type label (BLOGGER / GROUP), mirroring the
# `label` attribute that the edges receive; node counts are unaffected
blog_catalog_graph.add_nodes_from(bloggers_df[0].tolist(), label=BLOGGER)
print("{} nodes in graph".format(blog_catalog_graph.number_of_nodes()))
blog_catalog_graph.add_nodes_from(groups_df[0].tolist(), label=GROUP)
print("{} nodes in graph".format(blog_catalog_graph.number_of_nodes()))

10312 nodes in graph
10351 nodes in graph


In [8]:
# turn each edge dataframe into a list of (source, target) tuples
def _edge_pairs(edges_df):
    """Pair columns 0 and 1 of `edges_df` row by row into a list of 2-tuples."""
    sources = edges_df[0].tolist()
    targets = edges_df[1].tolist()
    return list(zip(sources, targets))


group_edges = _edge_pairs(group_edges_df)
friend_edges = _edge_pairs(friend_edges_df)

In [9]:
# insert the (blogger)-[is_member_of]-(group) edges, then report the graph size
blog_catalog_graph.add_edges_from(group_edges, label=IS_MEMBER_OF)
print(
    "{} edges in graph".format(blog_catalog_graph.number_of_edges()),
    "{} nodes in graph".format(blog_catalog_graph.number_of_nodes()),
    sep="\n",
)

14476 edges in graph
10351 nodes in graph


In [10]:
# insert the (blogger)-[is_friend_with]-(blogger) edges, then report the graph size
blog_catalog_graph.add_edges_from(friend_edges, label=IS_FRIEND_WITH)
print(
    "{} edges in graph".format(blog_catalog_graph.number_of_edges()),
    "{} nodes in graph".format(blog_catalog_graph.number_of_nodes()),
    sep="\n",
)

348459 edges in graph
10351 nodes in graph


In [11]:
# export graph as edge list to given path
edge_list_export_path = dataset_path + 'blogcatalog_edgelist.csv'
# data=False writes only the two node ids per line; edge labels are not exported.
# NOTE(review): no delimiter is passed, so networkx's documented default (a single
# space) applies even though the file is named .csv -- confirm consumers expect that
nx.write_edgelist(blog_catalog_graph, edge_list_export_path, data=False)