# From CSV to SigmaJS
## Creating a Social Network Graph of the Marvel Universe


This notebook converts a CSV file containing information on co-occurrences of Marvel superheroes ([see data source](https://www.kaggle.com/csanhueza/the-marvel-universe-social-network/code)) into a SigmaJS graph.

* **Author:** Tim Denzler, Yueqian Lin


In [1]:
%load_ext autoreload
%autoreload 2
import warnings
import csv
from tqdm import tqdm
from pprint import pprint
# Suppress every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the libraries used in later cells — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

## Step 1: Create an empty networkx graph

In [2]:
import networkx as nx
# Start from an empty *directed* graph.
# NOTE(review): every later graph-building cell in this notebook rebinds `G`
# to an undirected `nx.Graph()`, so this instance appears to be unused — confirm.
G = nx.DiGraph() #empty networkx graph

## Step 2: Read the CSV file and generate the graph

In [5]:
import pandas as pd
# Load the raw publication table; the only column this notebook reads from it
# is 'Author Name' (a ', '-separated list of names per row).
df = pd.read_csv('./data/result_with_year.csv')

In [20]:
import pandas as pd
from itertools import combinations

# Build the co-authorship edge list: one row per unordered pair of authors that
# appear together in the same 'Author Name' cell of `df`.
#
# The pairs are collected in a plain list and turned into a DataFrame once.
# Growing a DataFrame row by row with `DataFrame.append` copies the whole frame
# on every call (quadratic time) and the method no longer exists in pandas >= 2.0.
pair_records = []
for authors in df['Author Name']:
    # Missing cells arrive as float NaN, hence the isinstance check; blank
    # strings are skipped as well.
    if isinstance(authors, str) and authors.strip() != "":
        # combinations() keeps the original left-to-right order of the names,
        # so the rows come out in the same order as a nested loop would give.
        pair_records.extend(combinations(authors.split(', '), 2))

# Passing `columns` explicitly keeps the two headers even when no pair was found.
new_df = pd.DataFrame(pair_records, columns=['Author 1', 'Author 2'])

# Save the new dataframe to a CSV file
new_df.to_csv('output_file.csv', index=False)

In [3]:
import csv
import networkx as nx
from tqdm import tqdm

# Nodes whose degree is below this value are dropped after loading.
min_degree = 0

# Undirected graph that accumulates one weighted edge per distinct pair.
G = nx.Graph()

# Every CSV row names two nodes; a repeated pair bumps the edge weight by one.
with open('output_filtered.csv', 'r') as csv_file:
    reader = csv.reader(csv_file)
    headers = next(reader)  # consume the header row
    for row in tqdm(reader):
        source = row[0]  # first column
        G.add_node(source)
        target = row[1]  # second column
        G.add_node(target)
        if not G.has_edge(source, target):
            # first time this pair is seen
            G.add_edge(source, target, weight=1)
        else:
            # pair seen before: count one more co-occurrence
            G[source][target]['weight'] += 1

# Single pruning pass over a snapshot of the node list; the degree is read from
# the live graph at the moment each node is visited.
for candidate in list(G):
    if G.degree(candidate) >= min_degree:
        continue
    G.remove_node(candidate)

14665it [00:00, 254441.57it/s]


In [8]:
import csv
import networkx as nx
import community  # pip install python-louvain

# Fixed seed for the Louvain heuristic so that re-running this cell yields the
# same partition (the algorithm is randomised otherwise).
LOUVAIN_SEED = 42

# Read data from CSV file and add nodes and edges.
# 'output_file.csv' is produced with pandas `to_csv` in this notebook, which
# writes UTF-8; open it with the same encoding explicitly (the platform default
# may differ) and with newline='' as the csv module requires.
G = nx.Graph()
with open('output_file.csv', 'r', encoding='utf-8', newline='') as f:
    data = csv.reader(f)
    headers = next(data)  # skip the header row
    for row in data:
        # No weight is stored here: a repeated pair maps onto the same edge.
        G.add_edge(row[0], row[1])

# Apply Louvain algorithm for community detection
partition = community.best_partition(G, random_state=LOUVAIN_SEED)

# Filter nodes by the share of their neighbours that sit in a *different*
# community; nodes whose share is below the threshold are removed.
# The loop walks a snapshot of the node list but reads neighbours and degree
# from the live graph, so earlier removals influence later checks.
modularity_threshold = 0.1
for node in list(G.nodes()):
    if partition[node] == -1:
        # NOTE(review): presumably a guard for "unassigned" nodes — confirm
        # whether best_partition can ever return -1.
        G.remove_node(node)
        continue
    degree = G.degree(node)
    foreign_neighbours = sum(1 for n in G.neighbors(node) if partition[n] != partition[node])
    # Guard against division by zero for nodes that have lost all their edges.
    node_modularity = foreign_neighbours / (degree if degree > 0 else 1)
    if node_modularity < modularity_threshold:
        G.remove_node(node)


In [17]:
import csv
import networkx as nx
from tqdm import tqdm

# Define minimum degree for nodes
min_degree = 15

# Create empty graph
G = nx.Graph()

# Read data from CSV file and add nodes and edges.
# 'output_file.csv' is produced with pandas `to_csv` in this notebook, which
# writes UTF-8; open it with the same encoding explicitly (the platform default
# may differ and would garble non-ASCII author names) and with newline='' as
# the csv module requires.
with open('output_file.csv', 'r', encoding='utf-8', newline='') as f:
    data = csv.reader(f)
    headers = next(data)  # skip the header row
    for row in tqdm(data):
        # Drop trailing commas left over from splitting the author list on ', '.
        author1 = row[0].rstrip(',')
        author2 = row[1].rstrip(',')
        G.add_node(author1)
        G.add_node(author2)
        if G.has_edge(author1, author2):
            # edge already exists, increase weight by one
            G[author1][author2]['weight'] += 1
        else:
            # add new edge with weight 1
            G.add_edge(author1, author2, weight=1)

# Remove nodes with degree less than min_degree.
# This is a single pass over a snapshot of the node list; the (unweighted)
# degree is read from the live graph when each node is visited, so removals
# made earlier in the pass can lower the degree seen for later nodes.
for node in list(G.nodes()):
    if G.degree(node) < min_degree:
        G.remove_node(node)


559273it [00:01, 295495.92it/s]


In [4]:
# Report the size of the current graph `G`.
G_nodes, G_edges = G.number_of_nodes(), G.number_of_edges()
print("Nodes = ", G_nodes, " Edges = ", G_edges)

Nodes =  3297  Edges =  8781


## Step 3: Store the graph in gexf-format

In [5]:
# Serialise the current graph `G` to a GEXF file (readable by Gephi).
# NOTE(review): `nx.write_gexf` is called for its side effect; `marvelgraph`
# presumably ends up as None rather than a graph object — confirm before using it.
# NOTE(review): the file name says "filter_100" while the `min_degree` values set
# in this notebook are 0 and 15 — confirm which filter produced this export.
marvelgraph = nx.write_gexf(G, "./data/result_filter_100.gexf") #save for gephi