## Read CSV file and create a graph

In [84]:
import re
import pandas as pd
import numpy as np
import networkx as nx
from itertools import count
import matplotlib.pyplot as plt

In [85]:
# Dataset release to load; the CSV file name embeds this version string.
version = '0.1.2'
file_name = f'novels_{version}.csv'

In [86]:
# Load the novels table; the literal string 'NaN' is passed as an extra
# missing-value marker.
df = pd.read_csv(file_name, na_values='NaN')
# Debug aid: uncomment to restrict the run to the first 200 rows.
#df = df.iloc[:200]

# Empty undirected graph; nodes and edges are added further down.
G = nx.Graph()

# Add nodes
def get_node_label(s, genre_priority):
    """Build the node-attribute dict for a novel from its serialized genres.

    Args:
        s: Genre list as a string, e.g. "['action', 'comedy']". Any
            non-string value (such as the float NaN pandas produces for an
            empty cell) is treated as "no known genre".
        genre_priority: Mapping from genre name to an integer rank.

    Returns:
        ``{'genre': rank}`` where ``rank`` is the smallest rank found among
        the listed genres. A genre missing from ``genre_priority`` counts as
        ``len(genre_priority)``, which is therefore also the result when
        nothing in ``s`` is recognized.
    """
    unknown = len(genre_priority)
    if not isinstance(s, str):
        return {'genre': unknown}
    # Raw string: "\[" in a plain literal is an invalid escape sequence.
    cleaned = re.sub(r"['\[\]]", '', s)
    # Split on the bare comma and trim, so "a,b" and "a, b" parse the same.
    genres = (g.strip() for g in cleaned.split(','))
    return {'genre': min(genre_priority.get(g, unknown) for g in genres)}


# Genre names in listed order. Ranks are assigned from the END of this list,
# so 'supernatural' gets 0 and 'action' gets the highest rank.
ranked_genres = [
    'action', 'comedy', 'fantasy', 'romance', 'tragedy', 'xianxia',
    'adventure', 'mystery', 'drama', 'horror', 'sci-fi', 'mature',
    'school life', 'supernatural',
]
genre_priority = {
    genre: rank for rank, genre in enumerate(reversed(ranked_genres))
}

# One (id, attributes) pair per CSV row; the attributes hold the genre rank.
nodes = [
    (novel_id, get_node_label(genres, genre_priority))
    for novel_id, genres in df[['id', 'genres']].values
]
G.add_nodes_from(nodes)

#TODO: Add colors depending on genre (ordered priority)

# Add edges
edge_attr = 'recommended_series_ids'
# Rows with no recommendation list contribute no edges.
df_edge = df.loc[~df[edge_attr].isnull()].copy()
# "[1, 2, 3]" -> ['1', ' 2', ' 3']; int() tolerates the surrounding spaces.
df_edge[edge_attr] = df_edge.loc[:, edge_attr].str.strip('[]').str.split(',')
edges = [
    (novel_id, int(target))
    for novel_id, targets in df_edge[['id', edge_attr]].values
    for target in targets
    # An empty list "[]" splits into [''], which int() would reject.
    if target.strip()
]
G.add_edges_from(edges)

print('Number of nodes:', G.number_of_nodes())
print('Number of edges:', G.number_of_edges())

Number of nodes: 7726
Number of edges: 20134


In [None]:
fig = plt.figure(figsize=(40, 20))
# One color value per node, in G's node order. Nodes that exist only because
# an edge referenced their id carry no 'genre' attribute; they get the same
# "unknown" rank get_node_label uses, so the list always matches the node
# count that the drawing call expects.
unknown_genre = len(genre_priority)
colors = [genre for _, genre in G.nodes(data='genre', default=unknown_genre)]
nx.draw_kamada_kawai(G, node_color=colors, node_size=500, with_labels=False, cmap=plt.cm.jet)
# Write the file before show(): show() can block until the window is closed,
# and some backends release the figure afterwards.
fig.savefig('graph.svg')
plt.show()