In [None]:
import re
import random
import pandas as pd
import numpy as np
import networkx as nx
from itertools import count
import matplotlib.pyplot as plt

In [None]:
# Dataset version; it selects which CSV export is loaded.
version = '0.1.2'
file_name = 'novels_' + version + '.csv'
df = pd.read_csv(file_name, na_values='NaN')

# Keep only the first n rows. The limit is applied through `n` so that
# changing it in one place actually changes the size of the sample.
n = 600
df = df.iloc[:n]

In [None]:
# Empty undirected graph; the statements below add the nodes and edges to it.
G = nx.Graph()

# Add nodes
def get_node_label(s, genre_priority):
    """Build the node-attribute dict for one novel from its raw genre string.

    Parameters
    ----------
    s : str
        Genres formatted like ``"['action', 'comedy']"``. Any non-string
        value (for example the float NaN pandas uses for a missing cell) is
        treated as having no recognised genre.
    genre_priority : dict
        Maps genre name -> integer index. Of all the genres listed for the
        novel, the smallest index is kept.

    Returns
    -------
    dict
        ``{'genre': index}``. Genres absent from ``genre_priority`` fall back
        to the index stored under ``"unknown"`` when that key exists, and to
        ``len(genre_priority)`` otherwise.
    """
    # Resolve the fallback once. Preferring an existing "unknown" entry keeps
    # the result the same whether or not the caller has registered that key
    # yet (len() alone would shift by one after it is added).
    fallback = genre_priority.get("unknown", len(genre_priority))

    # Missing cells are not strings; re.sub would raise TypeError on them.
    if not isinstance(s, str):
        return {'genre': fallback}

    # Raw string: "\[" and "\]" are invalid escapes in a normal literal.
    genres = re.sub(r"['\[\]]", '', s).split(", ")
    return {'genre': min(genre_priority.get(g, fallback) for g in genres)}
    
# Genre -> index map. The list is enumerated in reverse, so its last entry
# ("sci-fi") receives index 0 and its first entry ("action") receives index 7.
genre_priority = {
    genre: rank
    for rank, genre in enumerate(reversed(
        ["action", "romance", "comedy", "fantasy", "tragedy", "mystery", "horror", "sci-fi"]
    ))
}

# One (node id, attribute dict) pair per row of the data frame.
nodes = [
    (novel_id, get_node_label(genres, genre_priority))
    for novel_id, genres in df[['id', 'genres']].values
]

# Register "unknown" only after the nodes have been labelled: its index is the
# size of the map at that point, which is the value get_node_label falls back
# to for genres it cannot find.
genre_priority["unknown"] = len(genre_priority)
G.add_nodes_from(nodes)

# Add edges
edge_attr = 'recommended_series_ids'

# Only rows that actually carry a recommendation list.
df_edge = df.loc[~df[edge_attr].isnull()].copy()

# "[1, 2, 3]" -> ['1', ' 2', ' 3'] (int() tolerates the surrounding spaces).
df_edge[edge_attr] = df_edge.loc[:, edge_attr].str.strip('[]').str.split(',')

# An empty list "[]" splits to [''], which int() cannot parse, so blank
# entries are skipped instead of aborting the whole cell.
edges = [
    (novel_id, int(target))
    for novel_id, targets in df_edge[['id', edge_attr]].values
    for target in targets
    if target.strip()
]

# Keep only edges whose target is one of the loaded nodes. The ids are put in
# a set once so each membership test is O(1) instead of rebuilding and
# scanning a list for every edge.
node_ids = {node[0] for node in nodes}
edges = [edge for edge in edges if edge[1] in node_ids]
G.add_edges_from(edges)

# Drop every node that has no edges at all
isolated_nodes = list(nx.isolates(G))
G.remove_nodes_from(isolated_nodes)

# Drop every node that belongs to a connected component of fewer than 5 nodes.
# The node list is fully collected before the graph is modified.
small_component_nodes = [
    member
    for group in nx.connected_components(G)
    if len(group) < 5
    for member in group
]
G.remove_nodes_from(small_component_nodes)

print('Number of nodes:', G.number_of_nodes())
print('Number of edges:', G.number_of_edges())

In [None]:
fig = plt.figure(figsize=(20, 10))

# Genre index of every node, used as the value that is mapped to a colour.
colors = list(nx.get_node_attributes(G, 'genre').values())

# Pin the colour scale explicitly. Left to the defaults, the nodes are scaled
# between min(colors) and max(colors), so a legend computed as c / max(colors)
# shows different colours whenever the smallest index present is not 0.
vmin, vmax = (min(colors), max(colors)) if colors else (0, 0)
span = vmax - vmin

# Seeded spring layout so the figure is identical on every run.
# (nx.kamada_kawai_layout(G) is a deterministic alternative layout.)
layout_seed = 42
pos = nx.spring_layout(G, seed=layout_seed)
nx.draw(G, pos, node_color=colors, node_size=500, with_labels=False,
        cmap=plt.cm.jet, vmin=vmin, vmax=vmax)

# Legend: one empty scatter per genre index present in the graph, coloured
# with the same (c - vmin) / (vmax - vmin) scaling applied to the nodes.
labels = {v: k for k, v in genre_priority.items()}
for c in np.unique(colors):
    # When all nodes share one index the span is 0; use the low end of the
    # colormap instead of dividing by zero.
    fraction = (c - vmin) / span if span else 0.0
    plt.scatter([], [], c=[plt.cm.jet(fraction)], label=labels[c])

plt.legend(prop={'size': 20})

# Save before showing so the file is written while the figure is still open.
fig.savefig('graph.svg')
plt.show()