In [1]:
import pandas as pd
import numpy as np
%matplotlib inline

# Step 1: Load the edge list data

In [2]:
# Directory that holds the input CSV. The original Windows location stays
# the default; set the EDGE_LIST_DIR environment variable to run the
# notebook on another machine without editing this cell.
import os

in_dir = os.environ.get('EDGE_LIST_DIR') or 'D:\\Directory\\'
# the file path is later built by plain string concatenation, so the
# directory must end with a path separator
if not in_dir.endswith(('\\', '/')):
    in_dir += os.sep

In [3]:
# read the edge list CSV from the input directory
edge_list_path = in_dir + 'edge_list_spacy_vectors_lg_clean.csv'
edge_df = pd.read_csv(edge_list_path)
# (rows, columns) of the loaded table
edge_df.shape

(831, 18)

In [4]:
# preview the first rows of the edge list
edge_df.head()

Unnamed: 0,source_id,source_album,source_artist,source_score,source_year1,source_year2,target_id,target_album,target_artist,target_score,target_year1,target_year2,target_rank,cosine_sim,source_year_master,target_year_master,source_score_cut,target_score_cut
0,22561,death certificate,ice cube,9.5,1991.0,,20048,ferg forever,a$ap ferg,6.4,2014.0,,1,0.994675,1991.0,2014.0,Great,Bad
1,22561,death certificate,ice cube,9.5,1991.0,,18930,piata,"madlib, freddie gibbs",8.0,2014.0,,2,0.99455,1991.0,2014.0,Great,Great
2,22566,blunted on reality,fugees,7.6,1994.0,2016.0,5711,street's disciple,nas,7.2,2004.0,,1,0.989205,1994.0,2004.0,Good,Good
3,22566,blunted on reality,fugees,7.6,1994.0,2016.0,13373,slaughterhouse,slaughterhouse,5.5,2009.0,,2,0.987979,1994.0,2009.0,Good,Bad
4,22132,things fall apart,the roots,9.4,1999.0,,4331,power in numbers,jurassic 5,7.1,2002.0,,1,0.992334,1999.0,2002.0,Great,Good


# Step 2: Make the graph object with networkx

In [5]:
import networkx as nx

In [6]:
# make the networkx graph object.
# NOTE: with no create_using argument, from_pandas_edgelist builds an
# *undirected* nx.Graph (not a directed one), so the source/target order
# of each row is not kept in the graph.
# Nodes are keyed by album title; each edge stores its cosine similarity
# under the 'cosine_sim' attribute.
# NOTE(review): albums that share a title collapse into a single node --
# confirm that is acceptable for this data.
g = nx.from_pandas_edgelist(edge_df,
                            source='source_album',
                            target='target_album',
                            edge_attr='cosine_sim')
# number of distinct albums (nodes) in the graph
g.number_of_nodes()

815

# Step 3: Community detection with the Louvain algorithm

In [7]:
# https://python-louvain.readthedocs.io/en/latest/index.html
# https://python-louvain.readthedocs.io/en/latest/
# https://medium.com/@adityagandhi.7/network-analysis-and-community-structure-for-market-surveillance-using-python-networkx-65413e7b7fee
# https://github.com/adityagandhi7/community_structure/blob/master/test_chat_v1.ipynb
import community

In [8]:
# first compute the best partition (Louvain).
# The algorithm is randomized, so the random state is pinned to get the
# same communities -- and therefore the same colors -- on every run.
# NOTE(review): best_partition reads edge weights from the 'weight'
# attribute by default, while the edges here only carry 'cosine_sim', so
# the similarities are not used as weights -- confirm this is intended.
partition_object = community.best_partition(g, random_state=42)

In [9]:
# we should have 1 entry per node: check it rather than eyeballing it
assert len(partition_object) == len(g.nodes()), \
    "partition does not cover every node of the graph"
len(partition_object)

815

In [10]:
# peek at the first ten entries of the partition dictionary,
# i.e. (album, community number) pairs
list(partition_object.items())[:10]

[('death certificate', 0),
 ('ferg forever', 0),
 ('piata', 0),
 ('blunted on reality', 1),
 ("street's disciple", 1),
 ('slaughterhouse', 1),
 ('things fall apart', 1),
 ('power in numbers', 1),
 ('right about now: the official sucka free mixtape', 1),
 ('buhloone mindstate', 2)]

# Step 4: Add meta-data to networkx graph object for plotting

In [11]:
# community of every album, in the graph's node order
values = list(map(partition_object.get, g.nodes()))

In [12]:
# communities of the first ten nodes
print(values[:10])

[0, 0, 0, 1, 1, 1, 1, 1, 1, 2]


In [13]:
# number of distinct communities found
distinct_communities = set(values)
len(distinct_communities)

89

In [14]:
# define a list of 95 different colors (hex codes);
# later cells assign one color per community from this palette
# generated with the site: iwanthue (https://medialab.github.io/iwanthue/)
color_list = ["#0157a1", "#77f431", "#000cb3", "#e4ff3f",
"#6213c6", "#1abd00", "#ab39eb", "#00c932", "#e232e8",
"#2a9b00", "#975bff", "#aecf00", "#01119c", "#ffe02b",
"#5565ff", "#c1ff73", "#a5009f", "#00ca5f", "#ff64e7",
"#43ffa5", "#f60095", "#76ffa5", "#ff50c9", "#a0ffa4",
"#3a0067", "#f6ff7f", "#00216e", "#e5bc00", "#e378ff",
"#aab000", "#828bff", "#ff7910", "#0281e2", "#e08900",
"#019def", "#ff3f2b", "#01e0d3", "#da0120", "#03bde6",
"#d04b00", "#819cff", "#648e00", "#ff46ad", "#018436",
"#b40078", "#01a165", "#ca0067", "#3d7900", "#640063",
"#f2ffa5", "#16003e", "#f3ffbe", "#0f002d", "#ffd580",
"#000c26", "#ff6b47", "#01ad92", "#ff5552", "#85e2ff",
"#b5002d", "#b4ffec", "#8e0023", "#e4ffe9", "#82004f",
"#667000", "#feaaff", "#284f00", "#b8a8ff", "#8d6d00",
"#8dbfff", "#724200", "#0174ae", "#ff4f70", "#00512a",
"#ff9acb", "#0f2a00", "#ffedfe", "#000a05", "#f9fffd",
"#280e00", "#c9f1ff", "#6b001f", "#e3d2ff", "#571800",
"#ffdec2", "#003361", "#ff8a77", "#00353c", "#ff839a",
"#018093", "#ffb094", "#00506d", "#ffbeb9", "#371400",
"#342c00"]

In [15]:
# number of colors available in the palette
len(color_list)

95

In [16]:
# number of distinct communities, for comparison with the palette size
len(set(values))

89

In [17]:
# the community algorithm creates around 88-92 communities
# (there is some randomness to the algorithm)
# we select as many colors as there are communities; if a run ever
# yields more communities than the palette holds, the palette wraps
# around (colors repeat) so the list still has one entry per community
n_communities = len(set(values))
color_list = [color_list[i % len(color_list)] for i in range(n_communities)]
len(color_list)

89

In [18]:
# build the lookup: community number -> color hex code
# (community numbers run from 0 to the number of communities - 1)
community_ids = np.arange(0, len(set(values)))
color_series = pd.Series(color_list, index=community_ids)
color_dict = color_series.to_dict()

In [19]:
# community -> color mapping of the first 10 communities
list(color_dict.items())[:10]

[(0, '#0157a1'),
 (1, '#77f431'),
 (2, '#000cb3'),
 (3, '#e4ff3f'),
 (4, '#6213c6'),
 (5, '#1abd00'),
 (6, '#ab39eb'),
 (7, '#00c932'),
 (8, '#e232e8'),
 (9, '#2a9b00')]

In [20]:
# use the color dictionary to update the partition object:
# we replace the number of the community with the color hex code.
# A new dict is built instead of overwriting values in place, and entries
# that already hold a hex string are kept as they are, so re-running this
# cell is harmless (the in-place version raised a KeyError on a second run).
partition_object = {
    album: community_id if isinstance(community_id, str) else color_dict[community_id]
    for album, community_id in partition_object.items()
}

In [21]:
# first ten entries of the partition object, which now
# maps each album to the color of its community
list(partition_object.items())[:10]

[('death certificate', '#0157a1'),
 ('ferg forever', '#0157a1'),
 ('piata', '#0157a1'),
 ('blunted on reality', '#77f431'),
 ("street's disciple", '#77f431'),
 ('slaughterhouse', '#77f431'),
 ('things fall apart', '#77f431'),
 ('power in numbers', '#77f431'),
 ('right about now: the official sucka free mixtape', '#77f431'),
 ('buhloone mindstate', '#000cb3')]

In [22]:
# store each album's community color on its node
# as the 'color' attribute of the networkx object
nx.set_node_attributes(g, values=partition_object, name='color')

In [23]:
# inspect the attributes now stored on one node
g.nodes['death certificate']

{'color': '#0157a1'}

In [24]:
def _album_labels(frame, album_col, artist_col, year_col):
    """Map each album title in `album_col` to the string "artist (year)"."""
    years = frame[year_col].astype(int).astype(str)
    labels = frame[artist_col] + " (" + years + ")"
    return pd.Series(labels.values, index=frame[album_col]).to_dict()


# make dictionaries for source and target artists
source_artist_dict = _album_labels(edge_df, 'source_album', 'source_artist', 'source_year_master')
target_artist_dict = _album_labels(edge_df, 'target_album', 'target_artist', 'target_year_master')

# concatenate the dictionaries (for an album present in both,
# the entry from the target dictionary is the one kept)
# https://stackoverflow.com/questions/38987/how-do-i-merge-two-dictionaries-in-a-single-expression
master_artist_dict = {**source_artist_dict, **target_artist_dict}

In [25]:
# first element of the dictionary: an (album, "artist (year)") pair
list(master_artist_dict.items())[0] 

('death certificate', 'ice cube (1991)')

In [26]:
# store the "artist (year)" label on each node
# as the 'title' attribute of the networkx object
nx.set_node_attributes(g, values=master_artist_dict, name='title')

In [27]:
# inspect the node again: it should now carry both attributes set above
g.nodes['death certificate']

{'color': '#0157a1', 'title': 'ice cube (1991)'}

# Step 5: Make the interactive network visualization with pyvis, colored by community

In [28]:
# https://gist.github.com/maciejkos/e3bc958aac9e7a245dddff8d86058e17
def draw_graph3(networkx_graph,notebook=True,output_filename='graph.html',show_buttons=True,only_physics_buttons=False,
                height=None,width=None,bgcolor=None,font_color=None,pyvis_options=None):
    """
    This function accepts a networkx graph object,
    converts it to a pyvis network object preserving its node and edge attributes,
    and both returns and saves a dynamic network visualization.
    The networkx graph passed in is left unmodified.
    Valid node attributes include:
        "size", "value", "title", "x", "y", "label", "color".
        (For more info: https://pyvis.readthedocs.io/en/latest/documentation.html#pyvis.network.Network.add_node)
    Valid edge attributes include:
        "arrowStrikethrough", "hidden", "physics", "title", "value", "width"
        (For more info: https://pyvis.readthedocs.io/en/latest/documentation.html#pyvis.network.Network.add_edge)
    If an edge has a "weight" but neither "value" nor "width", the weight is
    passed to pyvis as the edge's "value".
    Args:
        networkx_graph: The graph to convert and display
        notebook: Display in Jupyter?
        output_filename: Where to save the converted network
        show_buttons: Show buttons in saved version of network?
        only_physics_buttons: Show only buttons controlling physics of network?
        height: height in px or %, e.g, "750px" or "100%"
        width: width in px or %, e.g, "750px" or "100%"
        bgcolor: background color, e.g., "black" or "#222222"
        font_color: font color,  e.g., "black" or "#222222"
        pyvis_options: provide pyvis-specific options (https://pyvis.readthedocs.io/en/latest/documentation.html#pyvis.options.Options.set)
    Returns:
        Whatever ``show(output_filename)`` of the pyvis network returns.
    """

    # import
    from pyvis import network as net

    # make a pyvis network; settings that are unset (falsy) are left out
    # so that pyvis applies its own defaults for them
    network_class_parameters = {"notebook": notebook, "height": height, "width": width, "bgcolor": bgcolor, "font_color": font_color}
    pyvis_graph = net.Network(**{parameter_name: parameter_value for parameter_name, parameter_value in network_class_parameters.items() if parameter_value})

    # for each node and its attributes in the networkx graph
    for node, node_attrs in networkx_graph.nodes(data=True):
        pyvis_graph.add_node(node, **node_attrs)

    # for each edge and its attributes in the networkx graph
    for source, target, edge_attrs in networkx_graph.edges(data=True):
        # work on a copy: writing into the dict handed out by the graph
        # would add a 'value' attribute to the caller's graph as a side effect
        pyvis_edge_attrs = dict(edge_attrs)
        # if value/width not specified directly, and weight is specified, set 'value' to 'weight'
        if 'value' not in pyvis_edge_attrs and 'width' not in pyvis_edge_attrs and 'weight' in pyvis_edge_attrs:
            # place at key 'value' the weight of the edge
            pyvis_edge_attrs['value'] = pyvis_edge_attrs['weight']
        # add the edge
        pyvis_graph.add_edge(source, target, **pyvis_edge_attrs)

    # turn buttons on
    if show_buttons:
        if only_physics_buttons:
            pyvis_graph.show_buttons(filter_=['physics'])
        else:
            pyvis_graph.show_buttons()

    # pyvis-specific options
    if pyvis_options:
        pyvis_graph.set_options(pyvis_options)

    # return and also save
    return pyvis_graph.show(output_filename)

In [29]:
# make the pyvis interactive plot, colored by community
# the HTML file is saved under the output filename given below
# (a relative name, so it is resolved against the working directory)
# plot will also be shown in the notebook
draw_graph3(
    g,
    notebook=True,
    output_filename='graph_output_communities.html',
    show_buttons=False,
    height='1000px',
    width='1000px',
)