In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
import joblib
import networkx as nx

# Load the trimmed edge list data

In [2]:
# Folder that holds the derived data files. This is a machine-specific absolute
# Windows path: change it to match your own setup. Keep the trailing separator,
# because file names are appended to it by plain string concatenation.
in_dir = 'D:\\Data\\Derived\\'

In [3]:
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only point this at files from a source you trust.
edge_df_trim = joblib.load(in_dir + 'edge_list_trim.pkl') 
# (rows, columns) of the loaded edge list
edge_df_trim.shape

(831, 18)

In [4]:
# preview the first rows: one row per source album -> target album pair
edge_df_trim.head()

Unnamed: 0,source_id,source_album,source_artist,source_score,source_year1,source_year2,target_id,target_album,target_artist,target_score,target_year1,target_year2,target_rank,cosine_sim,source_year_master,target_year_master,source_score_cut,target_score_cut
100,22561,death certificate,ice cube,9.5,1991.0,,20048,ferg forever,a$ap ferg,6.4,2014.0,,1,0.994675,1991.0,2014.0,Great,Bad
101,22561,death certificate,ice cube,9.5,1991.0,,18930,piata,"madlib, freddie gibbs",8.0,2014.0,,2,0.99455,1991.0,2014.0,Great,Great
135,22566,blunted on reality,fugees,7.6,1994.0,2016.0,5711,street's disciple,nas,7.2,2004.0,,1,0.989205,1994.0,2004.0,Good,Good
136,22566,blunted on reality,fugees,7.6,1994.0,2016.0,13373,slaughterhouse,slaughterhouse,5.5,2009.0,,2,0.987979,1994.0,2009.0,Good,Bad
275,22132,things fall apart,the roots,9.4,1999.0,,4331,power in numbers,jurassic 5,7.1,2002.0,,1,0.992334,1999.0,2002.0,Great,Good


# Make the Network Visualization

In [5]:
# make the networkx graph object: an UNDIRECTED graph
# (from_pandas_edgelist builds a plain nx.Graph unless create_using is passed)
# nodes are keyed by album title; each edge carries its cosine_sim value
g = nx.from_pandas_edgelist(edge_df_trim, 'source_album', 'target_album', edge_attr='cosine_sim')
# number of distinct album titles that ended up as nodes
len(g.nodes())

815

### Examine the centralities of the network

In [6]:
# Degree centrality of every node, reshaped into an (Album, Centrality) table
s = pd.Series(nx.degree_centrality(g))
centralities = s.rename_axis('Album').reset_index(name='Centrality')

# the five most central albums
centralities.sort_values('Centrality', ascending=False).head(5)

Unnamed: 0,Album,Centrality
75,lucky 7,0.02457
325,evolution,0.013514
143,dedication 5,0.013514
492,trill o.g.,0.011057
182,peep: the aprocalypse,0.011057


### Function to make pyvis interactive plot from networkx graph object

In [7]:
# https://gist.github.com/maciejkos/e3bc958aac9e7a245dddff8d86058e17
def draw_graph3(networkx_graph,notebook=True,output_filename='graph.html',show_buttons=True,only_physics_buttons=False,
                height=None,width=None,bgcolor=None,font_color=None,pyvis_options=None):
    """
    Convert a networkx graph into a pyvis network and render it.

    Node and edge attributes of the networkx graph are forwarded to pyvis as
    keyword options. The visualization is saved to ``output_filename`` and the
    result of ``pyvis_graph.show`` is returned. The input graph itself is not
    modified.

    Valid node attributes include:
        "size", "value", "title", "x", "y", "label", "color".
        (For more info: https://pyvis.readthedocs.io/en/latest/documentation.html#pyvis.network.Network.add_node)
    Valid edge attributes include:
        "arrowStrikethrough", "hidden", "physics", "title", "value", "width"
        (For more info: https://pyvis.readthedocs.io/en/latest/documentation.html#pyvis.network.Network.add_edge)
    If an edge has a "weight" attribute but neither "value" nor "width",
    the weight is also passed to pyvis as "value".

    Args:
        networkx_graph: The graph to convert and display
        notebook: Display in Jupyter?
        output_filename: Where to save the converted network
        show_buttons: Show buttons in saved version of network?
        only_physics_buttons: Show only buttons controlling physics of network?
        height: height in px or %, e.g., "750px" or "100%"
        width: width in px or %, e.g., "750px" or "100%"
        bgcolor: background color, e.g., "black" or "#222222"
        font_color: font color, e.g., "black" or "#222222"
        pyvis_options: provide pyvis-specific options (https://pyvis.readthedocs.io/en/latest/documentation.html#pyvis.options.Options.set)

    Returns:
        Whatever ``pyvis.network.Network.show(output_filename)`` returns.
    """

    # local import: pyvis is only required once this function is actually called
    from pyvis import network as net

    # make a pyvis network; falsy settings (None, False, "") are left out so
    # that the pyvis defaults apply for them
    network_class_parameters = {"notebook": notebook, "height": height, "width": width, "bgcolor": bgcolor, "font_color": font_color}
    pyvis_graph = net.Network(**{parameter_name: parameter_value for parameter_name, parameter_value in network_class_parameters.items() if parameter_value})

    # for each node and its attributes in the networkx graph
    for node, node_attrs in networkx_graph.nodes(data=True):
        pyvis_graph.add_node(node, **node_attrs)

    # for each edge and its attributes in the networkx graph
    for source, target, edge_attrs in networkx_graph.edges(data=True):
        # work on a copy: edges(data=True) yields the graph's own attribute
        # dict, so writing into it would silently alter the caller's graph
        edge_options = dict(edge_attrs)
        # if value/width not specified directly, and weight is specified, set 'value' to 'weight'
        if 'value' not in edge_options and 'width' not in edge_options and 'weight' in edge_options:
            edge_options['value'] = edge_options['weight']
        # add the edge
        pyvis_graph.add_edge(source, target, **edge_options)

    # turn buttons on
    if show_buttons:
        if only_physics_buttons:
            pyvis_graph.show_buttons(filter_=['physics'])
        else:
            pyvis_graph.show_buttons()

    # pyvis-specific options
    if pyvis_options:
        pyvis_graph.set_options(pyvis_options)

    # return and also save
    return pyvis_graph.show(output_filename)

In [8]:
# Look-up tables from album title to score bucket, built once from the
# source side and once from the target side of the edge list.
source_album_dict = dict(zip(edge_df_trim.source_album, edge_df_trim.source_score_cut))
target_album_dict = dict(zip(edge_df_trim.target_album, edge_df_trim.target_score_cut))

# Combine both tables into one; if a title occurs on both sides,
# the target-side entry overrides the source-side one.
master_album_dict = {**source_album_dict, **target_album_dict}

In [9]:
# sanity check: show the first (album title, score bucket) pair of the merged dictionary
list(master_album_dict.items())[0] 

('death certificate', 'Great')

In [10]:
# Build the album -> node color mapping from the album -> score bucket mapping.
# A bucket that has no entry in the palette is carried over unchanged.
quality_to_color = {"Great": "green", "Good": "darkgoldenrod", "Bad": "red"}
mad2 = {
    album: quality_to_color.get(quality, quality)
    for album, quality in master_album_dict.items()
}

In [11]:
# sanity check: show the first (album title, color) pair after the recoding
list(mad2.items())[0] 

('death certificate', 'green')

In [12]:
# Attach a 'color' attribute to every node of g,
# looked up by album title in the mad2 mapping
nx.set_node_attributes(g, values=mad2, name='color')

In [13]:
# inspect one node to confirm the 'color' attribute was attached
g.nodes['death certificate']

{'color': 'green'}

In [14]:
# Text label per album in the form "artist (year)", built separately for the
# source side and the target side of the edge list.
source_year = edge_df_trim.source_year_master.astype(int).astype(str)
target_year = edge_df_trim.target_year_master.astype(int).astype(str)
source_label = edge_df_trim.source_artist + " (" + source_year + ")"
target_label = edge_df_trim.target_artist + " (" + target_year + ")"

source_artist_dict = dict(zip(edge_df_trim.source_album, source_label))
target_artist_dict = dict(zip(edge_df_trim.target_album, target_label))

# Combine both tables into one; if a title occurs on both sides,
# the target-side entry overrides the source-side one.
master_artist_dict = {**source_artist_dict, **target_artist_dict}

In [15]:
# sanity check: show the first (album title, "artist (year)") pair of the merged dictionary
list(master_artist_dict.items())[0] 

('death certificate', 'ice cube (1991)')

In [16]:
# Attach a 'title' attribute to every node of g,
# looked up by album title in the master_artist_dict mapping
nx.set_node_attributes(g, values=master_artist_dict, name='title')

In [17]:
# inspect the same node again: it should now carry both 'color' and 'title'
g.nodes['death certificate']

{'color': 'green', 'title': 'ice cube (1991)'}

In [18]:
# Render the interactive pyvis plot.
# The HTML file is written under the given relative file name (i.e. into the
# current working directory) and the plot is also shown in the notebook.
draw_graph3(
    g,
    notebook=True,
    output_filename='graph_output_blog.html',
    show_buttons=False,
    height='1000px',
    width='1000px',
)