In [1]:
from ast import literal_eval
from itertools import combinations

import altair as alt
import networkx as nx
import numpy as np
import pandas as pd
from pyvis.network import Network

In [2]:
# Load the master track table.
df = pd.read_csv('data_master.csv')

# Derive the release year with the vectorised .dt accessor instead of a
# per-row lambda; the datetime conversion itself is unchanged.
df['year'] = df['release_date'].astype('datetime64[ns]').dt.year

# Keep only collaboration rows and the columns used downstream. A single .loc
# selection followed by an explicit copy makes `df` an independent frame, so
# the column assignment below cannot trigger SettingWithCopyWarning.
df = df.loc[
    df['is_collab'] == True,  # noqa: E712 - element-wise comparison on a Series
    ['popularity', 'name', 'primary_genre', 'year', 'artists', 'collaborators'],
].copy()

# `artists` is stored as text in the CSV; parse each cell into a Python object
# (presumably a list of artist names - it is indexed like a sequence later on).
df['artists'] = df['artists'].apply(literal_eval)

In [4]:
# Pick the 50 most popular pop collaborations; the artists on these tracks
# are what gets plotted as nodes and edges.
top50_pop = (
    df[df['primary_genre'] == 'pop']
    .sort_values(by='popularity', ascending=False)
    .head(50)[['artists', 'name', 'popularity', 'primary_genre']]
)

# Create graph
graph = nx.Graph()

# Populate edges: every pair of artists credited on the same track is linked.
# The edge carries the track name and popularity as its 'title' and the
# popularity scaled into a 'width'.
edges = []
for _, track in top50_pop.iterrows():
    edge_attrs = {
        # f-string instead of `+` so a non-string name cannot raise TypeError.
        'title': f"{track['name']}, Popularity: {track['popularity']}",
        'width': track['popularity'] / 100 + 2,
    }
    # Each edge gets its own attribute dict so the dicts are never shared.
    edges += [
        (artist_a, artist_b, dict(edge_attrs))
        for artist_a, artist_b in combinations(track['artists'], 2)
    ]

# nx.Graph keeps a single edge per artist pair and later attributes overwrite
# earlier ones. Adding the edges in ascending width order means that when a
# pair shares several tracks, the most popular track's title/width is the one
# that survives. `sorted` is stable, so ties keep their original order.
graph.add_edges_from(sorted(edges, key=lambda edge: edge[2]['width']))

# Compute positions for viz. The fixed seed makes the spring layout
# reproducible across kernel restarts. Note that `pos` is not passed to the
# pyvis network built below.
pos = nx.spring_layout(graph, seed=42)

# Display graph of top 50 pop collaborations
g = Network(height=700, width=1400, notebook=True, bgcolor="#222222", font_color="white")
g.toggle_hide_edges_on_drag(False)
g.force_atlas_2based()
g.from_nx(graph)
# Keep this call last in the cell so its return value is what the notebook renders.
g.show('pop.html')