### Exploring GCNs for node prediction in a music playlist: predicting whether an artist is part of a playlist

Given 10 random artists from the 'rock classics' playlist and individual graphs of size 400, can node classification methods outperform PageRank at predicting/labeling the artists in the graph that are also in the original playlist?  Given the same parameters, ranking artists by PageRank correctly labels roughly 35% to 40% of the artists in this playlist (see 'Exploring Weighted Edges').


In [1]:
import music_graph as mg    # contains all necessary functions, data and dependencies

from music_graph import artist_dictionary
from music_graph import tags_dictionary
from music_graph import filtered_tags_dictionary
from music_graph import ratings
from music_graph import build_net
from music_graph import layer_graphs
from music_graph import new_centrality
from music_graph import spotify_rankings
from music_graph import add_tag_edges

from datetime import datetime

import configparser
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

import tensorflow as tf
from stellargraph import StellarGraph as sg

# Spotify client id & secret for API access, read from a local
# 'spotify.ini' file with a [spotify] section so the credentials are
# never written into the notebook itself.

config = configparser.ConfigParser()

# ConfigParser.read() does not raise on a missing file; it returns the list
# of files it managed to parse.  Fail fast with a clear message instead of
# a bare KeyError on the section lookup below.
if not config.read('spotify.ini'):
    raise FileNotFoundError(
        "Could not read 'spotify.ini' from the working directory; "
        "it must contain a [spotify] section with client_id and client_secret."
    )

client_id = config['spotify']['client_id']
client_secret = config['spotify']['client_secret']

In [2]:
# Spotify playlist id -> human-readable playlist name.
# Commented-out entries are playlists that are currently excluded.
playlists = {
    '4C9mWYjVobPsfFXesGxYNf': 'all classic hits',  ### $$$
    #'5TF1ki4BzMFfotw57juFTY':'coffee shop mix',
    '3pM6OEFeTo4L1yd4eRltjL': '2004 hits',  ### $$$
    '73gFKr9Moh1T3jXM8TVSdC': '80s summer mix',
    '17xTDziQjqAZANvg6C6FlX': 'songs from your old ipod',
    '6j3URIMhpf62LXpDcbi98b': 'rap playlists',
    #'1DcX9eK1MaPlwwBIsEfnBX':'country playlist to end all playlists',
    #'5VhEpH7stoZiX4v8gIb77P':'playlists',
    '37i9dQZF1DWXRqgorJj26U': 'rock classics',  ### $$$
    '37i9dQZF1DWWJOmJ7nRx0C': 'rock hard',  ### $$$
    '37i9dQZF1DWWRktbhJiuqL': 'pure rock n roll',  ### $$$
    '37i9dQZF1DX5W4wuxak2hE': 'power pop',  ###
    '7Jfcy1H82lsTIzhpL4MZXu': 'punk playlist',  ### $$$
    '07jaITMGGL6QYmT8yZVxQt': 'metal playlist'
}

# Request the tracks of one playlist from Spotify, then pull the artist
# name out of each track container.
url = 'https://open.spotify.com/playlist'
sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(client_id, client_secret))

playlistid = '37i9dQZF1DWXRqgorJj26U'
playlist_name = playlists[playlistid]

# Tracks are requested 100 at a time.  The first two pages keep their
# original names; any further pages are followed through the paging link so
# playlists longer than 200 tracks are no longer silently truncated.
playlist1 = sp.playlist_tracks(playlistid, offset=0, limit=100)    # get the first 100 tracks
playlist2 = sp.playlist_tracks(playlistid, offset=100, limit=100)  # get the next 100 tracks

playlist_pages = [playlist1, playlist2]
while playlist_pages[-1] and playlist_pages[-1].get('next'):
    playlist_pages.append(sp.next(playlist_pages[-1]))

artists = []
for page in playlist_pages:
    if not page:
        continue
    for item in page['items']:
        track = item.get('track')
        if not track:
            # presumably removed/unavailable tracks come back without a
            # track object -- skip them rather than crash (TODO confirm)
            continue
        # NOTE(review): this reads the *album's* first artist, not the
        # track's own artist list -- confirm that is the intended source.
        album_artists = track['album']['artists']
        if album_artists:
            artists.append(album_artists[0]['name'].lower())

num_artists = len(set(artists))      # number of unique artists in the playlist

print(len(set(artists)), ' unique artists')
print(len((artists)), ' total artists')

81  unique artists
150  total artists


In [3]:
# Pick the random seed artists and build the layered multigraph from them.
#
# The sample is drawn without replacement from the *unique* artist names.
# Previously an index in range(num_artists) (the unique count) was applied to
# the duplicate-containing `artists` list, so only the first `num_artists`
# track slots could ever be chosen and artists were weighted by track count.
# No random seed is set in this cell, so every run draws a different sample.
n_seed_artists = 10                            # number of random artists

unique_artists = sorted({name.lower() for name in artists})
# Cap the request so a short playlist cannot make the sampling impossible.
n_seed_artists = min(n_seed_artists, len(unique_artists))

rand_artists = np.random.choice(unique_artists, size=n_seed_artists, replace=False).tolist()
rand_artist_count = len(rand_artists)

print("multigraph iteration: ")
print(rand_artists)
print(datetime.now())
multigraph = layer_graphs(rand_artists, add_tags = True, size = 400) # node count of each graph

print(list(multigraph.edges.data())[1])  # print an edge w/ data to make sure edge weight is correct.


multigraph iteration: 
['dire straits', 'bruce springsteen', 'kansas', "guns n' roses", 'lou reed', 'the who', 'cream', 'kiss', 'buffalo springfield', 'zz top']
2020-06-15 07:34:27.810324
guns n' roses  not in artist_dictionary
('dire straits', 'elementz of noize', {'kind': 'Artist', 'link': 'alan clark', 'weight': 0.10764079803947334})


In [4]:
# Size of the combined multigraph: node count first, then edge count.
node_total = len(multigraph.nodes())
edge_total = len(multigraph.edges.data())

print(node_total)
print(edge_total)

2351
631486


In [5]:
"""Build a StellarGraph from the networkx multigraph.

The source graph is an undirected multigraph with two kinds of edges
(artist edges and user-tag edges); the edge attribute 'kind' is passed as
the edge type.  Nodes carry no attributes for now -- the user-tag edges
that layer_graphs adds could arguably be modelled as node attributes
rather than edges.
"""

conversion_options = {'edge_type_attr': 'kind'}
graph = sg.from_networkx(multigraph, **conversion_options)

graph_summary = graph.info()
print(graph_summary)


StellarGraph: Undirected multigraph
 Nodes: 2351, Edges: 631486

 Node types:
  default: [2351]
    Features: none
    Edge types: default-Artist->default, default-User-Tag->default

 Edge types:
    default-User-Tag->default: [593953]
        Weights: all 0.05
        Features: none
    default-Artist->default: [37533]
        Weights: range=[1.27572e-06, 0.518629], mean=0.00996706, std=0.0366298
        Features: none


In [6]:
# Square identity matrix with one row and one column per multigraph node.

node_total = len(multigraph.nodes())
id_mtx = np.identity(node_total)

id_mtx

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [20]:
# Labels for node classification: one row per graph node.  Nodes that are
# among the randomly selected seed artists get in_playlist = 1; every other
# node stays unlabeled (NaN).
#
# The column is filled in one vectorized assignment.  The earlier
# labels['in_playlist'][i] = ... form is chained indexing, which raises
# SettingWithCopyWarning and does not write through under pandas
# copy-on-write.
labels = pd.DataFrame({'artist': list(multigraph.nodes())})
is_seed_artist = labels['artist'].isin(rand_artists)
labels['in_playlist'] = np.where(is_seed_artist, 1.0, np.nan)

# A seed artist that never made it into the graph cannot be labeled, so the
# number of labeled nodes can be smaller than the number of seeds.
missing_seeds = sorted(set(rand_artists) - set(labels['artist']))
if missing_seeds:
    print('seed artists not present in the graph:', missing_seeds)

In [22]:
# Sanity check: list the rows flagged as seed artists, then preview the
# first 20 rows of the label frame.
seed_rows = labels.loc[labels['in_playlist'].eq(1)]
print(seed_rows)

labels.head(20)

                   artist  in_playlist
0            dire straits          1.0
341               the who          1.0
348                 cream          1.0
1262               kansas          1.0
1283    bruce springsteen          1.0
1508  buffalo springfield          1.0
2047                 kiss          1.0
2131               zz top          1.0


Unnamed: 0,artist,in_playlist
0,dire straits,1.0
1,elementz of noize,
2,unguided lights,
3,the straits,
4,the acetones,
5,rockpile,
6,man,
7,the notting hillbillies,
8,the primitives,
9,spring,
