In [1]:
import math
import os
import time

import pandas as pd
import requests

def timer(func, *args, **kwargs):
    """Call ``func`` once, print how long the call took, and return its result.

    Parameters
    ----------
    func : callable
        The callable to time.
    *args, **kwargs
        Positional and keyword arguments forwarded unchanged to ``func``.

    Returns
    -------
    object
        Whatever ``func(*args, **kwargs)`` returns.
    """
    # functools.partial objects and instances that define __call__ have no
    # __name__; fall back to their repr so timing them does not crash.
    label = getattr(func, '__name__', None) or repr(func)
    start = time.perf_counter()
    output = func(*args, **kwargs)
    elapsed = time.perf_counter() - start
    print(f"{label} took {round(elapsed,4)} seconds")
    return output

In [150]:
# Load the whole workbook: sheet_name=None makes read_excel return a dict
# mapping each sheet name to its DataFrame.
dfs = pd.read_excel(
    'song_data.xlsx',
    sheet_name=None,
)

In [195]:
# Artist-to-artist graph: two artists are connected when they share a genre.
# Edge Weight = number of genres the pair has in common.

# Copy the sheet so the helper column added below does not mutate the frame
# cached in `dfs` (other cells read dfs['artist_genres'] as well).
artist_genres = dfs['artist_genres'].copy()

# genre -> list of every artist_id tagged with that genre
artists_in_genre = artist_genres.groupby('genre').agg({'artist_id': lambda ids: ids.tolist()})
artist_genres['neighbors'] = artist_genres['genre'].map(artists_in_genre['artist_id'])

# One row per (artist, genre, co-tagged artist); self-pairs are dropped, then
# rows are collapsed per ordered pair keeping the shared genres and their count.
edges_w_data = artist_genres.explode('neighbors')
edges_w_data = edges_w_data[edges_w_data['artist_id'] != edges_w_data['neighbors']]
edges_w_data = (
    edges_w_data
    .groupby(['artist_id', 'neighbors'], as_index=False)
    .agg(genres=('genre', lambda g: g.tolist()), Weight=('genre', 'count'))
    .rename(columns={'artist_id': 'Source', 'neighbors': 'Target'})
)

# (a, b) and (b, a) describe the same undirected edge: keep the first occurrence
# of each unordered pair. A positional boolean mask is used so index labels
# never matter, and .copy() avoids assigning into a slice of edges_w_data.
is_repeat = pd.Series(
    [frozenset(pair) for pair in zip(edges_w_data['Source'], edges_w_data['Target'])],
    dtype=object,
).duplicated().to_numpy()
edges = edges_w_data.loc[~is_repeat, ['Source', 'Target', 'Weight']].copy()
edges['type'] = 'undirected'

# Build the node table from BOTH endpoints. After de-duplication an artist can
# survive only in the Target column, so taking Source alone silently drops it.
node_ids = pd.concat([edges['Source'], edges['Target']], ignore_index=True).unique()
nodes = pd.DataFrame({'id': node_ids})
artist_names = dfs['artists'].set_index('id')['name']  # artist id -> display name
nodes['Label'] = nodes['id'].map(artist_names)

In [188]:
# Persist the current node and edge tables; the DataFrame index is not written.
nodes.to_csv(path_or_buf='nodes.csv', index=False)
edges.to_csv(path_or_buf='edges.csv', index=False)

In [218]:
# Genre-to-genre graph: two genres are connected when at least one artist is
# tagged with both. Edge Weight = number of artists carrying both genres.

# Copy the sheet so the helper column added below does not mutate the frame
# cached in `dfs` (other cells read dfs['artist_genres'] as well).
artist_genres = dfs['artist_genres'].copy()

# artist_id -> list of every genre attached to that artist
artist_genre_lists = artist_genres.groupby('artist_id').agg({'genre': lambda g: g.tolist()})
artist_genres['neighbors'] = artist_genres['artist_id'].map(artist_genre_lists['genre'])

# One row per (artist, genre, co-occurring genre); self-pairs are dropped, then
# rows are collapsed per ordered genre pair keeping the artists and their count.
edges_w_data = artist_genres.explode('neighbors')
edges_w_data = edges_w_data[edges_w_data['genre'] != edges_w_data['neighbors']]
edges_w_data = (
    edges_w_data
    .groupby(['genre', 'neighbors'], as_index=False)
    .agg(artist_ids=('artist_id', lambda ids: ids.tolist()), Weight=('artist_id', 'count'))
    .rename(columns={'genre': 'Target', 'neighbors': 'Source'})
)

# (a, b) and (b, a) describe the same undirected edge: keep the first occurrence
# of each unordered pair. A positional boolean mask is used so index labels
# never matter, and .copy() avoids assigning into a slice of edges_w_data.
is_repeat = pd.Series(
    [frozenset(pair) for pair in zip(edges_w_data['Source'], edges_w_data['Target'])],
    dtype=object,
).duplicated().to_numpy()
edges = edges_w_data.loc[~is_repeat, ['Source', 'Target', 'Weight']].copy()
edges['type'] = 'undirected'

# Build the node table from BOTH endpoints. After de-duplication a genre can
# survive only in the Target column, so taking Source alone silently drops it.
genre_names = pd.concat([edges['Source'], edges['Target']], ignore_index=True).unique()
nodes = pd.DataFrame({'Label': genre_names})
# Edges reference genres by name, so the node id must be that same name;
# a positional integer id would not match any Source/Target value.
nodes['id'] = nodes['Label']

In [251]:
# Artist-to-artist graph: two artists are connected when they appear on the
# same track. Edge Weight = number of tracks the pair shares.

# Copy the sheet so the helper column added below does not mutate the frame
# cached in `dfs`.
artist_tracks = dfs['artist_tracks'].copy()

# track_id -> list of every artist_id credited on that track
artist_track_lists = artist_tracks.groupby('track_id').agg({'artist_id': lambda ids: ids.tolist()})
artist_tracks['neighbors'] = artist_tracks['track_id'].map(artist_track_lists['artist_id'])

# One row per (artist, track, co-credited artist); self-pairs are dropped, then
# rows are collapsed per ordered pair keeping the shared tracks and their count.
edges_w_data = artist_tracks.explode('neighbors')
edges_w_data = edges_w_data[edges_w_data['artist_id'] != edges_w_data['neighbors']]
edges_w_data = (
    edges_w_data
    .groupby(['artist_id', 'neighbors'], as_index=False)
    .agg(track_ids=('track_id', lambda t: t.tolist()), Weight=('track_id', 'count'))
    .rename(columns={'artist_id': 'Source', 'neighbors': 'Target'})
)

# (a, b) and (b, a) describe the same undirected edge: keep the first occurrence
# of each unordered pair. A positional boolean mask is used so index labels
# never matter, and .copy() avoids assigning into a slice of edges_w_data.
is_repeat = pd.Series(
    [frozenset(pair) for pair in zip(edges_w_data['Source'], edges_w_data['Target'])],
    dtype=object,
).duplicated().to_numpy()
edges = edges_w_data.loc[~is_repeat, ['Source', 'Target', 'Weight']].copy()
edges['type'] = 'undirected'

# Build the node table from BOTH endpoints. After de-duplication an artist can
# survive only in the Target column, so taking Source alone silently drops it.
node_ids = pd.concat([edges['Source'], edges['Target']], ignore_index=True).unique()
nodes = pd.DataFrame({'id': node_ids})
artist_names = dfs['artists'].set_index('id')['name']  # artist id -> display name
nodes['Label'] = nodes['id'].map(artist_names)

In [252]:
# Persist the shared-track artist graph; the DataFrame index is not written.
nodes.to_csv(path_or_buf='artists_by_track_nodes.csv', index=False)
edges.to_csv(path_or_buf='artists_by_track_edges.csv', index=False)

In [280]:
# Request settings for the Spotify Web API.
#
# The bearer token is a credential and must never be stored in the notebook:
# anything pasted here ends up in version control and in shared copies.
# Export it before starting the kernel instead, e.g.
#     export SPOTIFY_TOKEN="<access token>"
token = os.environ.get('SPOTIFY_TOKEN', '').strip()
if not token:
    # Keep the cell runnable, but make the problem visible right away: every
    # API request sent with an empty token will be rejected by the server.
    print("SPOTIFY_TOKEN is not set; Spotify API requests will fail until it is.")

headers = {
    'Accept': 'application/json',
    'Content-Type': 'application/json',
    'Authorization': 'Bearer '+token,
}

base_url = 'https://api.spotify.com/v1'

In [None]:
# For every artist in the library, ask the API for its related artists and
# store them as  artist id -> list of related artist ids.
artists = dfs['artists']
related_artists = {}

max_attempts = 5       # tries per artist before giving up on HTTP 429
request_timeout = 30   # seconds; without a timeout a stalled socket hangs the cell

# A Session reuses the underlying connection across the many sequential calls.
with requests.Session() as session:
    for i, aid in enumerate(artists['id']):
        url = f"{base_url}/artists/{aid}/related-artists"
        for attempt in range(max_attempts):
            response = session.get(url, headers=headers, timeout=request_timeout)
            if response.status_code != 429:
                break
            # Rate limited: wait as long as the server asks (Retry-After is in
            # seconds), defaulting to one second when the header is absent.
            time.sleep(float(response.headers.get('Retry-After', 1)))
        # Surface auth / rate-limit / server errors as an HTTPError here rather
        # than as a confusing KeyError on the missing 'artists' key below.
        response.raise_for_status()
        related_json = response.json()
        related_artists[aid] = [artist['id'] for artist in related_json['artists']]
        if i%10 == 0:
            print(i)

In [301]:
# Restrict each artist's related list to artists that are themselves in the
# library. Artists left with no in-library neighbour are omitted entirely.

# Build the lookup set once; it does not change between iterations.
library_ids = set(artists['id'])

artists_in_library = {}
for aid, related in related_artists.items():
    # dict.fromkeys de-duplicates while keeping the order of `related`, so the
    # neighbour order is reproducible instead of depending on set iteration
    # order (which varies between kernel sessions for strings).
    in_library = [rid for rid in dict.fromkeys(related) if rid in library_ids]
    if in_library:
        artists_in_library[aid] = in_library

In [322]:
# Artist-to-artist graph from the related-artists lookup: one edge per
# unordered pair (artist, related artist that is also in the library).
df = pd.DataFrame({'id': list(artists_in_library)})
df['neighbors'] = df['id'].map(artists_in_library)

# One row per (artist, related artist).
edges_w_data = df.explode('neighbors')
edges_w_data.columns = ['Source','Target']

# (a, b) and (b, a) describe the same undirected edge: keep the first occurrence
# of each unordered pair. explode() repeats index labels, so a positional
# boolean mask is used rather than anything that aligns on the index.
is_repeat = pd.Series(
    [frozenset(pair) for pair in zip(edges_w_data['Source'], edges_w_data['Target'])],
    dtype=object,
).duplicated().to_numpy()
edges = edges_w_data.loc[~is_repeat].copy()
edges['type'] = 'undirected'

# Build the node table from BOTH endpoints: an artist that only ever shows up
# as somebody else's related artist is in Target but never in Source, and
# would otherwise be missing from the node list.
node_ids = pd.concat([edges['Source'], edges['Target']], ignore_index=True).unique()
nodes = pd.DataFrame({'id': node_ids})
artist_names = dfs['artists'].set_index('id')['name']  # artist id -> display name
nodes['Label'] = nodes['id'].map(artist_names)

In [323]:
# Persist the related-artists graph; the DataFrame index is not written.
nodes.to_csv(path_or_buf='artists_by_algo_nodes.csv', index=False)
edges.to_csv(path_or_buf='artists_by_algo_edges.csv', index=False)

In [330]:
# Rewrite the genre node file in place so that each node's id equals its Label.
# NOTE(review): no visible cell writes 'genres_by_artist_nodes.csv'; this
# presumably relies on a file produced by an earlier session. Confirm.
df = pd.read_csv('genres_by_artist_nodes.csv').assign(id=lambda frame: frame['Label'])
df.to_csv(path_or_buf='genres_by_artist_nodes.csv', index=False)

In [365]:
# Reload the related-artists edge list and give every edge a unit weight.
df = pd.read_csv('artists_by_algo_edges.csv').assign(Weight=1)

In [366]:
# Reshape the edge list into a Source x Target table of weights (0 where no
# edge row exists) and write it out.
# NOTE(review): rows come only from Source and columns only from Target, so
# the table is presumably neither square nor symmetric. Confirm that is intended.
adjacency = df.pivot(index='Source', columns='Target', values='Weight')
adjacency = adjacency.fillna(0)
adjacency.to_csv('test.csv')

Source
00dwwnz3V4kRfu3UFYpJLz    0.0
5INjqkS1o8h1imAzPqGZBb    0.0
5IcR3N7QB1j6KBL8eImZ8m    0.0
5IqdTZwvASXwM39z5qPqAA    0.0
5K4W6rqBFWDnAN6FQUkS6x    0.0
                         ... 
4FZ3j1oH43e7cukCALsCwf    1.0
5BYuBzqmTXwUDw2rYkwExr    1.0
0bZCak2tcRMY1dzEIuwF42    1.0
4F84IBURUo98rz4r61KF70    1.0
2S5hlvw4CMtMGswFtfdK15    1.0
Name: 02uYdhMhCgdB49hZlYRm9o, Length: 671, dtype: float64