In [52]:
import json
import math
import os
import time

import pandas as pd
import requests

def timer(func, *args, **kwargs):
    """Call ``func`` and print how long the call took.

    Parameters
    ----------
    func : callable
        The callable to time.
    *args, **kwargs
        Positional and keyword arguments forwarded unchanged to ``func``.

    Returns
    -------
    Whatever ``func(*args, **kwargs)`` returns.

    Any exception raised by ``func`` propagates; no timing line is printed
    in that case.
    """
    start = time.perf_counter()
    output = func(*args, **kwargs)
    elapsed = time.perf_counter() - start
    # Not every callable has __name__ (functools.partial objects and instances
    # defining __call__ do not); fall back to repr so the result is not lost
    # to an AttributeError raised after the work has already been done.
    label = getattr(func, '__name__', None) or repr(func)
    print(f"{label} took {round(elapsed,4)} seconds")
    return output

In [39]:
# sheet_name=None makes read_excel return a dict of {sheet name: DataFrame}.
dfs = pd.read_excel('song_data2.xlsx', sheet_name=None)

# Pull out the sheets this notebook works with.
artist_genres, artists, my_playlist, artist_tracks = (
    dfs[sheet_name]
    for sheet_name in ('artist_genres', 'artists', 'my_playlist', 'artist_tracks')
)

# Distinct artist ids attached to the tracks of my playlist
# (playlist 'id' is joined to artist_tracks 'track_id').
my_artists = (
    my_playlist[['id']]
    .merge(artist_tracks, left_on='id', right_on='track_id')
    ['artist_id']
    .unique()
)

In [88]:
# Genre rows restricted to the artists that appear in my playlist.
my_artist_genres = artist_genres[artist_genres['artist_id'].isin(my_artists)]

# For every artist, the list of all genres attached to that artist.
artist_genre_lists = my_artist_genres.groupby('artist_id').agg({'genre':lambda x: x.tolist()})

# Pair each (artist, genre) row with every genre of the same artist.
# This is done on a derived frame rather than by adding a column to
# `artist_genres`: that frame is the same object as dfs['artist_genres'], so
# writing to it would leak a list-valued 'neighbors' column into every later
# cell that reads the sheet.
genre_pairs = my_artist_genres.assign(
    neighbors=my_artist_genres['artist_id'].map(artist_genre_lists['genre'])
).explode('neighbors')

# Drop self-pairs (a genre paired with itself).
genre_pairs = genre_pairs[genre_pairs['genre'] != genre_pairs['neighbors']]

# One row per ordered genre pair; Weight is the number of artist rows that
# carry both genres.
g_edges = (
    genre_pairs
    .groupby(['genre', 'neighbors'], as_index=False)
    .agg(Weight=('artist_id', 'count'))
    .rename(columns={'genre': 'Target', 'neighbors': 'Source'})
    [['Target', 'Source', 'Weight']]
)

# Adjacency matrix: rows are Source genres, columns are Target genres,
# 0 where a pair never occurs.
g_matrix = g_edges.pivot(index='Source',columns='Target',values='Weight').fillna(0)

In [41]:
# Spotify Web API bearer token. It is read from the SPOTIFY_TOKEN environment
# variable instead of being pasted into the notebook, so the credential is never
# saved or committed with the file. Any token that was previously stored in this
# cell should be treated as exposed.
token = os.environ.get('SPOTIFY_TOKEN', '')
if not token:
    # Do not fail here so the offline cells still run; only the API cells need it.
    print("SPOTIFY_TOKEN is not set - Spotify API requests will be rejected until it is exported.")

# Headers sent with every Spotify request.
headers = {
    'Accept': 'application/json',
    'Content-Type': 'application/json',
    'Authorization': 'Bearer '+token,
}

base_url = 'https://api.spotify.com/v1'

In [None]:
# Map every artist in my library to the ids of the artists Spotify lists as related.
related_artists = {}
# A single Session reuses the HTTPS connection across the requests.
with requests.Session() as session:
    session.headers.update(headers)
    for i, aid in enumerate(my_artists):
        # The timeout keeps a stalled connection from hanging the kernel forever.
        response = session.get(f"{base_url}/artists/{aid}/related-artists", timeout=30)
        # Surface HTTP errors (expired token, rate limiting, ...) as themselves
        # instead of as a KeyError on the missing 'artists' key below.
        response.raise_for_status()
        related_json = response.json()
        related_artists[aid] = [artist['id'] for artist in related_json['artists']]
        # Progress marker every 10 artists.
        if i%10 == 0:
            print(i)

In [48]:
# For each artist, keep only the related artists that are also in my library.
# The library id set does not change inside the loop, so it is built once
# instead of once per artist.
my_artist_set = set(my_artists)

artists_in_library = {}
for aid, related_ids in related_artists.items():
    ra = set(related_ids) & my_artist_set
    # Artists with no related artist in the library are left out entirely.
    if ra:
        artists_in_library[aid] = list(ra)

In [49]:
# One row per artist that has at least one related artist in the library,
# with the list of those related artists alongside it.
artist_net = pd.DataFrame(list(artists_in_library), columns=['id'])
artist_net['neighbors'] = artist_net['id'].map(artists_in_library)

# Unroll the neighbor lists into one edge per row; every edge gets weight 1.
# explode keeps the original row labels, so the index repeats per source artist.
a_edges = (
    artist_net
    .explode('neighbors')
    .rename(columns={'id': 'Source', 'neighbors': 'Target'})
    .assign(Weight=1)
)

# Adjacency matrix: rows are Source artists, columns are Target artists,
# 0 where no edge exists.
a_matrix = a_edges.pivot(index='Source', columns='Target', values='Weight').fillna(0)

In [50]:
# Write both adjacency matrices to CSV (index included), genre first, then artist.
for matrix, csv_name in ((g_matrix, 'genre_matrix.csv'), (a_matrix, 'artist_matrix.csv')):
    matrix.to_csv(csv_name)

In [90]:
# Preview the genre edge list with Source shown as 'id' and Target as 'target'.
# rename returns a new frame, so g_edges itself keeps its column names.
g_edges.rename(columns={'Source':'id','Target':'target'})

Unnamed: 0,target,id,Weight
0,abstract hip hop,chamber psych,1
1,abstract hip hop,escape room,1
2,abstract hip hop,ninja,1
3,abstract hip hop,scottish hip hop,1
4,acoustic pop,indie folk,1
...,...,...,...
5883,zolo,funk rock,1
5884,zolo,new wave,1
5885,zolo,permanent wave,1
5886,zolo,post-punk,1


In [59]:
# Load the artist network JSON and put its 'nodes' entry into a DataFrame.
with open('artist_net.json') as f:
    artist_data = json.load(f)
artist_data_df = pd.DataFrame(artist_data['nodes'])

# Same for the genre network JSON.
with open('genre_net.json') as f:
    genre_data = json.load(f)
genre_data_df = pd.DataFrame(genre_data['nodes'])

In [129]:
def transform_net(json_file,edges_df):
    """Expand an edge list into per-endpoint rows carrying node coordinates.

    Parameters
    ----------
    json_file : str
        Path to a JSON file whose top-level 'nodes' entry is a list of records
        with at least the keys 'id', 'x' and 'y'.
    edges_df : pandas.DataFrame
        Edge list with 'Source' and 'Target' columns. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns 'id', 'target', 'name', 'rel', 'x', 'y'. Every input edge
        yields four rows: both directions (id/target as given, then swapped),
        each once with 'name' set to the source node and once with 'name' set
        to the target node. 'rel' is "<id>--><target>" and 'x'/'y' are the
        coordinates of the node named in 'name' (NaN when that node is not in
        the JSON). The input row labels are kept, so the index repeats.
    """
    with open(json_file) as handle:
        nodes = pd.DataFrame(json.load(handle)['nodes'])

    pairs = edges_df.rename(columns={'Source': 'id', 'Target': 'target'})[['id', 'target']]

    # One copy of each edge labelled with its source node, one with its target node.
    by_source = pairs.assign(name=pairs['id'])
    by_target = pairs.assign(name=pairs['target'])
    forward = pd.concat([by_source, by_target])

    # Same rows with the direction reversed; 'name' stays as it was.
    backward = forward.rename(columns={'id': 'target', 'target': 'id'})[['id', 'target', 'name']]

    result = pd.concat([forward, backward])
    result['rel'] = result['id'] + '-->' + result['target']

    # Attach the coordinates of the node referenced by 'name'.
    for axis in ('x', 'y'):
        result[axis] = result['name'].map(dict(zip(nodes['id'], nodes[axis])))

    return result

In [130]:
# Display the transformed genre network for inspection; the result is not stored.
transform_net('genre_net.json', g_edges)

Unnamed: 0,id,target,name,rel,x,y
0,chamber psych,abstract hip hop,chamber psych,chamber psych-->abstract hip hop,57.270973,17.457914
1,escape room,abstract hip hop,escape room,escape room-->abstract hip hop,12.269608,257.975372
2,ninja,abstract hip hop,ninja,ninja-->abstract hip hop,-82.429535,291.322174
3,scottish hip hop,abstract hip hop,scottish hip hop,scottish hip hop-->abstract hip hop,76.209175,263.389404
4,indie folk,acoustic pop,indie folk,indie folk-->acoustic pop,-45.359932,-112.018387
...,...,...,...,...,...,...
5883,zolo,funk rock,zolo,zolo-->funk rock,-305.913635,-169.425964
5884,zolo,new wave,zolo,zolo-->new wave,-305.913635,-169.425964
5885,zolo,permanent wave,zolo,zolo-->permanent wave,-305.913635,-169.425964
5886,zolo,post-punk,zolo,zolo-->post-punk,-305.913635,-169.425964


In [132]:
# Transform each network and write it to CSV without the index, artist first, then genre.
for net_json, edge_frame, out_csv in (
    ('artist_net.json', a_edges, 'artist_network.csv'),
    ('genre_net.json', g_edges, 'genre_network_edit.csv'),
):
    transform_net(net_json, edge_frame).to_csv(out_csv, index=False)

In [74]:
# Display the artist edge list (Source, Target, Weight).
a_edges

Unnamed: 0,Source,Target,Weight
0,0pu9LAsUMg8nhMK1hmiM5b,1Zp054Jc86WVKCxKEqZGOA,1
0,0pu9LAsUMg8nhMK1hmiM5b,4XMc1qHObZ7aXQrH5MmbjK,1
1,5SHxzwjek1Pipl1Yk11UHv,6hl5k4gLl1p3sjhHcb57t2,1
1,5SHxzwjek1Pipl1Yk11UHv,58bHgbHOExsHSOGlf0uUkL,1
1,5SHxzwjek1Pipl1Yk11UHv,4fgXfJCQnK6c44u4KzAtQP,1
...,...,...,...
655,0FL2d6iFFNAV3yBUbXjZ1U,4OSArit7O2Jaj4mgf3YN7A,1
655,0FL2d6iFFNAV3yBUbXjZ1U,6Qpa8xhGsGitz4WBf4BkpK,1
655,0FL2d6iFFNAV3yBUbXjZ1U,4SdIXLzfabqU61iK7SnKAU,1
655,0FL2d6iFFNAV3yBUbXjZ1U,4qOzMSukiZoiSjPQw8Zs7s,1


In [378]:
# Short aliases for the workbook sheets used in the genre count below:
# mp = my_playlist, tr = tracks, ar = artists, at = artist_tracks, ge = artist_genres.
mp, tr, ar, at, ge = (
    dfs[sheet_name]
    for sheet_name in ('my_playlist', 'tracks', 'artists', 'artist_tracks', 'artist_genres')
)

In [389]:
# Playlist tracks joined to their track record and then to their artists,
# reduced to (track id, artist id) pairs.
mpat = (
    mp
    .merge(tr, on='id')
    .merge(at, left_on='id', right_on='track_id')
    [['id', 'artist_id']]
)

# Attach every genre of each artist to the pair.
mpatge = mpat.merge(ge, on='artist_id')

# Number of (track, artist) rows per genre, most frequent first.
(
    mpatge
    .groupby('genre')
    .count()
    .sort_values('id', ascending=False)
)

Unnamed: 0_level_0,id,artist_id
genre,Unnamed: 1_level_1,Unnamed: 2_level_1
modern rock,714,714
indie pop,417,417
pop,387,387
modern alternative rock,386,386
rock,341,341
...,...,...
jewish hip hop,1,1
jump blues,1,1
kansas hip hop,1,1
kentucky indie,1,1
