In [92]:
import requests
import regex as re
import json
import time
import random
import networkx as nx
import pandas as pd

In [114]:
def open_file(filepath:str='cache.json')->dict:
    '''Opens a file'''
    cache_file = open(filepath, 'r')
    cache_contents = cache_file.read()
    artist_data = json.loads(cache_contents)
    cache_file.close()
    return artist_data

In [115]:
def load_full_network(playlist_link:str,random_link:str,cache_file:str)->nx.Graph:
    playlist_network = open_file(playlist_link)
    random_network = open_file(random_link)
    cache = open_file(cache_file)
    # combine!!

    # playlist network doesn't have name key, my bad
    for artist in playlist_network.keys():
        playlist_network[artist]['name']=artist

    g = nx.Graph()
    for artist in playlist_network.keys():
        g.add_node(artist)
        colab = playlist_network[artist]['collaborators'].keys()
        for y in colab:
            if artist !=y:
                if y not in g.nodes:
                    g.add_node(y)
                g.add_edge(artist,y)

    for n in g.nodes():
        g.nodes[n]['source'] = 'playlist'
        if n in playlist_network.keys():
            for key in playlist_network[n].keys():
                g.nodes[n][key]= playlist_network[n][key]
        else:
            for key in cache[n].keys():
                g.nodes[n][key]=cache[n][key]
            g.nodes[n]['in_playlist']=False
            g.nodes[n]['name']=n


    for artist in random_network.keys():
        if artist not in g.nodes:
            g.add_node(artist)
            g.nodes[artist]['source']='random'
        colab = random_network[artist]['collaborators'].keys()
        for y in colab:
            if artist !=y:
                if y not in g.nodes:
                    g.add_node(y)
                g.add_edge(artist,y)
                g.nodes[y]['source']='random'

    for n in g.nodes():
        if n in random_network.keys():
            for key in random_network[n].keys():
                g.nodes[n][key]= random_network[n][key]
            g.nodes[n]['in_playlist']=False
            g.nodes[n]['name']=n
            if n not in playlist_network.keys():
                for key in cache[n].keys():
                    g.nodes[n][key]=cache[n][key]
            g.nodes[n]['source']='random'
    return g

In [116]:
# loading complete networks
week1_g = load_full_network('10_26_playlist_layer2.json','10_26_random_2layer.json','cache.json')
week2_g = load_full_network('11_01_playlist_layer2.json','10_26_random_2layer.json','cache.json')
week3_g = load_full_network('11_08_playlist_layer2.json','10_26_random_2layer.json','cache.json')

Now I want to gather the info about the different networks and see how they change.

In [96]:
# make dataframe with networks attributes
week1_df =pd.DataFrame.from_dict(dict(week1_g.nodes(data=True)), orient='index')
week2_df =pd.DataFrame.from_dict(dict(week2_g.nodes(data=True)), orient='index')
week3_df =pd.DataFrame.from_dict(dict(week3_g.nodes(data=True)), orient='index')
# print the columns
print(week1_df.columns)

Index(['source', 'genres', 'img_info', 'popularity', 'followers',
       'collaborators', 'in_playlist', 'link', 'name', 'api_link'],
      dtype='object')


In [100]:
# making an easy way to iterate
graphs = {
    # 'week1': {'graph':week1_g, 'df':week1_df},
    #       'week2': {'graph':week2_g, 'df':week2_df},
          'week3': {'graph':week3_g, 'df':week3_df},}

In [105]:
week3_df.shape

(36019, 10)

In [107]:
week3_df['name']=week3_df.index
week3_df

Unnamed: 0,source,genres,img_info,popularity,followers,collaborators,in_playlist,name,link,api_link
ROSÉ,playlist,[k-pop],[{'url': 'https://i.scdn.co/image/ab6761610000...,84.0,7766971.0,"{'ROSÉ': {'count': 2, 'uri': 'spotify:artist:3...",True,ROSÉ,,
Bruno Mars,playlist,"[dance pop, pop]",[{'url': 'https://i.scdn.co/image/ab6761610000...,94.0,60240128.0,"{'Bruno Mars': {'count': 22, 'uri': 'spotify:a...",True,Bruno Mars,https://api.spotify.com/v1/artists/0du5cEVh5yT...,
G-DRAGON,playlist,"[k-pop, k-rap]",[{'url': 'https://i.scdn.co/image/ab6761610000...,67.0,2918979.0,"{'G-DRAGON': {'count': 12, 'uri': 'spotify:art...",False,G-DRAGON,,
BLACKPINK,playlist,"[k-pop, k-pop girl group, pop]",[{'url': 'https://i.scdn.co/image/ab6761610000...,81.0,51430114.0,"{'BLACKPINK': {'count': 20, 'uri': 'spotify:ar...",False,BLACKPINK,,
Various Artists,random,[],[{'url': 'https://i.scdn.co/image/ab6761610000...,0.0,2551849.0,{},False,Various Artists,,
...,...,...,...,...,...,...,...,...,...,...
Mc Zofree,random,,,,,,,Mc Zofree,,
KAMI MUZIC,random,,,,,,,KAMI MUZIC,,
AK-47,random,,,,,,,AK-47,,
Klay,random,,,,,,,Klay,,


In [108]:
for week in graphs.keys():
    print(week)
    # set graph & df
    g, df = graphs[week]['graph'], graphs[week]['df']
    # get pagerank
    pr = nx.pagerank(g)
    df['pagerank']=df['name'].apply(lambda x: pr[x])
    # get closeness centrality
    cc = nx.closeness_centrality(g)
    df['closeness_cent']=df['name'].apply(lambda x: cc[x])
    # clustering
    clust = nx.clustering(g)
    df['clustering']=df['name'].apply(lambda x: clust[x])
    # degree centrality
    deg_cent = nx.degree_centrality(g)
    df['deg_cent']=df['name'].apply(lambda x: deg_cent[x])
    # betweenness centrality
    # btwn_centr = nx.betweenness_centrality(g)
    # df['btwn_centr']=df['name'].apply(lambda x: btwn_centr[x])
    # degree
    deg = nx.degree(g)
    df['degree']=df['name'].apply(lambda x: deg[x])

week3


In [None]:
playlist_nodes = [x for x in week1_g.nodes if week1_g.nodes[x]['source']=='playlist'] #if week1_g.nodes[x]['source']=='playlist'
random_nodes = [x for x in week1_g.nodes if week1_g.nodes[x]['source']=='random']

In [111]:
# set weeks in col
# graphs['week1']['df']['week']=1
# graphs['week2']['df']['week']=2
graphs['week3']['df']['week']=3
# combine
# mega = pd.concat([graphs['week1']['df'],graphs['week2']['df'],graphs['week3']['df']],axis=0)

In [None]:
mega.groupby(['in_playlist','source','week'])[['pagerank','closeness_cent','clustering','deg_cent','degree']].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pagerank,closeness_cent,clustering,deg_cent,degree
in_playlist,source,week,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
False,playlist,1,2.7e-05,0.178964,0.065266,9.2e-05,3.333838
False,playlist,2,2.8e-05,,0.064974,9.3e-05,3.356332
False,playlist,3,2.8e-05,,0.065368,9.3e-05,3.36517
False,random,1,0.000191,0.212443,0.122278,0.000726,26.255879
False,random,2,0.000191,,0.122448,0.000728,26.240471
False,random,3,0.000191,,0.122448,0.000729,26.240471
True,playlist,1,0.000116,0.223681,0.138634,0.000469,16.964789
True,playlist,2,0.000114,,0.140451,0.000466,16.798561
True,playlist,3,0.000113,,0.143961,0.000461,16.6


In [113]:
# save to csv for my peers
# graphs['week1']['df'].to_csv('week1.csv')
# graphs['week2']['df'].to_csv('week2.csv')
graphs['week3']['df'].to_csv('week3.csv')