In [56]:
import math
import json
import requests
import itertools
import numpy as np
import pandas as pd
import time
from datetime import datetime, timedelta
from dotenv import load_dotenv
import os
import json
import base64
import itertools
from tqdm.auto import tqdm
import pickle
import networkx as nx
from collections import Counter


In [2]:
# Read the Spotify API credentials from the environment (load_dotenv() is
# called first so that a local .env file can supply them) instead of
# hard-coding them in the notebook.
load_dotenv()
client_id = os.getenv("CLIENT_ID")
client_secret = os.getenv("CLIENT_SECRET")

# Fail fast: os.getenv returns None for a missing variable, which would be
# formatted into the auth string as the text "None" and only surface later as
# an opaque authentication failure.
if not client_id or not client_secret:
    raise RuntimeError(
        "CLIENT_ID and CLIENT_SECRET must be set in the environment or .env file"
    )

In [3]:
def get_token():
    """Request an access token using the client-credentials grant.

    Reads the module-level ``client_id`` and ``client_secret``.

    Returns:
        str: the ``access_token`` field of the token endpoint's JSON response.

    Raises:
        requests.HTTPError: if the token endpoint answers with a 4xx/5xx
            status (for example, rejected credentials).
    """
    # HTTP Basic credentials: base64("<client_id>:<client_secret>").
    credentials = f"{client_id}:{client_secret}".encode("utf-8")
    auth_base64 = base64.b64encode(credentials).decode("utf-8")

    url = "https://accounts.spotify.com/api/token"
    headers = {
        "Authorization": f"Basic {auth_base64}",
        "Content-Type": "application/x-www-form-urlencoded",
    }
    data = {"grant_type": "client_credentials"}

    result = requests.post(url, headers=headers, data=data)
    # Report a failed request as an HTTP error here rather than as a KeyError
    # on the missing "access_token" field below.
    result.raise_for_status()
    return result.json()["access_token"]

def get_auth_header(token):
    """Build the HTTP headers that authorize a Spotify Web API request.

    Args:
        token: access token string, e.g. the value returned by ``get_token()``.

    Returns:
        dict: ``{"Authorization": "Bearer <token>"}``.
    """
    # Page size ("limit") is a query-string parameter of the individual
    # endpoints, not an HTTP header, so it does not belong here.
    return {"Authorization": f"Bearer {token}"}

def search_for_artist(token, artist):
    """Search Spotify for artists matching a name.

    Args:
        token: access token string.
        artist: free-text artist name to search for.

    Returns:
        The decoded JSON body of the search response. On a non-200 status the
        status code is printed and the body is still returned.
    """
    url = "https://api.spotify.com/v1/search"
    headers = get_auth_header(token)
    # Let requests URL-encode the query. Names such as
    # "Florence + The Machine" or "Edward Sharpe & The Magnetic Zeros" contain
    # "+" and "&", which corrupt a hand-concatenated query string.
    params = {"q": artist, "type": "artist"}
    result = requests.get(url, headers=headers, params=params)
    if result.status_code != 200:
        print('Error: ', result.status_code)
    return result.json()


def get_albums_by_artist(token, artist_id):
    """Fetch one page of an artist's albums, requesting a page size of 50.

    Only this single page is requested; further pages are not followed.

    Args:
        token: access token string.
        artist_id: Spotify id of the artist.

    Returns:
        The decoded JSON body of the response. On a non-200 status the status
        code is printed (same convention as ``search_for_artist``) and the
        body is still returned.
    """
    url = f"https://api.spotify.com/v1/artists/{artist_id}/albums"
    headers = get_auth_header(token)
    result = requests.get(url, headers=headers, params={"limit": 50})
    if result.status_code != 200:
        print('Error: ', result.status_code)
    return result.json()

def get_tracks_by_album(token, album_id):
    """Fetch one page of an album's tracks, requesting a page size of 50.

    Only this single page is requested; further pages are not followed.

    Args:
        token: access token string.
        album_id: Spotify id of the album.

    Returns:
        The decoded JSON body of the response. On a non-200 status the status
        code is printed and the body is still returned.
    """
    url = f"https://api.spotify.com/v1/albums/{album_id}/tracks"
    headers = get_auth_header(token)
    # The page size must travel in the query string. Without it the endpoint
    # applies its own default page size (20 according to the Spotify
    # reference), which cuts off longer albums.
    result = requests.get(url, headers=headers, params={"limit": 50})
    if result.status_code != 200:
        print('Error: ', result.status_code)
    return result.json()


# Use this for text analysis
def get_songs_by_artist(token, artist_id):
    """Fetch an artist's top tracks for the US market.

    Args:
        token: access token string.
        artist_id: Spotify id of the artist.

    Returns:
        The decoded JSON body of the top-tracks response.
    """
    endpoint = f"https://api.spotify.com/v1/artists/{artist_id}/top-tracks"
    response = requests.get(
        endpoint,
        headers=get_auth_header(token),
        params={"market": "US"},
    )
    return response.json()


In [4]:
# Authenticate once; `token` is passed to every API helper in the cells below.
token = get_token()
# Look up a known artist and keep the id of the first search hit. `id1` is
# used again in the "Text analysis" section (get_songs_by_artist).
response = search_for_artist(token, "The Weeknd")
id1 = response["artists"]["items"][0]["id"]

In [7]:
def set_of_albums_by_artist(ALBUM_json):
    """
    Map each album title to its album id.

    Only entries of ALBUM_json['items'] whose 'album_type' is 'album' are
    kept. When a title occurs more than once, the first kept occurrence wins.

    Output: dict of unique album_names and album_ids
    Output example: {
        'Starboy (Deluxe)': '35dut3ICqF3NEDkjxfzJJ1',
        'Live At SoFi Stadium': '1OARrXe5sB0gyy3MhQ8h92',
    }
    """
    albums_by_title = {}
    for entry in ALBUM_json['items']:
        title = entry['name']
        if title in albums_by_title:
            # Title already recorded: keep the first id seen.
            continue
        if entry['album_type'] == 'album':
            albums_by_title[title] = entry['id']
    return albums_by_title

# Example
#theWeeknd_albums = get_albums_by_artist(token, id1)
#set_of_albums_by_artist(theWeeknd_albums)

def features_from_album(ALBUM_json):
    """
    Collect the credited artists of every track in an album-tracks response.

    Every artist listed under a track's 'artists' is included, not only the
    guests; callers that want features only must filter out the main artist.

    Output: one list per track, in track order, of (artist_name, artist_id)
    tuples.
    Output example: [
        [('The Weeknd', '1Xyo4u8uXC1ZmMpatF05PJ'), ('Daft Punk', '4tZwfgrHOc3mvqYlEYSvVi')],
        [('The Weeknd', '1Xyo4u8uXC1ZmMpatF05PJ')],
        ...
    ]
    """
    return [
        [(credit['name'], credit['id']) for credit in track['artists']]
        for track in ALBUM_json['items']
    ]

# Example
#album_ids = set_of_albums_by_artist(get_albums_by_artist(token, id1))
#starboy_json = get_tracks_by_album(token, album_ids['Starboy (Deluxe)'])
#features_from_album(starboy_json)

## Make network



1. Read the CSV with the artist names
2. Get the artist id: **search_for_artist(token, artist)**
3. Get all albums: **get_albums_by_artist(token, artist_id)** --> **set_of_albums_by_artist(ALBUM_json)**
4. Get all features: **get_tracks_by_album(token, album_id)** --> **features_from_album(ALBUM_json)**

In [8]:
# Load the chart data and collect each distinct value of the 'artist' column.
# NOTE: later cells slice this array by position (unique_artists[1114:]), so
# they depend on the ordering produced here.
# NOTE(review): the values shown further down include 'LÃ\x89ON', which looks
# like a mis-decoded 'LÉON' — check the encoding of artists_charts.csv.
dfTopArtist = pd.read_csv('artists_charts.csv')
unique_artists = dfTopArtist['artist'].unique()
unique_artists

array(['Post Malone', 'Juice WRLD', 'Lil Uzi Vert', ...,
       'Das Sound Machine', 'Death Cab for Cutie', 'Purity Ring'],
      dtype=object)

Get ids

In [29]:
len( unique_artists )

1250

In [30]:
# Position in unique_artists at which this run starts.
# NOTE(review): presumably the earlier positions were scraped in previous runs
# (see the dict_features01/02 pickles loaded further down) — confirm.
RESUME_FROM = 1114

# Maps artist name (as spelled in the CSV) -> list of (name, id) credits found
# on that artist's albums, excluding the artist's own credits.
dict_features = {}
for artist_name in tqdm(unique_artists[RESUME_FROM:]):
    response = search_for_artist(token, artist_name)
    hits = response.get("artists", {}).get("items", [])
    if not hits:
        # No search hit (or an error payload): keep the artist with an empty
        # list instead of aborting the whole run with an IndexError/KeyError.
        print(f"No Spotify match for {artist_name!r}; skipping")
        dict_features[artist_name] = []
        continue
    artist_id = hits[0]["id"]

    artist_albums = get_albums_by_artist(token, artist_id)
    artist_albums_dict = set_of_albums_by_artist(artist_albums)

    # Flatten album -> track -> credit into one list of (name, id) tuples.
    all_credits = []
    for album_id in artist_albums_dict.values():
        album_tracks = get_tracks_by_album(token, album_id)
        album_features = features_from_album(album_tracks)
        all_credits.extend(itertools.chain.from_iterable(album_features))

    # Remove self. Compare by Spotify id only: the CSV spelling of the name
    # can differ from the name Spotify returns, in which case a comparison on
    # (name, id) never matches and the artist stays in their own list.
    features_no_self = [feat for feat in all_credits if feat[1] != artist_id]

    dict_features[artist_name] = features_no_self

  0%|          | 0/136 [00:00<?, ?it/s]

In [31]:
# Write the features collected by the loop above to their own pickle file.
with open('dict_features03.pickle', 'wb') as out_file:
    pickle.dump(dict_features, out_file)


In [75]:
# Restore the merged features dict from disk.
# NOTE: 'dict_featuresALL.pickle' is written by a cell further down in this
# notebook, so that cell must have been run at least once before this one.
# Only unpickle files you created yourself; pickle.load can execute code.
with open('dict_featuresALL.pickle', 'rb') as in_file:
    dict_features = pickle.load(in_file)

In [77]:
# Make function to format edges with weight
def format_edges(dict_features, islimitedartists=True):
    """Build an undirected, weighted graph of artists and their features.

    Args:
        dict_features: dict mapping an artist name to a list of
            (featured_name, featured_id) tuples, one tuple per credit.
        islimitedartists: when True (default) an edge is only added if the
            featured name is itself a key of ``dict_features``; when False an
            edge is added for every featured artist.

    Returns:
        nx.Graph with a node for every key of ``dict_features`` and edges
        whose ``weight`` is how many times that (name, id) tuple occurs in
        the artist's list.
    """
    G = nx.Graph()
    print(len(dict_features))
    for artist, credits in dict_features.items():
        G.add_node(artist)
        for credit, count in Counter(credits).items():
            featured_name = credit[0]
            # The flag used to be and-ed into this condition, so passing
            # islimitedartists=False produced a graph without any edges.
            if not islimitedartists or featured_name in dict_features:
                # NOTE(review): the graph is undirected, so when two artists
                # credit each other the weight written last replaces the
                # earlier one instead of adding to it — confirm this is
                # intended.
                G.add_edge(artist, featured_name, weight=count)
    return G

GGG = format_edges(dict_features)

1250


In [79]:
GGG.number_of_edges()

7677

Begin making of network / graph

In [None]:
# Start a graph with one node per artist in dict_features.
# No edges are added in this cell; the weighted feature edges are built by
# format_edges().
G = nx.Graph()
G.add_nodes_from(dict_features.keys())


In [52]:
G = nx.Graph()
G.add_node(1)
G.add_edge(1,2, weight=3)
G.edges

EdgeView([(1, 2)])

In [61]:
res = Counter(dict_features['Post Malone'])

print(res.keys())

dict_keys([('Roddy Ricch', '757aE44tKEUQEqRuT6GnEB'), ('Doja Cat', '5cj0lLjcoR7YOSnhnX0Po5'), ('Gunna', '2hlmm7s2ICUX0LVIhVFlZQ'), ('Fleet Foxes', '4EVpmkEwrLYEg6jIsiPMIb'), ('The Kid LAROI', '2tIP7SsRs7vjIcLrU85W8J'), ('The Weeknd', '1Xyo4u8uXC1ZmMpatF05PJ'), ('DaBaby', '4r63FhuTkUYltbVAg5TQnk'), ('Halsey', '26VFTg2z8YR0cCuwLzESi2'), ('Future', '1RyvyyTE3xzB2ZywiAwp0i'), ('Meek Mill', '20sxb77xiYeusSH8cVdatc'), ('Lil Baby', '5f7VJjfbwm532GiveGC0ZK'), ('Ozzy Osbourne', '6ZLTlhejhndI4Rh53vYhrY'), ('Travis Scott', '0Y5tJX1MQlPlqiwlOH1tJY'), ('SZA', '7tYKF4w9nC0nq9CsPZTHyP'), ('Swae Lee', '1zNqQNIdeOUZHb8zbZRFMX'), ('Young Thug', '50co4Is1HCEo8bhOyUWKpn'), ('21 Savage', '1URnnhqYAYcrqrcwql10ft'), ('Ty Dolla $ign', '7c0XG5cIJTrrAgEC3ULPiq'), ('Nicki Minaj', '0hCNtLu0JehylgoiP8L4Gh'), ('G-Eazy', '02kJSzxNuaWGqwubyUba0Z'), ('YG', '0A0FS04o6zMoto8OKPsDwY'), ('Justin Bieber', '1uNFoZAHBGtllmzznpCI3s'), ('Kehlani', '0cGUm45nv7Z6M6qdXYQGTX'), ('Quavo', '0VRj0yCOv2FXJNP47XQnx5'), ('2 Chainz', '17

In [53]:
dict_features

{'Post Malone': [('Roddy Ricch', '757aE44tKEUQEqRuT6GnEB'),
  ('Doja Cat', '5cj0lLjcoR7YOSnhnX0Po5'),
  ('Gunna', '2hlmm7s2ICUX0LVIhVFlZQ'),
  ('Fleet Foxes', '4EVpmkEwrLYEg6jIsiPMIb'),
  ('The Kid LAROI', '2tIP7SsRs7vjIcLrU85W8J'),
  ('The Weeknd', '1Xyo4u8uXC1ZmMpatF05PJ'),
  ('Roddy Ricch', '757aE44tKEUQEqRuT6GnEB'),
  ('Doja Cat', '5cj0lLjcoR7YOSnhnX0Po5'),
  ('Gunna', '2hlmm7s2ICUX0LVIhVFlZQ'),
  ('Fleet Foxes', '4EVpmkEwrLYEg6jIsiPMIb'),
  ('The Kid LAROI', '2tIP7SsRs7vjIcLrU85W8J'),
  ('The Weeknd', '1Xyo4u8uXC1ZmMpatF05PJ'),
  ('DaBaby', '4r63FhuTkUYltbVAg5TQnk'),
  ('Halsey', '26VFTg2z8YR0cCuwLzESi2'),
  ('Future', '1RyvyyTE3xzB2ZywiAwp0i'),
  ('Meek Mill', '20sxb77xiYeusSH8cVdatc'),
  ('Lil Baby', '5f7VJjfbwm532GiveGC0ZK'),
  ('Ozzy Osbourne', '6ZLTlhejhndI4Rh53vYhrY'),
  ('Travis Scott', '0Y5tJX1MQlPlqiwlOH1tJY'),
  ('SZA', '7tYKF4w9nC0nq9CsPZTHyP'),
  ('Swae Lee', '1zNqQNIdeOUZHb8zbZRFMX'),
  ('Young Thug', '50co4Is1HCEo8bhOyUWKpn'),
  ('Swae Lee', '1zNqQNIdeOUZHb8zbZRFMX')

## Text analysis - Top songs 

In [125]:
response2 = get_songs_by_artist(token, id1)

<Response [200]>


In [87]:
for song in response2['tracks']: #[0]#['artists']
    print(song['name'])

Die For You (with Ariana Grande) - Remix
Creepin' (with The Weeknd & 21 Savage)
Die For You
Starboy
Blinding Lights
Reminder
Save Your Tears
I Was Never There
Call Out My Name
Save Your Tears (Remix) (with Ariana Grande) - Bonus Track


In [33]:
# Read the three partial result files back in, in order.
partial_results = []
for part_path in (
    'dict_features01.pickle',
    'dict_features02.pickle',
    'dict_features03.pickle',
):
    with open(part_path, 'rb') as part_file:
        partial_results.append(pickle.load(part_file))

dict_features1, dict_features2, dict_features3 = partial_results

In [36]:
dict_features_all = {**dict_features1, **dict_features2, **dict_features3}
len(dict_features_all)

1250

In [37]:
# Store the merged dict so later sessions can load it from a single file.
with open('dict_featuresALL.pickle', 'wb') as merged_file:
    pickle.dump(dict_features_all, merged_file)

In [26]:
np.where(unique_artists == 'Nat King Cole Trio')

(array([1113]),)

In [28]:
unique_artists[1114:]

array(['Johnny Mathis', 'grandson', 'The Strokes', 'Bee Gees',
       'New Community Church', "Destiny's Child", 'Jameson Rodgers',
       'Passion Pit', 'HAIM', 'Dj Ozuna', 'Thirty Seconds To Mars',
       'Kim Cesarion', 'ODESZA', 'Cobra Starship', 'Michael Ray',
       'Wizkid', 'Soundgarden', 'Coleman Hell', 'iSHi',
       'Theophilus London', 'Chad Focus', 'David Bowie', 'Roy Woods',
       'Jaymes Reunion', 'Foster The People', 'Jennifer Lopez',
       'Alicia Keys', 'SNBRN', 'Shaun Reynolds', 'Radiohead', 'JoJo',
       'TWENTY88', 'Kalin and Myles', 'AronChupa', 'Niykee Heaton',
       'Chedda Da Connect', 'The Low Down', 'Miike Snow', 'New Politics',
       'Florence + The Machine', 'Beck', 'AFROJACK', 'Ludacris', 'Priory',
       'Fences', 'Seth MacFarlane', 'Nico Segal', 'Ace Hood',
       'Mary Lambert', 'Phoebe Ryan', 'LÃ\x89ON', 'Bankroll Fresh',
       'Alabama Shakes', 'Ryan Adams', 'Snoop Dogg',
       'Edward Sharpe & The Magnetic Zeros', 'The-Dream', 'Moxie Raia',
  