In [19]:
# import libraries
import spotipy
from spotipy.oauth2 import SpotifyOAuth
from dotenv import load_dotenv
import os
import json
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import sys

sys.path.append("../")

# load functions
from functions.spotify_api import parse_playlist_track

In [20]:
# load_dotenv() makes values from a local .env file visible through the
# environment; the Spotify app credentials are then read from there
load_dotenv()
client_id, client_secret = os.getenv("CLIENT_ID"), os.getenv("CLIENT_SECRET")

In [21]:
# build the OAuth manager first, then hand it to the Spotify client
# NOTE: redirect_uri has to match a redirect URI registered for the Spotify app
oauth_manager = SpotifyOAuth(
    scope="user-library-read",
    redirect_uri="http://localhost:3000",
    client_secret=client_secret,
    client_id=client_id,
)
sp = spotipy.Spotify(auth_manager=oauth_manager)

In [22]:
# scrape playlists: open the Spotify genre page in a Chrome window
genre_page_url = "https://open.spotify.com/genre/section0JQ5DAzQHECxDlYNI6xD1h"
browser = webdriver.Chrome()
browser.get(genre_page_url)

## USER NOTE - manually scroll to the bottom of the page so that all playlists are loaded
##             before running the next cell

In [23]:
# Find all "a" tags with title containing "Top Songs"
anchor_elements = browser.find_elements(By.XPATH, '//a[contains(@title, "Top Songs")]')

# Extract hrefs from the anchor elements. get_attribute() returns None when the
# attribute is missing, so those anchors are dropped instead of crashing on
# .split() further down.
playlist_hrefs = [
    href for href in (elem.get_attribute("href") for elem in anchor_elements) if href
]

# Quit the browser. quit() ends the whole WebDriver session (all windows and the
# driver process); close() would only close the current window and leave the
# driver process running in the background.
browser.quit()

# Extract playlist ids from hrefs: the id is the last path segment, so strip any
# query string, fragment or trailing slash first. dict.fromkeys() removes
# duplicates while keeping page order, so each playlist is requested only once.
playlist_ids = list(
    dict.fromkeys(
        href.split("?")[0].split("#")[0].rstrip("/").split("/")[-1]
        for href in playlist_hrefs
    )
)

In [24]:
# one parsed record per (playlist, track) pair, accumulated across all playlists
top_50_playlists = []

for playlist_id in playlist_ids:
    # one API call per playlist
    playlist_res = sp.playlist(playlist_id=playlist_id)
    playlist_name = playlist_res["name"]
    print("-- processing playlist: " + playlist_name, "--")

    # parse every track item of this playlist and collect the results
    top_50_playlists.extend(
        parse_playlist_track(track=item, playlist_id=playlist_id, playlist_name=playlist_name)
        for item in playlist_res["tracks"]["items"]
    )

-- processing playlist: Top Songs - Global --
-- processing playlist: Top Songs - Argentina --
-- processing playlist: Top Songs - Australia --
-- processing playlist: Top Songs - Austria --
-- processing playlist: Top Songs - Belarus --
-- processing playlist: Top Songs - Belgium --
-- processing playlist: Top Songs - Brazil --
-- processing playlist: Top Songs - Canada --
-- processing playlist: Top Songs - Chile --
-- processing playlist: Top Songs - Colombia --
-- processing playlist: Top Songs - Czech Republic --
-- processing playlist: Top Songs - Denmark --
-- processing playlist: Top Songs - Dominican Republic --
-- processing playlist: Top Songs - Ecuador --
-- processing playlist: Top Songs - Egypt --
-- processing playlist: Top Songs - Finland --
-- processing playlist: Top Songs - France --
-- processing playlist: Top Songs - Germany --
-- processing playlist: Top Songs - Guatemala --
-- processing playlist: Top Songs - Hong Kong --
-- processing playlist: Top Songs - Hunga

In [39]:
# get artist ids: unique, in first-seen order.
# dict.fromkeys() is used instead of set() because the iteration order of a set
# of strings changes between interpreter runs (hash randomization), which made
# the order of artist_ids -- and of everything built from it -- differ after
# every kernel restart.
artist_ids = list(
    dict.fromkeys(
        artist["id"] for track in top_50_playlists for artist in track["artist_ids"]
    )
)

In [43]:
def retrieve_artist(artist_id):
    """Look up one artist through the notebook-level `sp` client.

    Prints a progress line with the artist's name and returns a dict with the
    keys ``artist_id``, ``artist_name``, ``followers`` (the follower total),
    ``genres`` and ``popularity`` taken from the API response.
    """
    artist_res = sp.artist(artist_id=artist_id)

    # fields copied from the response; followers is flattened to its total
    stats = {
        "followers": artist_res["followers"]["total"],
        "genres": artist_res["genres"],
        "popularity": artist_res["popularity"],
    }
    artist_name = artist_res["name"]

    print("-- retrieved artist: " + artist_name, "--")

    # the record is returned; collecting it is left to the caller
    return {"artist_id": artist_id, "artist_name": artist_name, **stats}


# one API call per unique artist id
artist_data = []
for artist_id in artist_ids:
    artist_data.append(retrieve_artist(artist_id=artist_id))

-- retrieved artist: Malik Montana --
-- retrieved artist: Ana Castela --
-- retrieved artist: Bausa --
-- retrieved artist: Dharius --
-- retrieved artist: Lola Amour --
-- retrieved artist: @atutowy --
-- retrieved artist: Karan Aujla --
-- retrieved artist: LIT killah --
-- retrieved artist: bambi --
-- retrieved artist: Jakub Laszuk --
-- retrieved artist: Ferrillo --
-- retrieved artist: Future --
-- retrieved artist: Bon Nghiêm --
-- retrieved artist: Jed Baruelo --
-- retrieved artist: Mc Menor Do Alvorada --
-- retrieved artist: slimedemidemislime --
-- retrieved artist: Mohbad --
-- retrieved artist: Naldo Benny --
-- retrieved artist: Lossa --
-- retrieved artist: OBOY --
-- retrieved artist: KAROL G --
-- retrieved artist: Myke Towers --
-- retrieved artist: 理想混蛋 --
-- retrieved artist: Kenshi Yonezu --
-- retrieved artist: Takura --
-- retrieved artist: BigDaddy --
-- retrieved artist: fellow fellow --
-- retrieved artist: Thắng --
-- retrieved artist: Ray Dalton --
-- retr

In [45]:
# Serialize before touching the file: if artist_data holds something that cannot
# be JSON-encoded, the error is raised here and an existing output file is not
# left truncated or half-written.
artist_json = json.dumps(artist_data)

artist_path = "../local_data/spotify_artists.json"
# make sure the output directory exists (e.g. on a fresh checkout)
os.makedirs(os.path.dirname(artist_path), exist_ok=True)
# explicit encoding so the result does not depend on the platform default
with open(artist_path, "w", encoding="utf-8") as outfile:
    outfile.write(artist_json)