In [ ]:
import os 
import time
import pandas as pd
import requests
import json
import dotenv
dotenv.load_dotenv()

In [ ]:
# Pull the Spotify client ID and secret out of the process environment
# (each is None when its variable is not set)
spotify_id = os.environ.get("SpotifyID")
spotify_secret = os.environ.get("SpotifySecret")

In [ ]:
# Token endpoint, plus the form-encoded body for a client_credentials grant
url = "https://accounts.spotify.com/api/token"
headers = {"Content-Type": "application/x-www-form-urlencoded"}
data = dict(
    grant_type="client_credentials",
    client_id=spotify_id,
    client_secret=spotify_secret,
)

In [ ]:
# Request access token (works for one hour).
# The explicit timeout keeps the cell from hanging forever on a stalled
# connection; requests applies no timeout unless one is given.
r = requests.post(url, headers=headers, data=data, timeout=10)
r

In [ ]:
# Fail with the HTTP error itself when the token request was rejected,
# instead of an opaque KeyError on the lookups below.
r.raise_for_status()
access_data = r.json()
access_token = access_data['access_token']
token_type = access_data['token_type']

Note: To request an artist, append their Spotify ID to the end of the URL. To find it, click "Share" on the artist's Spotify profile and copy the link; the ID is the part that follows https://open.spotify.com/artist/ (i.e. https://open.spotify.com/artist/[id]).

In [ ]:
# Radiohead artist endpoint, plus the Authorization header built from the
# token obtained earlier. NOTE: despite its name, `params` holds request
# headers (it is passed as headers= in the calls that follow).
url = "https://api.spotify.com/v1/artists/4Z8W4fKeB5YxbusRsdQVPb"
params = {"Authorization": f"{token_type} {access_token}"}
params

In [ ]:
# Fetch the artist; the timeout bounds the wait, since requests applies none
# by default and would block indefinitely on a stalled connection.
r = requests.get(url, headers=params, timeout=10)
r

In [ ]:
# Decode the JSON body of the response and display it
radiohead_json = r.json()
radiohead_json

In [ ]:
# API call to Taylor Swift
# No trailing slash, matching the /v1/artists/<id> form used for the
# Radiohead call; the timeout bounds the wait on a stalled connection.
url = "https://api.spotify.com/v1/artists/06HL4z0CvFAxyc27GXpf02"
r = requests.get(url, headers=params, timeout=10)
r

In [ ]:
# Decode the JSON body of the response and display it
tswift_json = r.json()
tswift_json

In [ ]:
# API call to Violator Album (track listing)
# The URL literal previously began with a stray space and ended with a
# trailing slash; both are removed so the string is a well-formed URL.
# The timeout bounds the wait on a stalled connection.
url = "https://api.spotify.com/v1/albums/45YmvYK4hB4CgQgTMuNRm8/tracks"
r = requests.get(url, headers=params, timeout=10)
r

In [ ]:
violator_json = json.loads(r.text)

# Collect the id and the title of every entry under 'items'
album_items = violator_json['items']
track_ids = [entry['id'] for entry in album_items]
tracks = [entry['name'] for entry in album_items]
print(tracks)
print(track_ids)

In [ ]:
# Convert the track id list to a single comma-separated string
# (interpolated into the ?ids= query of the tracks request that follows)
ids = ",".join(track_ids)
ids

In [ ]:
# API call to Violator tracks: one request for all ids collected above
url = f"https://api.spotify.com/v1/tracks?ids={ids}"
# Despite its name, `params` carries the Authorization request header.
params = {
    "Authorization": f"{token_type} {access_token}"
}
# Bounded wait: requests applies no timeout unless one is given.
r = requests.get(url, headers=params, timeout=10)
r

In [ ]:
tracks_json = json.loads(r.text)

# Pull the popularity score out of every entry under 'tracks'
popularity_scores = [entry['popularity'] for entry in tracks_json['tracks']]
popularity_scores

### API call to playlists

In [ ]:
# Fetch a single playlist by id. The URL has no placeholders, so it is a
# plain string rather than an f-string; the timeout bounds the wait.
url = "https://api.spotify.com/v1/playlists/1AcLtu7KYTNyxocfgAxt7y"
r = requests.get(url, headers=params, timeout=10)
r

In [ ]:
# Decode the playlist response and show the playlist's name
playlist_json = r.json()
playlist_json['name']

In [ ]:
name = playlist_json['name']
playlist_tracks = playlist_json['tracks']['items']
# Drop entries whose 'track' is empty/None once, up front. Previously only
# `tracks` was guarded, so `main_artist` raised TypeError on such an entry
# and the two lists could end up with different lengths.
valid_tracks = [item['track'] for item in playlist_tracks if item['track']]
tracks = [entry['name'] for entry in valid_tracks]
# First listed artist of each track
main_artist = [entry['artists'][0]['name'] for entry in valid_tracks]
main_artist

In [ ]:
track_json = playlist_json['tracks']['items']

# NOTE (original author): this is outputting the track list of the Hollies
# album (first song on playlist) instead of songs on playlist -- TODO confirm
# whether that still reproduces.
# Entries whose 'track' is empty/None are skipped (same guard the `tracks`
# comprehension in the previous cell uses) so the lookups cannot hit None,
# and both lists stay index-aligned.
playlist_entries = [entry['track'] for entry in track_json if entry['track']]
track_list = [entry['name'] for entry in playlist_entries]
track_urls = [entry['href'] for entry in playlist_entries]
track_list

In [ ]:
# Preview the first five collected track hrefs
track_urls[:5]

Something to consider: disregard remastered/live/re-recorded versions, which are usually indicated by a dash in the track title.

### Query for top playlists

In [ ]:
query = "jazz"
limit = 50
# Let requests build the query string so the search term is URL-encoded
# (matters for terms containing spaces or '&'). For "jazz" the resulting
# URL is the same as the hand-built one.
url = "https://api.spotify.com/v1/search"
search_params = {"q": query, "type": "playlist", "limit": limit}
# `params` (despite its name) is the Authorization header dict; the timeout
# bounds the wait on a stalled connection.
r = requests.get(url, headers=params, params=search_params, timeout=10)
r

In [ ]:
myjson = json.loads(r.text)
# Keep the id of every search hit, skipping falsy (e.g. null) entries
search_hits = myjson['playlists']['items']
playlist_ids = [hit['id'] for hit in search_hits if hit]
playlist_ids
# e.g. a single id: myjson['playlists']['items'][3]['id']

In [ ]:
# Fetch the first playlist returned by the search
base_url = "https://api.spotify.com/v1/playlists/"
endpoint = playlist_ids[0]

# Bounded wait: requests applies no timeout unless one is given.
r = requests.get(base_url+endpoint, headers=params, timeout=10)
r

In [ ]:
# Decode the most recent response (presumably the playlist fetched in the
# previous cell) and show the id of its second track entry (index 1)
myjson = r.json()
myjson['tracks']['items'][1]['track']['id']

In [ ]:
# Print the name and track titles of every playlist found by the search.
# The loop variable is `playlist_id` rather than `id`, so the builtin id()
# is no longer overwritten for the rest of the session.
for playlist_id in playlist_ids:
    # Bounded wait so one stalled request cannot hang the whole loop
    r = requests.get(base_url + playlist_id, headers=params, timeout=10)
    myjson = r.json()
    name = myjson['name']
    playlist_tracks = myjson['tracks']['items']
    # Skip entries whose 'track' is empty/None; indexing them raises TypeError
    tracks = [
        playlist_track['track']['name']
        for playlist_track in playlist_tracks
        if playlist_track['track']
    ]
    print(name)
    print(tracks)
    # Pause between requests
    time.sleep(1)


## Build playlist dataframe

In [ ]:
import playlistscraper as ps

In [ ]:
# Request a fresh access token
url = "https://accounts.spotify.com/api/token"
headers = {
    "Content-Type": "application/x-www-form-urlencoded"
}
data = {
    "grant_type": "client_credentials",
    "client_id": spotify_id,
    "client_secret": spotify_secret
}

# Bounded wait: requests applies no timeout unless one is given.
r = requests.post(url, headers=headers, data=data, timeout=10)
# Surface a rejected request as an HTTPError instead of a KeyError on the
# lookups below.
r.raise_for_status()
access_data = r.json()
access_token = access_data['access_token']
token_type = access_data['token_type']

In [ ]:
# Authorization header handed to the playlist scraper calls
headers = {"Authorization": f"{token_type} {access_token}"}

In [ ]:
genres = ["pop", "rock", "country"]
# Build df1 from the fetched frames in a single concat instead of seeding it
# with an empty DataFrame and re-copying it once per genre. This also keeps
# the empty frame out of the concat.
# NOTE(review): assumes ps.get_playlist_df returns a DataFrame -- confirm.
frames = [ps.get_playlist_df(genre, headers) for genre in genres]
df1 = pd.concat(frames)

In [ ]:
# Display the frame built so far
df1

In [ ]:
# was getting timeout error so only did 3-5 genres at a time
genres = ["alternative", "indie", "folk", "classical", "lofi"]
# Fetch the whole batch before touching df1: if one search fails part-way
# (e.g. a timeout), df1 is left unchanged and the cell can be re-run without
# appending the earlier genres twice. A single concat also avoids re-copying
# df1 for every genre.
frames = [ps.get_playlist_df(genre, headers) for genre in genres]
df1 = pd.concat([df1, *frames])

In [ ]:
genres = ["jazz", "rap"]
# Fetch the whole batch first so a mid-batch failure leaves df1 unchanged
# and a re-run cannot duplicate rows; then append everything in one concat.
frames = [ps.get_playlist_df(genre, headers) for genre in genres]
df1 = pd.concat([df1, *frames])

In [ ]:
genres = ["rnb", "blues", "punk", "electronic"]
# Fetch the whole batch first so a mid-batch failure leaves df1 unchanged
# and a re-run cannot duplicate rows; then append everything in one concat.
frames = [ps.get_playlist_df(genre, headers) for genre in genres]
df1 = pd.concat([df1, *frames])

In [ ]:
genres = ["dance", "funk", "latin", "house"]
# Fetch the whole batch first so a mid-batch failure leaves df1 unchanged
# and a re-run cannot duplicate rows; then append everything in one concat.
frames = [ps.get_playlist_df(genre, headers) for genre in genres]
df1 = pd.concat([df1, *frames])

In [ ]:
genres = ["reggae", "new wave", "disco", "ska", "edm"]
# Fetch the whole batch first so a mid-batch failure leaves df1 unchanged
# and a re-run cannot duplicate rows; then append everything in one concat.
frames = [ps.get_playlist_df(genre, headers) for genre in genres]
df1 = pd.concat([df1, *frames])

In [ ]:
genres = ["hiphop", "soul", "synth", "kpop"]
# Fetch the whole batch first so a mid-batch failure leaves df1 unchanged
# and a re-run cannot duplicate rows; then append everything in one concat.
frames = [ps.get_playlist_df(genre, headers) for genre in genres]
df1 = pd.concat([df1, *frames])

In [ ]:
genres = ["grunge", "gospel", "ambient", "techno", "tiktok"]
# Fetch the whole batch first so a mid-batch failure leaves df1 unchanged
# and a re-run cannot duplicate rows; then append everything in one concat.
frames = [ps.get_playlist_df(genre, headers) for genre in genres]
df1 = pd.concat([df1, *frames])

In [ ]:
genres = ["cleaning", "drive", "roadtrip", "gym", "run"]
# Fetch the whole batch first so a mid-batch failure leaves df1 unchanged
# and a re-run cannot duplicate rows; then append everything in one concat.
frames = [ps.get_playlist_df(genre, headers) for genre in genres]
df1 = pd.concat([df1, *frames])

In [ ]:
genres = ["morning", "afternoon", "evening", "night"]
# Fetch the whole batch first so a mid-batch failure leaves df1 unchanged
# and a re-run cannot duplicate rows; then append everything in one concat.
frames = [ps.get_playlist_df(genre, headers) for genre in genres]
df1 = pd.concat([df1, *frames])

In [ ]:
genres = ["club", "library", "study", "beach", "boat"]
# Fetch the whole batch first so a mid-batch failure leaves df1 unchanged
# and a re-run cannot duplicate rows; then append everything in one concat.
frames = [ps.get_playlist_df(genre, headers) for genre in genres]
df1 = pd.concat([df1, *frames])

In [ ]:
genres = ["chill", "party", "clubbing", "cafe"]
# Fetch the whole batch first so a mid-batch failure leaves df1 unchanged
# and a re-run cannot duplicate rows; then append everything in one concat.
frames = [ps.get_playlist_df(genre, headers) for genre in genres]
df1 = pd.concat([df1, *frames])

In [ ]:
genres = ["50s", "60s", "70s", "80s"]
# Fetch the whole batch first so a mid-batch failure leaves df1 unchanged
# and a re-run cannot duplicate rows; then append everything in one concat.
frames = [ps.get_playlist_df(genre, headers) for genre in genres]
df1 = pd.concat([df1, *frames])

In [ ]:
genres = ["90s", "2000", "2010", "y2k"]
# Fetch the whole batch first so a mid-batch failure leaves df1 unchanged
# and a re-run cannot duplicate rows; then append everything in one concat.
frames = [ps.get_playlist_df(genre, headers) for genre in genres]
df1 = pd.concat([df1, *frames])

In [ ]:
# Write the accumulated rows to playlist_data.csv; index=False leaves the
# DataFrame index out of the file
df1.to_csv("playlist_data.csv", index=False)

In [ ]:
# (rows, columns) of the accumulated frame
df1.shape

In [ ]:
# Display the accumulated frame
df1

In [ ]:
"""
HAVE ADDED:
- pop
- rock
- country
- alternative
- indie
- folk
- classical
- lofi
- jazz 
- rap
- rnb
- blues
- punk
- electronic
- dance
- funk
- latin
- house
- reggae
- new wave
- edm
- disco
- ska
- hiphop
- soul
- synth
- kpop
- grunge
- gospel
- ambient
- techno
- tiktok
- cleaning
- drive
- roadtrip
- gym
- run
- morning
- afternoon
- evening
- night
- club
- library
- study
- beach
- boat
- chill
- party
- clubbing
- cafe
- 50s - 2010s
- y2k
"""

In [ ]:
"""
TO ADD:
International: african, indian / desi, spanish, french, 
Moods: sad, happy, anxious, excited
Weather: sunny, cloudy, rainy, snow, warm, cold
Movie soundtracks?
activities: cardio, lifting, sleep, meditation, nature, sexy(?)
places: coffeehouse
audience: kids, clean, teen, adult
"""

Thoughts: check df for duplicate playlists, update the same genres on a semi-regular basis, flag explicit songs?