# Imports/Setup

In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
from IPython.display import clear_output
from sqlalchemy import create_engine
from dotenv import load_dotenv
import os

%matplotlib inline
load_dotenv()

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=os.getenv('CLIENT_ID'), CLIENT_SECRET=os.getenv('CLIENT_SECRET')))

# Download Data

## Get top 50 playlists about dreampop

In [2]:
total = 50
limit = 50
offset = 0

playlist_ids = []
while len(playlist_ids) < total:
    lists = sp.search(q='dream pop', type='playlist', limit=limit, offset=offset)['playlists']['items']
    for playlist in lists:
        playlist_ids.append(playlist['id'])
    offset += 50
playlist_ids = playlist_ids[:100]
print('Playlists:', len(playlist_ids))

Playlists: 50


## Get set of tracks

In [3]:
def get_track_names_ids(playlist):
    all_tracks = []
    for off in range(0, 3001, 100):
        tracks = sp.playlist_tracks(playlist, limit=100, offset=off)['items']
        for track in tracks:
            if track['track'] is not None:
                all_tracks.append((track['track']['id'], track['track']['name'], track['track']['artists'][0]['name'], 'dreampop'))
    return all_tracks

track_data = []
for i, playlist in enumerate(playlist_ids):
    clear_output(wait=True)
    print("Processing playlist:", i)
    for id, name, artist, genre in get_track_names_ids(playlist):
        track_data.append((id, name, artist, genre))
clear_output(wait=True)

track_dict = {}
for track in track_data:
    if track not in track_dict:
        track_dict[track] = 0
    track_dict[track] += 1

print('Tracks:', len(list(track_dict.items())))

Tracks: 8178


## Get audio features per track

In [5]:
def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

track_chunks = list(chunks(track_data, 100))
audio_features = []
for c in track_chunks:
    c = [track for track in c if track[0] is not None]
    c_ids = [track[0] for track in c]
    c_names = [track[1] for track in c]
    c_artists = [track[2] for track in c]
    c_genres = [track[3] for track in c]
    c_audio_features = sp.audio_features(tracks=c_ids)
    for i, _ in enumerate(c_audio_features):
        if c_audio_features[i] == None:
            continue
        c_audio_features[i]['name'] = c_names[i]
        c_audio_features[i]['artist'] = c_artists[i]
        c_audio_features[i]['genre'] = c_genres[i]
    audio_features += c_audio_features
print('Audio Features:', len(audio_features))

Audio Features: 8174


In [6]:
audio_features = [f for f in audio_features if f is not None]
print('Audio Features:', len(audio_features))

Audio Features: 8173


## Save to CSV

In [7]:
track_df = pd.DataFrame(audio_features)
track_df.to_csv('../data/dreampop.csv')
track_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,type,id,uri,track_href,analysis_url,duration_ms,time_signature,name,artist,genre
0,0.420,0.330,9,-13.925,1,0.0275,0.24500,0.000679,0.0677,0.1750,...,audio_features,1LzNfuep1bnAUR9skqdHCK,spotify:track:1LzNfuep1bnAUR9skqdHCK,https://api.spotify.com/v1/tracks/1LzNfuep1bnA...,https://api.spotify.com/v1/audio-analysis/1LzN...,295600,3,Fade Into You,Mazzy Star,dreampop
1,0.512,0.838,0,-6.842,1,0.0310,0.01180,0.014200,0.1160,0.1950,...,audio_features,1bwqV8EGVw1RLT3cEzxLpH,spotify:track:1bwqV8EGVw1RLT3cEzxLpH,https://api.spotify.com/v1/tracks/1bwqV8EGVw1R...,https://api.spotify.com/v1/audio-analysis/1bwq...,203390,4,Closer,Josha Daniel,dreampop
2,0.347,0.550,7,-9.048,1,0.0399,0.00419,0.049200,0.0992,0.0669,...,audio_features,5TRSyGcFfevCbJHFUk4OB0,spotify:track:5TRSyGcFfevCbJHFUk4OB0,https://api.spotify.com/v1/tracks/5TRSyGcFfevC...,https://api.spotify.com/v1/audio-analysis/5TRS...,199680,4,Alive,Josha Daniel,dreampop
3,0.638,0.761,4,-7.945,0,0.1690,0.14700,0.000217,0.3210,0.3780,...,audio_features,4uXWLG0CBQhJlvqPksiHxu,spotify:track:4uXWLG0CBQhJlvqPksiHxu,https://api.spotify.com/v1/tracks/4uXWLG0CBQhJ...,https://api.spotify.com/v1/audio-analysis/4uXW...,188000,4,High Enough,Lodola,dreampop
4,0.377,0.650,2,-5.902,1,0.0290,0.00174,0.000058,0.1070,0.3650,...,audio_features,6sVQNUvcVFTXvlk3ec0ngd,spotify:track:6sVQNUvcVFTXvlk3ec0ngd,https://api.spotify.com/v1/tracks/6sVQNUvcVFTX...,https://api.spotify.com/v1/audio-analysis/6sVQ...,192467,4,Cherry-coloured Funk,Cocteau Twins,dreampop
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8168,0.615,0.622,6,-7.679,1,0.0280,0.72900,0.010600,0.1040,0.5160,...,audio_features,2WFmx1unF0TjYnzHhDyICp,spotify:track:2WFmx1unF0TjYnzHhDyICp,https://api.spotify.com/v1/tracks/2WFmx1unF0Tj...,https://api.spotify.com/v1/audio-analysis/2WFm...,298477,4,seabirds,pizzagirl,dreampop
8169,0.644,0.655,2,-5.304,0,0.0329,0.13500,0.000000,0.1200,0.4630,...,audio_features,3IjChkHP6r5vJECoYH1lec,spotify:track:3IjChkHP6r5vJECoYH1lec,https://api.spotify.com/v1/tracks/3IjChkHP6r5v...,https://api.spotify.com/v1/audio-analysis/3IjC...,113759,4,Who Can Be Loved In This World?,Elvis Depressedly,dreampop
8170,0.605,0.555,5,-10.323,1,0.0823,0.35700,0.001450,0.2260,0.2120,...,audio_features,79ZeN4pLdlB3XfGnJXwMFR,spotify:track:79ZeN4pLdlB3XfGnJXwMFR,https://api.spotify.com/v1/tracks/79ZeN4pLdlB3...,https://api.spotify.com/v1/audio-analysis/79Ze...,136552,4,woah,Sipper,dreampop
8171,0.615,0.617,2,-9.315,1,0.0360,0.24900,0.928000,0.1060,0.1880,...,audio_features,3My7lwGD0D1SvJ2JzSipUN,spotify:track:3My7lwGD0D1SvJ2JzSipUN,https://api.spotify.com/v1/tracks/3My7lwGD0D1S...,https://api.spotify.com/v1/audio-analysis/3My7...,147940,4,Killer Whale,Boyscott,dreampop


# DB Upload

In [8]:
conn = create_engine('postgresql://{0}:{1}@{2}:{3}/{4}'.format(os.getenv('DB_USER'),
                                                               os.getenv('DB_PASS'), 
                                                               os.getenv('DB_IP'), 
                                                               os.getenv('DB_PORT'), 
                                                               os.getenv('DB_NAME')))
                                                               
track_df.to_sql('dreampop', con=conn, if_exists='replace')