# Imports/Setup

In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
from IPython.display import clear_output
from sqlalchemy import create_engine
from dotenv import load_dotenv
from pathlib import Path
import os

%matplotlib inline

dotenv_path = Path('../.env')
load_dotenv(dotenv_path=dotenv_path)

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=os.getenv('CLIENT_ID'), client_secret=os.getenv('CLIENT_SECRET')))

# Download Data

## Get top 50 playlists about dreampop

In [2]:
total = 50
limit = 50
offset = 0

playlist_ids = []
while len(playlist_ids) < total:
    lists = sp.search(q='dreampop', type='playlist', limit=limit, offset=offset)['playlists']['items']
    for playlist in lists:
        playlist_ids.append(playlist['id'])
    offset += 50
playlist_ids = playlist_ids[:100]
print('Playlists:', len(playlist_ids))

Playlists: 50


## Get set of tracks

In [3]:
def get_track_names_ids(playlist):
    all_tracks = []
    for off in range(0, 3001, 100):
        tracks = sp.playlist_tracks(playlist, limit=100, offset=off)['items']
        for track in tracks:
            if track['track'] is not None:
                all_tracks.append((track['track']['id'], track['track']['name'], track['track']['artists'][0]['name'], 'dreampop'))
    return all_tracks

track_data = []
for i, playlist in enumerate(playlist_ids):
    clear_output(wait=True)
    print("Processing playlist:", i)
    for id, name, artist, genre in get_track_names_ids(playlist):
        track_data.append((id, name, artist, genre))
clear_output(wait=True)

track_dict = {}
for track in track_data:
    if track not in track_dict:
        track_dict[track] = 0
    track_dict[track] += 1

print('Tracks:', len(list(track_dict.items())))

Tracks: 9317


## Get audio features per track

In [4]:
def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

track_chunks = list(chunks(track_data, 100))
audio_features = []
for c in track_chunks:
    c = [track for track in c if track[0] is not None]
    c_ids = [track[0] for track in c]
    c_names = [track[1] for track in c]
    c_artists = [track[2] for track in c]
    c_genres = [track[3] for track in c]
    c_audio_features = sp.audio_features(tracks=c_ids)
    for i, _ in enumerate(c_audio_features):
        if c_audio_features[i] == None:
            continue
        c_audio_features[i]['name'] = c_names[i]
        c_audio_features[i]['artist'] = c_artists[i]
        c_audio_features[i]['genre'] = c_genres[i]
    audio_features += c_audio_features
print('Audio Features:', len(audio_features))

Audio Features: 11722


In [5]:
audio_features = [f for f in audio_features if f is not None]
print('Audio Features:', len(audio_features))

Audio Features: 11722


## Save to CSV

In [6]:
track_df = pd.DataFrame(audio_features)
track_df.to_csv('../data/dreampop.csv')
track_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,type,id,uri,track_href,analysis_url,duration_ms,time_signature,name,artist,genre
0,0.482,0.866,7,-4.534,1,0.0293,0.000368,0.116000,0.247,0.372,...,audio_features,5gEni4dmBqme8fyPcyWYiK,spotify:track:5gEni4dmBqme8fyPcyWYiK,https://api.spotify.com/v1/tracks/5gEni4dmBqme...,https://api.spotify.com/v1/audio-analysis/5gEn...,187364,4,Kate's Not Here - Day Wave & Lawrence Rothman ...,Lawrence Rothman,dreampop
1,0.543,0.498,9,-10.152,1,0.0678,0.692000,0.432000,0.078,0.815,...,audio_features,0l4cR2AZ18MktMd0BU3zwh,spotify:track:0l4cR2AZ18MktMd0BU3zwh,https://api.spotify.com/v1/tracks/0l4cR2AZ18Mk...,https://api.spotify.com/v1/audio-analysis/0l4c...,241097,4,Tenderness,Jay Som,dreampop
2,0.644,0.756,7,-8.342,1,0.0273,0.567000,0.158000,0.151,0.802,...,audio_features,04DdNbTmZAgHfNP6FdY3Sa,spotify:track:04DdNbTmZAgHfNP6FdY3Sa,https://api.spotify.com/v1/tracks/04DdNbTmZAgH...,https://api.spotify.com/v1/audio-analysis/04Dd...,264955,4,Plum,Widowspeak,dreampop
3,0.508,0.798,1,-5.412,0,0.0259,0.256000,0.202000,0.701,0.481,...,audio_features,22gjW0ccBALzqASrryR7Dw,spotify:track:22gjW0ccBALzqASrryR7Dw,https://api.spotify.com/v1/tracks/22gjW0ccBALz...,https://api.spotify.com/v1/audio-analysis/22gj...,213481,4,Opportunity,Kevin Krauter,dreampop
4,0.562,0.724,2,-6.787,1,0.0622,0.203000,0.001150,0.129,0.464,...,audio_features,4kDqNIQiBwkqDs6jWWcHAM,spotify:track:4kDqNIQiBwkqDs6jWWcHAM,https://api.spotify.com/v1/tracks/4kDqNIQiBwkq...,https://api.spotify.com/v1/audio-analysis/4kDq...,166453,4,What Does It Mean To Me?,Oberhofer,dreampop
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11717,0.565,0.566,0,-8.751,0,0.1010,0.371000,0.000108,0.192,0.412,...,audio_features,5H8jHmABa01Y7RSL8tejgh,spotify:track:5H8jHmABa01Y7RSL8tejgh,https://api.spotify.com/v1/tracks/5H8jHmABa01Y...,https://api.spotify.com/v1/audio-analysis/5H8j...,189867,4,Chandelier,Yung Lean,dreampop
11718,0.673,0.405,8,-8.486,1,0.0335,0.689000,0.000305,0.150,0.400,...,audio_features,5cBco0HKWcHygJPIjuJAKz,spotify:track:5cBco0HKWcHygJPIjuJAKz,https://api.spotify.com/v1/tracks/5cBco0HKWcHy...,https://api.spotify.com/v1/audio-analysis/5cBc...,154013,4,make believe,mazie,dreampop
11719,0.739,0.491,6,-11.515,1,0.0270,0.791000,0.036600,0.100,0.707,...,audio_features,0HAqq2GcQKyi3s87GuN7jU,spotify:track:0HAqq2GcQKyi3s87GuN7jU,https://api.spotify.com/v1/tracks/0HAqq2GcQKyi...,https://api.spotify.com/v1/audio-analysis/0HAq...,228922,4,Amoeba,Clairo,dreampop
11720,0.278,0.798,8,-6.201,1,0.0688,0.003340,0.001440,0.220,0.318,...,audio_features,4HOM1n4aJDBDeMe3llCftS,spotify:track:4HOM1n4aJDBDeMe3llCftS,https://api.spotify.com/v1/tracks/4HOM1n4aJDBD...,https://api.spotify.com/v1/audio-analysis/4HOM...,199456,4,Good Girls,CHVRCHES,dreampop


# DB Upload

In [7]:
conn = create_engine('postgresql://{0}:{1}@{2}:{3}/{4}'.format(os.getenv('DB_USER'),
                                                               os.getenv('DB_PASS'), 
                                                               os.getenv('DB_IP'), 
                                                               os.getenv('DB_PORT'), 
                                                               os.getenv('DB_NAME')))
                                                               
track_df.to_sql('dreampop', con=conn, if_exists='replace')