# Imports/Setup

In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
from IPython.display import clear_output
from sqlalchemy import create_engine
from dotenv import load_dotenv
from pathlib import Path
import os

%matplotlib inline

dotenv_path = Path('../.env')
load_dotenv(dotenv_path=dotenv_path)

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=os.getenv('CLIENT_ID'), CLIENT_SECRET=os.getenv('CLIENT_SECRET')))

Exception ignored in: <function SpotifyAuthBase.__del__ at 0x00000227A1DF7EE0>
Traceback (most recent call last):
  File "C:\Users\paart\AppData\Local\Programs\Python\Python38\lib\site-packages\spotipy\oauth2.py", line 134, in __del__
    if isinstance(self._session, requests.Session):
AttributeError: 'SpotifyClientCredentials' object has no attribute '_session'


TypeError: __init__() got an unexpected keyword argument 'CLIENT_SECRET'

# Download Data

## Get top 50 playlists about dreampop

In [None]:
total = 50
limit = 50
offset = 0

playlist_ids = []
while len(playlist_ids) < total:
    lists = sp.search(q='dreampop', type='playlist', limit=limit, offset=offset)['playlists']['items']
    for playlist in lists:
        playlist_ids.append(playlist['id'])
    offset += 50
playlist_ids = playlist_ids[:100]
print('Playlists:', len(playlist_ids))

## Get set of tracks

In [None]:
def get_track_names_ids(playlist):
    all_tracks = []
    for off in range(0, 3001, 100):
        tracks = sp.playlist_tracks(playlist, limit=100, offset=off)['items']
        for track in tracks:
            if track['track'] is not None:
                all_tracks.append((track['track']['id'], track['track']['name'], track['track']['artists'][0]['name'], 'dreampop'))
    return all_tracks

track_data = []
for i, playlist in enumerate(playlist_ids):
    clear_output(wait=True)
    print("Processing playlist:", i)
    for id, name, artist, genre in get_track_names_ids(playlist):
        track_data.append((id, name, artist, genre))
clear_output(wait=True)

track_dict = {}
for track in track_data:
    if track not in track_dict:
        track_dict[track] = 0
    track_dict[track] += 1

print('Tracks:', len(list(track_dict.items())))

## Get audio features per track

In [None]:
def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

track_chunks = list(chunks(track_data, 100))
audio_features = []
for c in track_chunks:
    c = [track for track in c if track[0] is not None]
    c_ids = [track[0] for track in c]
    c_names = [track[1] for track in c]
    c_artists = [track[2] for track in c]
    c_genres = [track[3] for track in c]
    c_audio_features = sp.audio_features(tracks=c_ids)
    for i, _ in enumerate(c_audio_features):
        if c_audio_features[i] == None:
            continue
        c_audio_features[i]['name'] = c_names[i]
        c_audio_features[i]['artist'] = c_artists[i]
        c_audio_features[i]['genre'] = c_genres[i]
    audio_features += c_audio_features
print('Audio Features:', len(audio_features))

In [None]:
audio_features = [f for f in audio_features if f is not None]
print('Audio Features:', len(audio_features))

## Save to CSV

In [None]:
track_df = pd.DataFrame(audio_features)
track_df.to_csv('../data/dreampop.csv')
track_df

# DB Upload

In [None]:
conn = create_engine('postgresql://{0}:{1}@{2}:{3}/{4}'.format(os.getenv('DB_USER'),
                                                               os.getenv('DB_PASS'), 
                                                               os.getenv('DB_IP'), 
                                                               os.getenv('DB_PORT'), 
                                                               os.getenv('DB_NAME')))
                                                               
track_df.to_sql('dreampop', con=conn, if_exists='replace')