In [1]:
from __future__ import print_function
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import json
import csv
import os
import pandas as pd

In [17]:
# Read the Spotify API client id/secret from a local JSON file and build an
# authenticated client from them (client-credentials manager).
with open("client_credentials.json", "r") as credentials_file:
    credentials = json.load(credentials_file)

credentials_manager = SpotifyClientCredentials(
    client_id=credentials["client_id"],
    client_secret=credentials["client_secret"],
)
spotify = spotipy.Spotify(client_credentials_manager=credentials_manager)

In [25]:
# Parse the playlist id listing stored in playlist_ids.json.
with open("playlist_ids.json", "r") as playlist_file:
    playlist_ids = json.load(playlist_file)

In [26]:
import glob

# Start from a clean slate: delete every regular file matched in ../csv_files.
# The glob pattern also matches sub-directories, and os.remove() raises on a
# directory, so anything that is not a regular file is left untouched.
files = [path for path in glob.glob('../csv_files/*') if os.path.isfile(path)]
for f in files:
    os.remove(f)

In [27]:
# Export one CSV per genre to ../csv_files/<genre>.csv.
#
# For every (genre, playlist id list) pair in `playlist_ids`, the tracks of each
# playlist that have a preview URL are fetched, merged with their Spotify audio
# features, tagged with the genre's numeric label, de-duplicated on track id and
# written out with the columns in `fieldnames` plus `multiclass_label`.
multiclass_labels = {'rock': 1, 'pop': 2, 'hiphop': 3, 'blues': 4, 'electronic': 5, 'jazz': 6, 'classical': 7, 'disco': 8, 'folk': 9, 'alternative': 10}
fieldnames = ['id','track_name', 'artist_name', 'preview_url', 'danceability','energy','key','loudness','mode','speechiness','acousticness','instrumentalness','liveness','valence','tempo','duration_ms','time_signature']

# Make sure the output directory exists so the export does not fail on a fresh checkout.
output_dir = '../csv_files'
os.makedirs(output_dir, exist_ok=True)

for genre, playlist_id_list in playlist_ids.items():
    # Look the label up first: an unknown genre raises KeyError before any API call is made.
    genre_label = multiclass_labels[genre]

    data_for_genre = []
    for playlist_id in playlist_id_list:
        # NOTE(review): no limit/offset is passed, so a single page of results is
        # requested per playlist (spotipy documents a default limit of 100) —
        # confirm whether longer playlists should be paginated.
        items = spotify.playlist_tracks(playlist_id, fields='items.track.id, items.track.name, items.track.artists, items.track.preview_url, total')['items']

        # Keep only entries that still resolve to a track and expose a preview URL.
        meta_data = [
            {
                'id': item['track']['id'],
                'track_name': item['track']['name'],
                'artist_name': item['track']['artists'][0]['name'],
                'preview_url': item['track']['preview_url'],
            }
            for item in items
            if item['track'] is not None and item['track']['preview_url'] is not None
        ]
        if not meta_data:
            # Nothing usable in this playlist; skip the audio-features request entirely.
            continue

        track_ids = [track['id'] for track in meta_data]
        # The call may yield None overall or None for individual tracks; such
        # tracks are dropped instead of crashing on dict.update(None).
        features = spotify.audio_features(track_ids) or []
        for meta_data_i, features_i in zip(meta_data, features):
            if features_i is None:
                continue
            meta_data_i.update(features_i)
            data_for_genre.append(meta_data_i)

    # Build the table in memory and write it once. This always emits the header
    # row and avoids re-parsing a freshly written CSV.
    df = pd.DataFrame(
        [{field: data[field] for field in fieldnames} for data in data_for_genre],
        columns=fieldnames,
    )
    df['multiclass_label'] = genre_label

    # A track can appear in several playlists of the same genre; keep its first occurrence.
    new_df = df.drop_duplicates(subset=['id'])
    new_df.to_csv(f'{output_dir}/{genre}.csv', index=False)

