In [34]:
import spotipy
import csv
import os
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
from dotenv import load_dotenv
import pandas as pd

In [35]:
# Load variables from a .env file into the process environment so the Spotify
# credentials never have to be hard-coded in the notebook.
load_dotenv()

client_id = os.getenv('SPOTIPY_CLIENT_ID')
client_secret = os.getenv('SPOTIPY_CLIENT_SECRET')
redirect_uri = os.getenv('SPOTIPY_REDIRECT_URI')

# Single API client shared by every later cell.
# Only `auth_manager` is supplied: spotipy documents `client_credentials_manager`
# as a deprecated fallback that is used only when `auth_manager` is absent, so
# passing both merely constructed an unused SpotifyClientCredentials object.
# requests_timeout is the per-request timeout in seconds.
spotify = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        client_id=client_id,
        client_secret=client_secret,
        redirect_uri=redirect_uri,
    ),
    requests_timeout=10,
)

In [36]:
def _last_word(label):
    """Return the final whitespace-separated word of a genre label."""
    return label.split()[-1]


# Read the tab-separated audio-features table.
raw_data = pd.read_csv("../../data/audio_features_data.tsv", sep='\t')

# Collapse each genre label to its last word (this overwrites the 'genre' column).
raw_data['genre'] = raw_data['genre'].apply(_last_word)

# The five most frequent collapsed labels, most common first.
genre_counts = raw_data['genre'].value_counts()
lst = genre_counts.nlargest(5).index.tolist()
lst

['pop', 'rock', 'hop', 'rap', 'chillhop']

In [37]:
# Search paging: 50 tracks per request, at most 20 requests per genre
# (the same 1 + 19 requests the loop has always issued).
PAGE_SIZE = 50
MAX_PAGES = 20

# Genre labels were reduced to their last word when `lst` was built, so
# "hip hop" appears as "hop". Restore it by value rather than by a hard-coded
# position, so a change in the ranking cannot overwrite a different genre.
if 'hop' in lst:
    lst[lst.index('hop')] = 'hip hop'
genre_list = lst

# genre -> list of raw track objects returned by the search endpoint.
genre_dict = {genre: [] for genre in genre_list}

for genre in genre_list:
    # NOTE(review): for the multi-word genre this sends `genre:hip hop`
    # unquoted; confirm against the Spotify search docs whether it should be
    # `genre:"hip hop"` so that "hop" is not treated as a free keyword.
    search_query = 'genre:' + genre
    search_result = spotify.search(q=search_query, limit=PAGE_SIZE)  # the result only consists of tracks

    page = search_result['tracks']
    genre_dict[genre].extend(page['items'])

    for _ in range(MAX_PAGES - 1):
        # spotify.next() returns None once there is no further page; stop
        # instead of failing on None['tracks'].
        search_result = spotify.next(page)
        if search_result is None:
            break
        page = search_result['tracks']
        genre_dict[genre].extend(page['items'])

In [38]:
# Inspect which fields a single track record carries, using the first 'pop' hit.
first_pop_track = genre_dict['pop'][0]
first_pop_track.keys()

dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'name', 'popularity', 'preview_url', 'track_number', 'type', 'uri'])

In [39]:
# genre -> list of track IDs, in the order the tracks were collected.
tracks_id = {genre: [] for genre in genre_list}

for genre, tracks in genre_dict.items():
    for track in tracks:
        tracks_id[genre].append(track['id'])

# Remove repeated IDs within each genre; dict.fromkeys keeps first-seen order.
for genre, ids in tracks_id.items():
    tracks_id[genre] = list(dict.fromkeys(ids))

In [40]:
# spotipy's audio_features() accepts a list of up to 100 track IDs per call,
# so fetch in batches instead of issuing one HTTP request per track.
AUDIO_FEATURES_BATCH = 100

# genre -> list of single-element lists, one per requested track:
# [features_dict] when features exist, [None] when they do not.
track_features_list = {genre: [] for genre in genre_list}

for genre, ids in tracks_id.items():
    for start in range(0, len(ids), AUDIO_FEATURES_BATCH):
        batch = ids[start:start + AUDIO_FEATURES_BATCH]
        features = spotify.audio_features(batch)
        # Wrap every entry in its own list: the following cells filter on
        # `[None]` and read `entry[0]`, which is the shape the per-track
        # calls used to produce.
        track_features_list[genre].extend([feature] for feature in features)

In [41]:
# Discard the placeholder entries ([None]) left for tracks without audio features.
for genre, entries in track_features_list.items():
    track_features_list[genre] = [entry for entry in entries if entry != [None]]

In [42]:
# Unwrap each single-element entry into a plain dict, keeping the 'id' field
# and every field whose value is not a string.
for genre, entries in track_features_list.items():
    for position, entry in enumerate(entries):
        features = entry[0]
        entries[position] = {
            key: value
            for key, value in features.items()
            if key == 'id' or type(value) is not str
        }

In [43]:
# Tag every feature record with the genre it was collected under.
for genre, records in track_features_list.items():
    for record in records:
        record['genre'] = genre

In [44]:
# Rebuild each record with 'id' as its first key; the remaining keys keep
# their existing order.
for genre, records in track_features_list.items():
    for position, record in enumerate(records):
        track_id = record.pop('id')
        records[position] = {'id': track_id, **record}

In [45]:
# Flatten the per-genre record lists into one list of rows, genre by genre.
save_data = [
    record
    for records in track_features_list.values()
    for record in records
]

In [46]:
# Write all rows to a tab-separated file; the header comes from the keys of
# the first row.
# newline='' is what the csv module requires for files it writes to; without
# it, platforms that translate '\n' on output produce '\r\r\n' row endings.
# The encoding is pinned so the file does not depend on the locale default.
with open('../../data/5_genres_audio_features.tsv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=list(save_data[0].keys()), delimiter='\t')
    writer.writeheader()
    writer.writerows(save_data)