# Create a Dataset for Music Classification Using Spotipy

In [1]:
import spotipy
import time
from IPython.core.display import clear_output
from spotipy import SpotifyClientCredentials, util

In [2]:
# Spotify application credentials (placeholders: replace with your own values).
client_id = 'your client id'
client_secret = 'your client secret key'

# Account for which a user token is requested, and the scope asked for.
username = 'your username'
scope = 'playlist-modify-public'

# Redirect URI handed to the user-token prompt.
redirect_uri = 'http://localhost:7000/callback'

In [3]:
# Client authenticated with the application's id and secret only.
manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
sp = spotipy.Spotify(client_credentials_manager=manager)

# Credentials to access the Spotify user's playlists, favorite songs, etc.
token = util.prompt_for_user_token(
    username=username,
    scope=scope,
    client_id=client_id,
    client_secret=client_secret,
    redirect_uri=redirect_uri,
)
spt = spotipy.Spotify(auth=token)

In [24]:
def download_playlist(id_playlist,n_songs):
    """Download metadata and audio features for the songs of a playlist.

    Track ids are read from the playlist page by page through the
    user-authorised client ``spt``; every id is then handed to
    ``get_songs_features``. Progress is printed and the cell output is
    cleared after each song.

    Args:
        id_playlist: Playlist identifier passed to ``spt.playlist_tracks``.
        n_songs: Maximum number of playlist items to read.

    Returns:
        A ``(tracks, columns)`` tuple. ``tracks`` holds one row per song as
        returned by ``get_songs_features`` and ``columns`` the matching
        column names. Both are empty lists when no song could be read.
    """
    page_size = 100
    songs_id = []

    for offset in range(0, n_songs, page_size):
        # Cap the last page so that no more than n_songs items are requested.
        limit = min(page_size, n_songs - offset)
        playlist = spt.playlist_tracks(id_playlist, limit=limit, offset=offset)
        items = playlist['items']
        if not items:
            # The playlist holds fewer items than n_songs: nothing left to read.
            break

        for item in items:
            track_info = item['track']
            # An entry without a track object or without an id cannot be
            # looked up afterwards, so it is skipped instead of crashing.
            if track_info is None or track_info['id'] is None:
                continue
            songs_id.append(track_info['id'])

    tracks = []
    # Defined up front so the return statement works when songs_id is empty.
    columns = []
    for counter, song_id in enumerate(songs_id, start=1):
        # Pause between lookups (presumably to stay under the API rate limit).
        time.sleep(.6)
        track, columns = get_songs_features(song_id)
        tracks.append(track)

        print(f"Song {counter} Added:")
        print(f"{track[0]} By {track[2]} from the album {track[1]}")
        clear_output(wait = True)

    clear_output(wait = True)
    print("Music Downloaded!")
    return tracks,columns

In [16]:
def get_songs_features(ids):
    """Fetch the metadata and audio features of a single track.

    Both lookups go through the module-level client ``sp``.

    Args:
        ids: Track identifier passed to ``sp.track`` and
            ``sp.audio_features``.

    Returns:
        A ``(track, columns)`` tuple. ``track`` is a list of values and
        ``columns`` the list of names describing them, in the same order.
        The audio-feature values are ``None`` when the service returns no
        audio features for the track.
    """
    feature_names = ['danceability', 'acousticness', 'energy', 'instrumentalness',
                     'liveness', 'valence', 'loudness', 'speechiness', 'tempo',
                     'key', 'time_signature']

    meta = sp.track(ids)
    features = sp.audio_features(ids)

    # meta
    album = meta['album']
    track = [
        meta['name'],
        album['name'],
        # First artist listed on the album (not the track-level artist list).
        album['artists'][0]['name'],
        meta['id'],
        album['release_date'],
        meta['popularity'],
        meta['duration_ms'],
    ]

    # features
    # audio_features returns a list; its entry can be None when the track has
    # no audio features, in which case the feature columns are left empty.
    audio = features[0] if features else None
    if audio is None:
        track.extend([None] * len(feature_names))
    else:
        track.extend(audio[feature] for feature in feature_names)

    columns = ['name', 'album', 'artist', 'id', 'release_date', 'popularity',
               'length'] + feature_names

    return track,columns

In [6]:
# Happy playlist: download its songs.
tracks, columns = download_playlist(
    id_playlist="6tUoLGh3ueIMVoYbBVZpKQ",
    n_songs=100,
)

Music Downloaded!


In [7]:
import pandas as pd
# Tabulate the downloaded rows and label every one of them as 'happy'.
df_happy = pd.DataFrame(tracks, columns=columns).assign(emotion='happy')

In [8]:
# Sad playlist: download its songs.
tracks, columns = download_playlist(
    id_playlist="56adfjzaO4gkQwEIZACbX5",
    n_songs=100,
)

Music Downloaded!


In [9]:
# Tabulate the downloaded rows and label every one of them as 'sad'.
df_sad = pd.DataFrame(tracks, columns=columns).assign(emotion='sad')

In [10]:
# Neutral playlist: download its songs.
tracks, columns = download_playlist(
    id_playlist="4R8q49NearaGRAmBCB04ky",
    n_songs=100,
)

Music Downloaded!


In [11]:
# Tabulate the downloaded rows and label every one of them as 'neutral'.
df_neutral = pd.DataFrame(tracks, columns=columns).assign(emotion='neutral')

In [12]:
# Calm playlist: download its songs.
tracks, columns = download_playlist(
    id_playlist="224DyzqwL7PeeKkFxIvVD7",
    n_songs=100,
)

Music Downloaded!


In [13]:
# Tabulate the downloaded rows and label every one of them as 'calm'.
df_calm = pd.DataFrame(tracks, columns=columns).assign(emotion='calm')

In [14]:
# Stack the four per-emotion frames row-wise. ignore_index=True renumbers the
# rows 0..N-1; without it each frame keeps its own 0-based index, so the
# combined frame would carry duplicated index labels.
df_final = pd.concat(
    [df_happy, df_sad, df_neutral, df_calm],
    axis=0,
    ignore_index=True,
)

In [15]:
# Number of rows per emotion label.
df_final.loc[:, 'emotion'].value_counts()

neutral    100
happy      100
calm       100
sad        100
Name: emotion, dtype: int64

In [16]:
# Write the combined dataset to a CSV file in the working directory.
df_final.to_csv(path_or_buf='songs_data.csv')