<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Get-song-features" data-toc-modified-id="Get-song-features-1">Get song features</a></span></li></ul></div>

In [1]:
import spotipy
import spotipy.util as util
import os
import numpy as np
from spotipy.oauth2 import SpotifyOAuth
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Spotipy reads its credentials and redirect URI from these environment
# variables. The two credential values are placeholders and must be replaced
# with real ones before the authentication cell can succeed.
os.environ.update({
    'SPOTIPY_CLIENT_ID': "Your CLIENT_ID",
    'SPOTIPY_CLIENT_SECRET': "Your SPOTIPY_CLIENT_SECRET",
    'SPOTIPY_REDIRECT_URI': 'http://localhost:7777/callback',
})

In [3]:
# App-only credentials manager; created here but not passed to the client below.
auth_manager = SpotifyClientCredentials()

# Scopes requested for the user-authorised client.
scope = "playlist-read-private, user-library-read"

# Client used by every API call in this notebook, authorised via the OAuth flow.
sp_client = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        scope=scope,
        username='shivakumar',
    )
)

In [5]:
# Owner of the playlists queried below.
user = 'spotify'

# Playlist IDs: liked songs (target 1) and disliked songs (target 0).
good_id, bad_id = 'liked-playlist-id', 'disliked-playlist-id'

# Playlist reserved for the final test set.
discover = 'final-test-playlist'

In [6]:
# Look at the fields available on a track object, using the first entry of the
# liked playlist as the sample.
first_item = sp_client.user_playlist_tracks(user, good_id)['items'][0]
first_item['track'].keys()

dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'episode', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'name', 'popularity', 'preview_url', 'track', 'track_number', 'type', 'uri'])

## Get song features

In [7]:
def fetch_playlist_feat(creator, playlist_id, client=None):
    """Collect track metadata and audio features for every track in a playlist.

    Parameters
    ----------
    creator : str
        Spotify user ID of the playlist owner; passed to
        ``user_playlist_tracks``.
    playlist_id : str
        ID of the playlist to read.
    client : optional
        Object exposing ``user_playlist_tracks`` and ``audio_features``.
        When omitted, the notebook-level ``sp_client`` is used.

    Returns
    -------
    pandas.DataFrame
        One row per track, with the columns listed in
        ``playlist_features_list`` (in that order). An empty playlist yields
        an empty frame that still carries those columns. Tracks for which the
        API returns no audio features keep their metadata and get missing
        values in the audio-feature columns.

    Notes
    -----
    Playlist entries without a track object or without a track ID are skipped,
    because audio features cannot be requested for them.
    """
    playlist_features_list = ['artist', 'album', 'track_name', 'track_id', 'popularity', 'danceability',
                              'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness',
                              'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']
    # The first five columns come from the track object; the rest are read
    # from the audio-features response.
    audio_feature_names = playlist_features_list[5:]
    # Page size for playlist reads and batch size for audio-feature requests.
    page_size = 100

    if client is None:
        client = sp_client

    # Walk the playlist page by page, requesting each page exactly once and
    # stopping at the first empty page.
    tracks = []
    offset = 0
    while True:
        items = client.user_playlist_tracks(creator, playlist_id,
                                            limit=page_size, offset=offset)['items']
        if not items:
            break
        for item in items:
            track = item.get('track')
            if track and track.get('id'):
                tracks.append(track)
        offset += page_size

    # Request audio features in batches rather than one call per track, and
    # collect plain dicts so the DataFrame is built once at the end.
    records = []
    for start in range(0, len(tracks), page_size):
        batch = tracks[start:start + page_size]
        batch_features = client.audio_features([track['id'] for track in batch])
        for track, audio_features in zip(batch, batch_features):
            # A missing entry means no audio features are available for this track.
            audio_features = audio_features or {}
            record = {
                'artist': track['album']['artists'][0]['name'],
                'album': track['album']['name'],
                'track_name': track['name'],
                'track_id': track['id'],
                'popularity': track['popularity'],
            }
            for feature in audio_feature_names:
                record[feature] = audio_features.get(feature)
            records.append(record)

    return pd.DataFrame(records, columns=playlist_features_list)

In [8]:
# Fetch the feature table for the liked playlist, then for the disliked one.
good_df, bad_df = (fetch_playlist_feat(user, pid) for pid in (good_id, bad_id))
# The discover playlist fetch is left disabled:
#discover_df = fetch_playlist_feat(user, discover)

In [9]:
# Label the rows: 1 for tracks from the liked playlist, 0 for the disliked one.
good_df = good_df.assign(target=1)
bad_df = bad_df.assign(target=0)

In [10]:
# Stack the two labelled tables row-wise, liked tracks first.
labelled_frames = [good_df, bad_df]
df = pd.concat(labelled_frames, axis=0)

In [11]:
# Replace the duplicated per-playlist index left by the concat with a fresh
# 0..n-1 index, discarding the old one.
df = df.reset_index(drop=True)

In [12]:
# Shuffle the rows with a fixed seed so the order is reproducible, then
# renumber the index.
np.random.seed(10)
row_order = np.random.permutation(len(df))
df = df.iloc[row_order].reset_index(drop=True)

In [13]:
# Preview the first five rows of the shuffled, labelled table.
df.head(n=5)

Unnamed: 0,artist,album,track_name,track_id,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,target
0,Evol Intent,Amazing Friends,Broken Sword,5YJIHyYOLHhxSNXaEQta4c,0,0.351,0.999,1,-3.208,1,0.0874,0.000253,0.835,0.301,0.34,114.711,361693,5,0
1,Vishal-Shekhar,I Hate Luv Storys (Original Motion Picture Sou...,Bahara,1gwO79MdYdumgIjxq8eCxB,56,0.456,0.739,5,-6.733,1,0.0402,0.0718,7.24e-06,0.134,0.72,85.773,325707,4,1
2,Pritam,Chhichhore,Khairiyat (Bonus Track),3oM90C4SZLYzVZJEZD5EQk,58,0.606,0.654,0,-5.625,0,0.0365,0.484,1.13e-06,0.152,0.597,124.972,270707,4,1
3,ILS,Bohemia,Feed The Addiction,23AUyZevDLz3pwkICwIJa0,0,0.585,0.887,1,-6.485,1,0.0422,0.000171,0.813,0.554,0.212,134.884,278280,4,1
4,Amaal Mallik,M.S.Dhoni - The Untold Story,Phir Kabhi,4jk4CaqBMBbMZhf3PuR1ai,57,0.405,0.665,4,-5.498,1,0.0328,0.614,0.0,0.525,0.404,81.921,287422,4,1


In [15]:
# Display the (rows, columns) size of the combined table.
df.shape

(356, 19)

In [14]:
# Write the combined table to disk without the index column.
output_path = '../dataset/data.csv'
df.to_csv(output_path, index=False)