In [2]:
import pickle
import pandas as pd
import numpy as np  
import requests
import base64

# SECURITY: API credentials should not be committed inside a notebook.
# Prefer supplying them through the SPOTIFY_CLIENT_ID / SPOTIFY_CLIENT_SECRET
# environment variables. The literals below are kept only as a fallback so the
# notebook still runs unchanged.
# TODO: rotate these credentials and remove the literals.
import os

client_id = os.environ.get('SPOTIFY_CLIENT_ID', 'ca83153c363a4fbc8fc683647b84831d')
client_secret = os.environ.get('SPOTIFY_CLIENT_SECRET', '2aa45112fa0e4d38befc0cbde25006e1')

In [3]:
def get_service_token():
    """Request an access token using the ``client_credentials`` grant.

    Reads the module-level ``client_id`` and ``client_secret``, sends them as
    an HTTP Basic ``Authorization`` header to the Spotify accounts token
    endpoint, and returns the ``access_token`` field of the JSON reply.

    Returns:
        The access token when the endpoint answers with status 200,
        otherwise ``None``.
    """
    raw_credentials = f'{client_id}:{client_secret}'.encode('utf-8')
    encoded_credentials = base64.b64encode(raw_credentials).decode('utf-8')

    response = requests.post(
        'https://accounts.spotify.com/api/token',
        data={'grant_type': 'client_credentials'},
        headers={'Authorization': 'Basic ' + encoded_credentials},
    )

    # Any non-200 answer is reported to the caller as "no token".
    if response.status_code != 200:
        return None
    return response.json()['access_token']

In [4]:
def get_tracks(playlist_id, headers, next_url=None, track_list=None):
    """Collect basic metadata for every track of a playlist.

    Pages are fetched one after another by following the ``next`` URL of each
    response until it is empty.

    Args:
        playlist_id: Playlist id used to build the first request URL.
        headers: HTTP headers sent with every request.
        next_url: Optional URL to start from instead of the playlist's first
            page.
        track_list: Optional dict to fill in place. A fresh dict is created
            when omitted, so separate calls never share results.

    Returns:
        Dict mapping track id to a dict with the keys ``_id``, ``name``,
        ``duration_ms``, ``popularity``, ``explicit`` and ``artist`` (name of
        the first listed artist, or ``None`` when the artist list is empty).
        Items without a track object or without a track id are skipped.
    """
    if track_list is None:
        track_list = {}

    url = next_url or f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks'
    while url:
        page = requests.get(url, headers=headers).json()
        for item in page['items']:
            track = item.get('track')
            # Skip entries whose track object is null or has no id; they
            # cannot be keyed or queried later.
            if not track or not track.get('id'):
                continue
            artists = track.get('artists') or []
            track_list[track['id']] = {
                '_id': track['id'],
                'name': track['name'],
                'duration_ms': track['duration_ms'],
                'popularity': track['popularity'],
                'explicit': track['explicit'],
                'artist': artists[0]['name'] if artists else None
            }
        url = page.get('next')

    return track_list

def get_track_data(track_list, headers, iter_index=0):
    """Add each track's album ``release_date`` to ``track_list``.

    Track ids are sent to the tracks endpoint in batches of 50, starting with
    batch number ``iter_index`` and continuing to the end of the id list.

    Args:
        track_list: Dict keyed by track id; every value is a dict that is
            updated in place.
        headers: HTTP headers sent with every request.
        iter_index: Zero-based number of the first batch to request.

    Returns:
        The same ``track_list`` object. Tracks for which the response holds a
        null entry are left without a ``release_date`` key. No request is
        made when there are no ids to look up.
    """
    batch_size = 50  # ids per request, as used by the original implementation
    id_list = list(track_list.keys())

    for start_index in range(iter_index * batch_size, len(id_list), batch_size):
        req_id_list = id_list[start_index:start_index + batch_size]
        rdata = requests.get(f'https://api.spotify.com/v1/tracks/?ids={",".join(req_id_list)}', headers=headers)
        data = rdata.json()
        for track in data['tracks']:
            # The endpoint may answer with null for an id it cannot resolve.
            if track is None:
                continue
            track_list[track['id']]['release_date'] = track['album']['release_date']

    return track_list


def get_track_features(track_list, headers, iter_index=0):
    """Add the audio-feature values of every track to ``track_list``.

    Track ids are sent to the audio-features endpoint in batches of 100,
    starting with batch number ``iter_index`` and continuing to the end of the
    id list.

    Args:
        track_list: Dict keyed by track id; every value is a dict that is
            updated in place.
        headers: HTTP headers sent with every request.
        iter_index: Zero-based number of the first batch to request.

    Returns:
        The same ``track_list`` object. Tracks for which the response holds a
        null entry are left without any feature keys. No request is made when
        there are no ids to look up.
    """
    feature_keys = (
        'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'time_signature',
    )
    batch_size = 100  # ids per request, as used by the original implementation
    id_list = list(track_list.keys())

    for start_index in range(iter_index * batch_size, len(id_list), batch_size):
        req_id_list = id_list[start_index:start_index + batch_size]
        rfeatures = requests.get(f'https://api.spotify.com/v1/audio-features/?ids={",".join(req_id_list)}', headers=headers)
        features = rfeatures.json()

        for feature in features['audio_features']:
            # Entries are null for tracks that have no audio features.
            if feature is None:
                continue
            entry = track_list[feature['id']]
            for feature_key in feature_keys:
                entry[feature_key] = feature[feature_key]

    return track_list


def get_audio_feature(track_list, headers):
    """Attach duration-weighted pitch and timbre averages to every track.

    One audio-analysis request is made per track id. When the reply contains
    a ``track`` entry, each of the 12 pitch and 12 timbre values of every
    segment is multiplied by that segment's duration, summed over all
    segments, and divided by the track duration. The results are stored under
    the track's ``pitches`` and ``timbre`` keys and the track name is printed.
    Tracks whose reply has no ``track`` entry are left unchanged.

    Args:
        track_list: Dict keyed by track id; values are dicts updated in place.
        headers: HTTP headers sent with every request.

    Returns:
        The same ``track_list`` object.
    """
    note_names = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')
    basis_names = ('B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B9', 'B10', 'B11', 'B12')

    for track_id, track in track_list.items():
        response = requests.get(f'https://api.spotify.com/v1/audio-analysis/{track_id}', headers=headers)
        analysis = response.json()
        if 'track' not in analysis:
            continue

        summary = analysis['track']
        segments = analysis['segments']
        duration = summary['duration']

        pitch_totals = dict.fromkeys(note_names, 0)
        timbre_totals = dict.fromkeys(basis_names, 0)

        for segment in segments:
            weight = segment['duration']

            pitches = segment['pitches']
            for position, note in enumerate(note_names):
                pitch_totals[note] += pitches[position] * weight

            timbres = segment['timbre']
            for position, basis in enumerate(basis_names):
                timbre_totals[basis] += timbres[position] * weight

        # Normalise the weighted sums by the total track duration.
        pitch_values = {note: total / duration for note, total in pitch_totals.items()}
        timbre_values = {basis: total / duration for basis, total in timbre_totals.items()}

        track['timbre'] = timbre_values
        track['pitches'] = pitch_values
        print(track['name'], 'done.')

    return track_list
    

In [5]:
# Authenticate, then download the playlist and enrich every track with its
# release date and audio features.
access_token = get_service_token()
if access_token is None:
    # get_service_token() returns None on any non-200 answer; fail here with a
    # clear message instead of a TypeError on the string concatenation below.
    raise RuntimeError('Could not obtain a Spotify access token; check client_id / client_secret.')
headers = {
    'Authorization': 'Bearer ' + access_token
}
track_list = get_tracks('6uDp1IXlSsfqDCJgwaGoEJ', headers, track_list={})
track_list = get_track_data(track_list, headers)
track_list = get_track_features(track_list, headers)
# Optional (one request per track): per-track pitch / timbre averages.
# track_list = get_audio_feature(track_list, headers)
track_data = list(track_list.values())

In [6]:
# Master table: one row per track, indexed by track id, all columns kept.
track_df_master = pd.DataFrame(track_data).set_index('_id')

# Model input: drop the descriptive columns and the features the models do
# not take. drop() returns a new frame, so the master table stays untouched.
track_df = track_df_master.drop(
    columns=['name', 'artist', 'explicit', 'duration_ms', 'time_signature', 'mode', 'key']
)

# Keep only the second '-'-separated part of release_date as a number.
# Dates without a second part become NaN and are filled with the median.
track_df['release_date'] = pd.to_numeric(track_df['release_date'].str.split('-').str[1])
track_df['release_date'] = track_df['release_date'].fillna(track_df['release_date'].median(skipna=True))

# NOTE(security): pickle.load can execute arbitrary code; only load model
# files that come from a trusted source.
with open('tone.model', 'rb') as model_file:
    tone_model = pickle.load(model_file)
tone_X = track_df.copy()
tone = tone_model.predict(tone_X)
track_df['tone'] = tone
hot_track = track_df[track_df['tone'] == 0]
cold_track = track_df[track_df['tone'] == 1]

# Tone 0 -> spring / summer classifier.
hot_track_X = hot_track.drop(['tone'], axis=1)
with open('spring_summer.model', 'rb') as model_file:
    hot_model = pickle.load(model_file)
# Skip predict() when no track has this tone; estimators commonly reject
# zero-row input.
season = hot_model.predict(hot_track_X) if len(hot_track_X) else []
hot_track_X['season'] = season
hot_track_X['season'] = hot_track_X['season'].replace({0: 'spring', 1: 'summer'})

# Tone 1 -> autumn / rainy / winter classifier.
cold_track_X = cold_track.drop(['tone'], axis=1)
with open('winter_autumn.model', 'rb') as model_file:
    cold_model = pickle.load(model_file)
season = cold_model.predict(cold_track_X) if len(cold_track_X) else []
cold_track_X['season'] = season
cold_track_X['season'] = cold_track_X['season'].replace({0: 'autumn', 1: 'rainy', 2: 'winter'})

# Series.append was removed in pandas 2.0; pd.concat is the replacement.
season_sr = pd.concat([cold_track_X['season'], hot_track_X['season']])
track_df = pd.merge(track_df, season_sr, left_index=True, right_index=True, how='left')

In [7]:
# pitch_keys = ['C','C#','D', 'D#', 'E' , 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
# timbre_keys = ['B1', 'B2', 'B3', 'B4', 'B5' , 'B6', 'B7', 'B8', 'B9', 'B10', 'B11', 'B12']

# track_df = pd.concat([track_df, track_df['pitches'].apply(pd.Series)], axis=1)
# track_df = pd.concat([track_df, track_df['timbre'].apply(pd.Series)], axis=1)
# track_df.drop(0, axis=1, inplace=True)
# for pitch in pitch_keys:
#     track_df[pitch].fillna(track_df[pitch].median(skipna=True), inplace=True)

# for timbre in timbre_keys:
#     track_df[timbre].fillna(track_df[timbre].median(skipna=True), inplace=True)

# track_df.drop("timbre", axis=1, inplace=True)
# track_df.drop("pitches", axis=1, inplace=True)

# NOTE(security): pickle.load can execute arbitrary code; only load model
# files that come from a trusted source.
with open('emotion.model', 'rb') as model_file:
    emotion_model = pickle.load(model_file)

# The model takes the numeric features only. 'emotion' and 'cluster' exist
# when this cell (or a later one) has already been run, so they are dropped
# too, which makes the cell safe to re-run.
emotion_X = (
    track_df
    .drop(['tone', 'season'], axis=1)
    .drop(columns=['emotion', 'cluster'], errors='ignore')
)
emotion = emotion_model.predict(emotion_X)
track_df['emotion'] = emotion
track_df['emotion'] = track_df['emotion'].replace({0: 'chill', 1: 'happy', 2: 'party', 3: 'sad'})
track_df['emotion'].value_counts()

chill    342
sad       63
happy     40
party     31
Name: emotion, dtype: int64

In [69]:
from kmodes.kmodes import KModes
import time
import math
t0 = time.time()

# Start with 20 clusters and shrink n until every cluster holds at least 10
# tracks, or n reaches 2.
n = 20
while True:
    # A fixed random_state makes the clustering reproducible across runs.
    # (The variable keeps its original name although init is "Huang".)
    km_cao = KModes(n_clusters=n, init = "Huang", n_init=18, random_state=42)
    clusters = km_cao.fit_predict(track_df[['emotion','season']])
    track_count = pd.Series(clusters).value_counts()
    min_track_count = track_count.min()
    single_digit_count = track_count[track_count < 10].size
    # Stop on success, or at n == 2 so that n always describes the fit that
    # produced `clusters` (it used to fall to 1 after a 2-cluster fit).
    if min_track_count > 9 or n <= 2:
        break
    # Drop half as many clusters as there are undersized ones, but never go
    # below 2 clusters in a single step.
    dec = math.ceil(single_digit_count/2)
    print(dec)
    n -= dec if (dec > 0) and (n - dec > 1) else 1
t1 = time.time()
print(t1-t0)

track_df['cluster'] = clusters
track_df['cluster'].value_counts()

5
2
2
1
4.50798487663269


9    95
0    87
8    84
6    72
2    36
4    28
3    27
1    22
5    14
7    11
Name: cluster, dtype: int64

In [81]:
# Playlist name fragments, keyed by the alphabetically sorted tuple of the
# dominant season label(s) of a cluster.
season_playlist_name = {
    ('summer',): 'Summer',
    ('spring',): 'Spring',
    ('autumn',): 'Autumn',
    ('winter',): 'Winter',
    ('rainy',): 'Rainy',
    ('spring','summer'): 'Sunny',
    ('autumn','summer'): 'Vividly',
    ('summer','winter'): 'Melty',
    ('rainy','summer'): 'Sultry',
    ('autumn','spring'): 'Peaceful',
    ('spring','winter'): 'Snowy',
    ('rainy','spring'): 'Cloudy',
    ('autumn','winter'): 'Windy',
    ('autumn','rainy'): 'Dampy',
    ('rainy','winter'): 'Freezy'
}

# Same idea for the dominant emotion label(s).
emotion_playlist_name = {
    ('happy',): 'Happiness',
    ('chill',): 'Chill',
    ('party',): 'Party',
    ('sad',): 'Sadness',
    ('chill','happy'): 'Comfort',
    ('happy','party'): 'Fun',
    ('happy','sad'): 'Confusion',
    ('chill','party'): 'Rhythm',
    ('party','sad'): 'Conflict',
    ('chill','sad'): 'Melancholy'
}

# For a cluster whose most frequent emotion is the key, tracks carrying one of
# the listed emotions are taken out of the cluster.
conflicting_emotions = {
    'happy': ('sad',),
    'party': ('sad',),
    'sad': ('party', 'happy'),
}
# Removed tracks get a playlist of their own only when there are at least
# this many of them (the original check was "count > 9").
MIN_SPLIT_SIZE = 10
PLAYLIST_COLUMNS = ['name', 'artist', 'season', 'emotion', 'id']


def _dominant_labels(rank):
    """Return the sorted tuple of dominant labels of a value-count Series.

    ``rank`` must be sorted in descending order. The result holds the most
    frequent label, plus the runner-up when its count is more than half of
    the top count.
    """
    labels = (rank.index[0],)
    # .iloc: the index holds label strings, so counts are read by position.
    if rank.size > 1 and 2 * rank.iloc[1] > rank.iloc[0]:
        labels = (rank.index[0], rank.index[1])
    return tuple(sorted(labels))


clustered_playlist = {}
# Iterate over the cluster labels that actually occur instead of range(n), so
# the cell does not depend on n matching the fitted clustering.
for i in sorted(int(label) for label in track_df['cluster'].unique()):
    # .copy() so that adding columns does not write into a slice of track_df.
    cluster = track_df[track_df['cluster']==i].copy()
    cluster['id'] = cluster.index
    cluster = pd.merge(cluster, track_df_master[['name','artist']], left_index=True, right_index=True, how='left')

    season_rank = cluster['season'].value_counts().sort_values(ascending=False)
    emotion_rank = cluster['emotion'].value_counts().sort_values(ascending=False)

    # Order tracks so that the most frequent emotion of the cluster comes first.
    cluster['emo_sort'] = cluster['emotion'].map(emotion_rank)
    cluster = cluster.sort_values(by='emo_sort', ascending=False)

    print(emotion_rank)
    print(cluster.head()['emotion'])
    print(cluster.tail()['emotion'])

    # The season part of the name is taken from the whole cluster.
    season_name = season_playlist_name[_dominant_labels(season_rank)]

    top_emotion = emotion_rank.index[0]
    for emotion in conflicting_emotions.get(top_emotion, ()):
        conflicting = cluster[cluster['emotion'] == emotion]
        if conflicting.empty:
            continue
        if len(conflicting) >= MIN_SPLIT_SIZE:
            # One key per (cluster, emotion) so that two splits of the same
            # cluster do not overwrite each other.
            clustered_playlist[f's{i}_{emotion}'] = {
                'name': season_name + " " + emotion_playlist_name[(emotion,)],
                'tracks': conflicting[PLAYLIST_COLUMNS]
            }
        # Conflicting tracks leave the main playlist whether or not they were
        # numerous enough for a playlist of their own.
        cluster = cluster.drop(conflicting.index)

    # Name the main playlist after the emotions that are still in it.
    remaining_rank = cluster['emotion'].value_counts().sort_values(ascending=False)
    clustered_playlist[i] = {
        'name': season_name + " " + emotion_playlist_name[_dominant_labels(remaining_rank)],
        'tracks': cluster[PLAYLIST_COLUMNS]
    }
    # Track ids of the most recently processed cluster.
    test_list = cluster['id']

# Every entry's 'tracks' is a DataFrame, so the split playlists (string keys)
# can be summarised the same way as the main ones.
for pll in clustered_playlist.values():
    print('='*20)
    print(pll['name'])
    print(pll['tracks']['season'].value_counts().sort_values(ascending=False))
    print(pll['tracks']['emotion'].value_counts().sort_values(ascending=False))

chill    66
party    10
happy    10
sad       1
Name: emotion, dtype: int64
_id
2LBqCSwhJGcFQeTHMVGwy3    chill
13PgjEmWzRZ2juOJqXF6Jr    chill
3LH6EfPCeySeuymuK2gWAO    chill
4WctYxntPchMNURUL7npao    party
4c57ZlmIohTkITIGzyE197    chill
Name: emotion, dtype: object
_id
38ayrEdVAehCskpDhbGoeQ    chill
58mtgcQVZ56NgWHKsN94nD    chill
3Dv1eDb0MEgF93GpLXlucZ    party
5CPn3icFmQYZQD135ClcYV    chill
0iMGyvMJqv21SF6iTxrcvQ    chill
Name: emotion, dtype: object
happy    19
party     3
Name: emotion, dtype: int64
_id
7JKGMlRO9egsTPss4iSYJy    party
4DlWvj7f4qXBjoYmmMu1Q9    happy
0PzHRsbxdvCtlFDzZeIDIa    happy
7loNIteYiKMEZtPDJKqSt9    happy
1i2BT8khCCaPqIxNhACQ6g    happy
Name: emotion, dtype: object
_id
12hzuN9DIivqREfJumZyMt    happy
0TrPqhAMoaKUFLR7iYDokf    happy
4Qx2Xrs56EQItXv33CRqNx    happy
1CC3sbNapIE3ABGUdpp2oU    happy
5eIhQy1uCsqvsrfchFzKjN    happy
Name: emotion, dtype: object
sad      34
party     2
Name: emotion, dtype: int64
_id
01TyFEZu6mHbffsVfxgrFn    sad
2FkI9ESTJ1VFx6

In [296]:
# Build the Spotify track URI for every id in test_list.
['spotify:track:' + track_id for track_id in test_list]

['spotify:track:5dFo1PvVu4Hmh7arpOgxZh',
 'spotify:track:0Pf1u3VenZZcGo3KMy1kXP',
 'spotify:track:5HxOb9uuXWpf7AICL8Nk0o',
 'spotify:track:3HblOVuBVZqg3fESHDFQvb',
 'spotify:track:3KAyMETjqAZhMkxjZIZ1PB',
 'spotify:track:2YEnGQwfXZokCfRbTykYbo',
 'spotify:track:0T2q3alHawjR9jjiORLfhI',
 'spotify:track:0SYvT7Ec9YrDRFPV9g9UXt',
 'spotify:track:3bjfVBezMzhzI6I9irA5Ra',
 'spotify:track:6Ed1q0X8oSKSm4IIhiQbYg']