In [83]:
## Basic Packages

import time
from math import sqrt
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

## Widget packages

from ipywidgets import interact, interactive, fixed, interact_manual, Layout
import ipywidgets as widgets

## Sklearn packages

from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.decomposition import PCA as sklearnPCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.manifold import TSNE
from sklearn.cluster import AgglomerativeClustering

## Spotipy packages (Spotify API)

import spotipy.oauth2
from spotipy.oauth2 import SpotifyOAuth
import spotipy.util as util
import spotipy

## Importing Bokeh to explore outliers

from bokeh.plotting import figure, output_file, show
import bokeh.models as bmo
from bokeh.palettes import d3
from bokeh.models import Legend, BoxSelectTool, BoxZoomTool, LassoSelectTool
import bokeh.plotting as bpl

## Notebook display settings

pd.options.display.max_columns = 999

start_time = time.time()

## Audio feature descriptions

#https://developer.spotify.com/documentation/web-api/reference/tracks/get-audio-features/

In [11]:
token = util.prompt_for_user_token(username = sp_username, client_id= sp_client_id,client_secret= sp_client_secret,redirect_uri=sp_redirect_uri)



            User authentication requires interaction with your
            web browser. Once you enter your credentials and
            give authorization, you will be redirected to
            a url.  Paste that url you were directed to to
            complete the authorization.

        
Opened https://accounts.spotify.com/authorize?client_id=c11d185f127941938988ee6a37061b85&response_type=code&redirect_uri=http%3A%2F%2Flocalhost%3A8777%2Fcallback%2F in your browser


Enter the URL you were redirected to: http://localhost:8777/callback/?code=AQBMFk7xZRWdIZxxEMuwOGAhMUoP8p_e8WIMEowkXYpJhNpqAS-5LpTFYe1HYZT2fsaEF7JEKpCYNzhLvlR9uYbnPabz6EgwYx5Nq1yaV0DO1PDMMMZRtO6ZdAAm_I9i6XejzuPnCDdjH9HpY6M_NsgUSeB0tQEXQF0cTwm8OQnrb4EFbUjekQ8PptymjNfwEJUjAHM




In [12]:
sp = spotipy.Spotify(auth=token)

## Functions Related to API Calls

In [24]:
def sp_search_artist(sp_artist_name):
        ## Takes user name from user input and searches for artist information - returns dataframe
        artist_search_results = sp.search(q = sp_artist_name, limit = 1, type = 'artist')
        artist_search_results_2 = artist_search_results['artists']
        artist_search_results_3 = artist_search_results_2['items']
        sp_search_artists_df = pd.DataFrame(artist_search_results_3)
        return sp_search_artists_df

In [25]:
def sp_search_album(sp_artist_id):
        ## Search for albums based on artist ID
        album_search_results = sp.artist_albums(sp_artist_id, album_type = 'album')
        album_search_results_2 = album_search_results['items']
        sp_search_album_df = pd.DataFrame(album_search_results_2)       
        return sp_search_album_df

In [26]:
def sp_search_track(album, alb_num, rel_date):
        ## Function that utilizes Spotify's album_tracks method to return all tracks from an album ID
        track_search_results = sp.album_tracks(album, limit = 50)
        track_search_results_2 = track_search_results['items']
        sp_search_track_df = pd.DataFrame(track_search_results_2)
        sp_search_track_df['album_name'] = alb_num
        sp_search_track_df['release_date'] = rel_date
        return sp_search_track_df

In [27]:
def sp_features_audio(song_list):
        ## Function that utilizes Spotify's audio_features method to return audio features (energy, tempo, etc)
        audio_features_results = sp.audio_features(tracks = song_list)
        audio_df = pd.DataFrame(audio_features_results)
        return audio_df

In [28]:
def audio_track_analysis(song_list):
        ## Function that utilizes Spotify's audio_features method to return audio features (energy, tempo, speechiness, etc)
        audio_analysis_list = []
        for i in range(len(song_list)):
            results = sp.audio_analysis(song_list[i])
            audio_analysis_list.append(results)
        track_df = pd.DataFrame(audio_analysis_list)
        return track_df

In [39]:
def count_stats (column):
    count_list = []
    for row in range(len(track_analysis_df)):
        count = 0
        for item in range(len(track_analysis_df.iloc[row, column])):
            count += 1
        count_list.append(count)
    return count_list

## Creating Dataframes from API Calls

In [30]:
## Search for artist by input

artist_search_df = sp_search_artist(input("What artists would you like to see? "))

## Collect artist id and genres

artist_search_id = list(artist_search_df.id)

artist_search_genre = list(artist_search_df.genres)

What artists would you like to see? Arctic Monkeys


In [31]:
## Starting to build album information, then saving off information for later calls

album_search_df = sp_search_album(artist_search_id[0])

album_name_list = list(album_search_df.name)
album_id = list(album_search_df.id)
album_release_date = list(album_search_df.release_date)

In [32]:
## Iterating through albums to return a dataframe with track information

for i in range(len(album_id)):
    if i == 0:
        album_track_df = sp_search_track(album_id[i], album_name_list[i], album_release_date[i])
    else:
        new_track_df = sp_search_track(album_id[i], album_name_list[i], album_release_date[i])
        album_track_df = album_track_df.append(new_track_df)
        album_track_df.reset_index(drop = True, inplace = True)

## Track list will be used to get audio_features and track_analysis for each song        
        
track_list = list(album_track_df.id)

In [33]:
## Audio features for each song

for i in range(len(track_list)):
    if i == 0:
        audio_features_df = sp_features_audio(track_list[i])
    else:
        new_audio_features_df = sp_features_audio(track_list[i])
        audio_features_df = audio_features_df.append(new_audio_features_df)
        audio_features_df.reset_index(drop = True, inplace = True)  

In [34]:
## Track analysis for each song

track_analysis_df = audio_track_analysis(track_list)
track_analysis_df.drop(columns = ['meta', 'track'], inplace = True)

In [60]:
## Creating counts for each track_analysis record

bars_count = count_stats(0)
beats_count = count_stats(1)
sections_count = count_stats(2)
segments_count = count_stats(3)
tatums_count = count_stats(4)

audio_df = pd.DataFrame({'bars_count': bars_count, 'beats_count': beats_count, 
                      'sections_count': sections_count, 'segments_count': segments_count, 
                      'tatums_count': tatums_count})

In [61]:
## Appending track id numbers to audio analysis

track_series = pd.Series(track_list)

audio_df = pd.concat([audio_df,track_series], axis = 1)
audio_df = audio_df.rename(columns = {0 : 'id'})

In [96]:
## Combining audio features and audio analysis features

combined_df = pd.merge(album_track_df, audio_features_df, how = 'left', on = 'id')

In [98]:
## Renaming columns in merged_df

combined_df = combined_df[['href', 'id', 'name', 'track_number', 'album_name', 'release_date',
    'acousticness', 'danceability', 'energy', 'instrumentalness', 'key', 'liveness', 
    'loudness', 'mode', 'speechiness', 'tempo', 'time_signature', 'valence']]

In [99]:
audio_df = pd.concat([combined_df, audio_df], axis=1)

In [132]:
## Creating a new field for song length (in seconds)

song_length = []

for i in range(len(album_track_df)):
    song_seconds = (album_track_df['duration_ms'][i] / 1000)
    song_length.append(song_seconds)

In [162]:
## Calculating beats per minute and 

song_len = pd.Series(song_length)

audio_analysis = pd.concat([audio_df, song_len], axis = 1)

audio_analysis.rename(columns = {0 : 'song_length_seconds'}, inplace = True)

audio_analysis['bpm'] = ((audio_analysis['beats_count'] / (audio_analysis['song_length_seconds']) / 60))

audio_analysis = audio_analysis.rename(columns = {'name' : 'song_name'})

In [163]:
cols_to_use = combined_df.columns.difference(audio_analysis.columns)

In [180]:
merged_df = pd.merge(audio_analysis, combined_df[cols_to_use], how = 'left', left_index=True, right_index=True)

In [181]:
## Trimming down the audio features that I want to explore initially

features_df = combined_df[['release_date', 'album_name', 'name', 'acousticness', 'danceability', 'energy', 
    'loudness', 'key', 'mode', 'tempo', 'valence']]

In [182]:
## Separating the release date into year, month, day columns

features_df['year'], features_df['month'], features_df['day'] = features_df['release_date'].str.split('-',2).str

## Sort oldest album to newest album

features_df = features_df.sort_values(by = ['year', 'month', 'day'])

## Reset index and assign unique release dates to a list that will be used in the next loop

features_df.reset_index(drop = True, inplace = True)

unique_release = list(features_df.release_date.unique())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [183]:
## For loop that assigns an album number based on the release date for future plotting

album_number = []

count = 0
record_number = 1

for row in range(len(features_df)):
    if features_df.release_date.iloc[row] == unique_release[count]:
        #print(features_df.release_date.iloc[row], "-------->", unique_release[count], "------->", features_df.release_date.iloc[row] == unique_release[count])
        album_number.append(record_number)
    else:
        #print('Change Album')
        record_number += 1
        count += 1
        album_number.append(record_number)

In [184]:
## Converting resulting list into a dataframe to concat, then renaming the column to show album_number

album_number = pd.DataFrame(album_number)

features_df = pd.concat([features_df, album_number], axis = 1)

features_df.rename(columns = {0 : 'album_number', 'name' : 'song_name'}, inplace = True)

## Creating Raw Dataframe for Linear Regression - Song Recommender

In [185]:
cols_to_use_2 = merged_df.columns.difference(features_df.columns)

raw_features_df = pd.merge(merged_df, features_df[cols_to_use_2], how = 'left', left_index=True, right_index=True)

#raw_features_df = raw_df[raw_df.duplicated(subset = 'song_name', keep = 'first') == False]

#raw_features_df.reset_index(drop = True, inplace = True)

KeyError: "Index(['bars_count', 'beats_count', 'bpm', 'href', 'id', 'instrumentalness',\n       'liveness', 'name', 'sections_count', 'segments_count',\n       'song_length_seconds', 'speechiness', 'tatums_count', 'time_signature',\n       'track_number'],\n      dtype='object') not in index"

In [175]:
raw_df_columns = list(raw_features_df.columns.values)

In [178]:
raw_features_df.head()

Unnamed: 0,href,id,song_name_x,track_number,album_name_x,release_date_x,acousticness_x,danceability_x,energy_x,instrumentalness,key_x,liveness,loudness_x,mode_x,speechiness,tempo_x,time_signature,valence_x,song_name_x.1,track_number.1,album_name_x.1,song_name_x.2,track_number.2,album_name_x.2,bars_count,beats_count,sections_count,segments_count,tatums_count,id.1,song_length_seconds,bpm,name,release_date_y,album_name_y,song_name_y,acousticness_y,danceability_y,energy_y,loudness_y,key_y,mode_y,tempo_y,valence_y,year,month,day,album_number
0,https://api.spotify.com/v1/tracks/0FgNSsaSZTvb...,0FgNSsaSZTvbLXUumSO8LQ,Star Treatment,1,Tranquility Base Hotel & Casino,2018-05-11,0.243,0.581,0.767,0.00131,7,0.141,-5.026,0,0.0527,120.07,4,0.673,Star Treatment,1,Tranquility Base Hotel & Casino,Star Treatment,1,Tranquility Base Hotel & Casino,180,710,15,1229,1420,0FgNSsaSZTvbLXUumSO8LQ,354.64,0.033367,Star Treatment,2006-02-21,"Whatever People Say I Am, That's What I'm Not",The View From The Afternoon,0.00487,0.387,0.922,-5.192,9,0,146.478,0.417,2006,2,21,1
1,https://api.spotify.com/v1/tracks/2AKwcNHeGAqC...,2AKwcNHeGAqCpD6etK425X,One Point Perspective,2,Tranquility Base Hotel & Casino,2018-05-11,0.057,0.714,0.585,0.00444,8,0.144,-4.57,1,0.0419,77.104,4,0.842,One Point Perspective,2,Tranquility Base Hotel & Casino,One Point Perspective,2,Tranquility Base Hotel & Casino,66,268,7,743,536,2AKwcNHeGAqCpD6etK425X,208.64,0.021408,One Point Perspective,2006-02-21,"Whatever People Say I Am, That's What I'm Not",I Bet You Look Good On The Dancefloor,0.00225,0.535,0.948,-4.19,6,0,103.183,0.778,2006,2,21,1
2,https://api.spotify.com/v1/tracks/0mQiDbYxHElU...,0mQiDbYxHElUp1eNpLZXaY,American Sports,3,Tranquility Base Hotel & Casino,2018-05-11,0.00169,0.372,0.865,0.00293,7,0.127,-4.288,0,0.0513,176.689,3,0.594,American Sports,3,Tranquility Base Hotel & Casino,American Sports,3,Tranquility Base Hotel & Casino,152,455,8,576,910,0mQiDbYxHElUp1eNpLZXaY,158.04,0.047984,American Sports,2006-02-21,"Whatever People Say I Am, That's What I'm Not",Fake Tales Of San Francisco,0.00998,0.454,0.778,-4.225,9,1,127.158,0.704,2006,2,21,1
3,https://api.spotify.com/v1/tracks/7co8zvT6HJuy...,7co8zvT6HJuyT81IDukyID,Tranquility Base Hotel & Casino,4,Tranquility Base Hotel & Casino,2018-05-11,0.0411,0.584,0.835,0.0147,9,0.199,-4.326,0,0.0297,142.2,4,0.781,Tranquility Base Hotel & Casino,4,Tranquility Base Hotel & Casino,Tranquility Base Hotel & Casino,4,Tranquility Base Hotel & Casino,124,501,10,771,1002,7co8zvT6HJuyT81IDukyID,212.093,0.03937,Tranquility Base Hotel & Casino,2006-02-21,"Whatever People Say I Am, That's What I'm Not",Dancing Shoes,0.00328,0.522,0.889,-4.137,1,1,144.499,0.852,2006,2,21,1
4,https://api.spotify.com/v1/tracks/3NmndGYOIYoc...,3NmndGYOIYocA5OTHMVoaw,Golden Trunks,5,Tranquility Base Hotel & Casino,2018-05-11,0.129,0.492,0.564,1.7e-05,2,0.131,-5.811,1,0.0278,92.544,4,0.0896,Golden Trunks,5,Tranquility Base Hotel & Casino,Golden Trunks,5,Tranquility Base Hotel & Casino,67,265,7,450,530,3NmndGYOIYocA5OTHMVoaw,173.626,0.025438,Golden Trunks,2006-02-21,"Whatever People Say I Am, That's What I'm Not",You Probably Couldn't See For The Lights But Y...,0.00384,0.58,0.972,-4.274,11,0,105.492,0.926,2006,2,21,1


In [176]:
## Rearranging columns

raw_features_df = raw_features_df[[ 'release_date',
                                    'album_number',
                                    'album_name',
                                    'track_number',
                                    'song_name',
                                    'year',
                                    'month',
                                    'day',
                                    'acousticness',
                                    'danceability',
                                    'energy',
                                    'loudness',
                                    'valence',
                                    'song_length_seconds']]

KeyError: "['release_date' 'album_name' 'song_name' 'acousticness' 'danceability'\n 'energy' 'loudness' 'valence'] not in index"

In [161]:
raw_features_df

Unnamed: 0,href,id,song_name,track_number,album_name,release_date,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence,song_name.1,track_number.1,album_name.1,song_name.2,track_number.2,album_name.2,bars_count,beats_count,sections_count,segments_count,tatums_count,id.1,song_length_seconds,bpm
0,https://api.spotify.com/v1/tracks/0FgNSsaSZTvb...,0FgNSsaSZTvbLXUumSO8LQ,Star Treatment,1,Tranquility Base Hotel & Casino,2018-05-11,0.243000,0.581,0.767,0.001310,7,0.1410,-5.026,0,0.0527,120.070,4,0.6730,Star Treatment,1,Tranquility Base Hotel & Casino,Star Treatment,1,Tranquility Base Hotel & Casino,180,710,15,1229,1420,0FgNSsaSZTvbLXUumSO8LQ,354.640,0.033367
1,https://api.spotify.com/v1/tracks/2AKwcNHeGAqC...,2AKwcNHeGAqCpD6etK425X,One Point Perspective,2,Tranquility Base Hotel & Casino,2018-05-11,0.057000,0.714,0.585,0.004440,8,0.1440,-4.570,1,0.0419,77.104,4,0.8420,One Point Perspective,2,Tranquility Base Hotel & Casino,One Point Perspective,2,Tranquility Base Hotel & Casino,66,268,7,743,536,2AKwcNHeGAqCpD6etK425X,208.640,0.021408
2,https://api.spotify.com/v1/tracks/0mQiDbYxHElU...,0mQiDbYxHElUp1eNpLZXaY,American Sports,3,Tranquility Base Hotel & Casino,2018-05-11,0.001690,0.372,0.865,0.002930,7,0.1270,-4.288,0,0.0513,176.689,3,0.5940,American Sports,3,Tranquility Base Hotel & Casino,American Sports,3,Tranquility Base Hotel & Casino,152,455,8,576,910,0mQiDbYxHElUp1eNpLZXaY,158.040,0.047984
3,https://api.spotify.com/v1/tracks/7co8zvT6HJuy...,7co8zvT6HJuyT81IDukyID,Tranquility Base Hotel & Casino,4,Tranquility Base Hotel & Casino,2018-05-11,0.041100,0.584,0.835,0.014700,9,0.1990,-4.326,0,0.0297,142.200,4,0.7810,Tranquility Base Hotel & Casino,4,Tranquility Base Hotel & Casino,Tranquility Base Hotel & Casino,4,Tranquility Base Hotel & Casino,124,501,10,771,1002,7co8zvT6HJuyT81IDukyID,212.093,0.039370
4,https://api.spotify.com/v1/tracks/3NmndGYOIYoc...,3NmndGYOIYocA5OTHMVoaw,Golden Trunks,5,Tranquility Base Hotel & Casino,2018-05-11,0.129000,0.492,0.564,0.000017,2,0.1310,-5.811,1,0.0278,92.544,4,0.0896,Golden Trunks,5,Tranquility Base Hotel & Casino,Golden Trunks,5,Tranquility Base Hotel & Casino,67,265,7,450,530,3NmndGYOIYocA5OTHMVoaw,173.626,0.025438
5,https://api.spotify.com/v1/tracks/3nhzPKCm2yqG...,3nhzPKCm2yqGmgEhdAg19u,Four Out Of Five,6,Tranquility Base Hotel & Casino,2018-05-11,0.062600,0.601,0.863,0.000000,0,0.0717,-4.310,1,0.0685,130.189,4,0.6760,Four Out Of Five,6,Tranquility Base Hotel & Casino,Four Out Of Five,6,Tranquility Base Hotel & Casino,168,668,13,1028,1336,3nhzPKCm2yqGmgEhdAg19u,312.346,0.035644
6,https://api.spotify.com/v1/tracks/1JdArJ9NKCF9...,1JdArJ9NKCF9TQASGQgszg,The World's First Ever Monster Truck Front Flip,7,Tranquility Base Hotel & Casino,2018-05-11,0.346000,0.602,0.601,0.349000,10,0.0735,-5.742,1,0.0303,145.075,4,0.5530,The World's First Ever Monster Truck Front Flip,7,Tranquility Base Hotel & Casino,The World's First Ever Monster Truck Front Flip,7,Tranquility Base Hotel & Casino,108,432,9,548,864,1JdArJ9NKCF9TQASGQgszg,180.013,0.039997
7,https://api.spotify.com/v1/tracks/57oJKJys4oC7...,57oJKJys4oC7NTR7hwZkPU,Science Fiction,8,Tranquility Base Hotel & Casino,2018-05-11,0.029500,0.542,0.766,0.080200,7,0.3960,-5.411,1,0.0282,88.062,4,0.6430,Science Fiction,8,Tranquility Base Hotel & Casino,Science Fiction,8,Tranquility Base Hotel & Casino,67,271,9,653,542,57oJKJys4oC7NTR7hwZkPU,185.906,0.024295
8,https://api.spotify.com/v1/tracks/4z6mZWFgBZd0...,4z6mZWFgBZd07926K61Mi8,She Looks Like Fun,9,Tranquility Base Hotel & Casino,2018-05-11,0.009440,0.514,0.773,0.003260,0,0.3870,-3.754,1,0.0388,103.943,4,0.4450,She Looks Like Fun,9,Tranquility Base Hotel & Casino,She Looks Like Fun,9,Tranquility Base Hotel & Casino,75,311,9,559,622,4z6mZWFgBZd07926K61Mi8,182.560,0.028392
9,https://api.spotify.com/v1/tracks/7seSDB6TiLZa...,7seSDB6TiLZarbicyDIjiQ,Batphone,10,Tranquility Base Hotel & Casino,2018-05-11,0.312000,0.675,0.681,0.000000,5,0.3040,-5.728,0,0.0356,106.521,4,0.6750,Batphone,10,Tranquility Base Hotel & Casino,Batphone,10,Tranquility Base Hotel & Casino,118,473,13,859,946,7seSDB6TiLZarbicyDIjiQ,271.613,0.029024


In [None]:
print("--- %s seconds ---" % (time.time() - start_time))