# Spotify API: Collection of Songs & Audio Features

**Instructions**

To move forward with the project, you need to create a collection of songs with their audio features - as large as possible!

These are the songs that we will cluster. And, later, when the user inputs a song, we will find the cluster to which the song belongs and recommend a song from the same cluster. The more songs you have, the more accurate and diverse recommendations you'll be able to give.


In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pprint


# Initialize SpotiPy with client credentials.
#
# The client id/secret are deliberately NOT written in the notebook: a secret
# stored in a shared file must be treated as leaked and rotated. When called
# without arguments, SpotifyClientCredentials reads the credentials from the
# SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET environment variables, so set
# both before starting the kernel.
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials())   # start the Spotify connection

## Playlist 1: Top 100 Most Streamed Songs  on Spotify
https://open.spotify.com/playlist/5ABHKGoOzxkaa28ttQV9sE

In [2]:
# Request the tracks of playlist 1. The response is a paging dict whose keys
# ('items', 'next', 'total', ...) are inspected in the following cells.
playlist1 = sp.user_playlist_tracks("spotify", "5ABHKGoOzxkaa28ttQV9sE") 
# Uncomment either line to inspect the raw response:
#playlist1
#pprint.pprint(playlist1)

In [4]:
playlist1["total"]
#100 songs in total

100

In [5]:
playlist1.keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [6]:
playlist1["items"][0].keys()

dict_keys(['added_at', 'added_by', 'is_local', 'primary_color', 'track', 'video_thumbnail'])

In [7]:
playlist1["items"][0]["track"].keys()

dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'episode', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'name', 'popularity', 'preview_url', 'track', 'track_number', 'type', 'uri'])

In [8]:
#navigate to artist info

playlist1["items"][0]["track"]["artists"]

[{'external_urls': {'spotify': 'https://open.spotify.com/artist/1Xyo4u8uXC1ZmMpatF05PJ'},
  'href': 'https://api.spotify.com/v1/artists/1Xyo4u8uXC1ZmMpatF05PJ',
  'id': '1Xyo4u8uXC1ZmMpatF05PJ',
  'name': 'The Weeknd',
  'type': 'artist',
  'uri': 'spotify:artist:1Xyo4u8uXC1ZmMpatF05PJ'}]

In [9]:
#navigate to artist name

playlist1["items"][0]["track"]["artists"][0]['name']

'The Weeknd'

In [10]:
#navigate to song name

playlist1["items"][0]["track"]["name"]

'Blinding Lights'

In [11]:
#navigate to song id

playlist1["items"][0]["track"]["id"]

'0VjIjW4GlUZAMYd2vXMi3b'

In [12]:
# Getting songs information (album...artist...etc) dictionary

def get_songs(username, playlist_id):
    """Return every item of a playlist, following pagination to the end.

    Parameters
    ----------
    username
        Passed through as the first argument of ``sp.user_playlist_tracks``.
    playlist_id
        Id of the playlist to read.

    Returns
    -------
    list
        The ``'items'`` entries of the first response, followed by the
        ``'items'`` of every subsequent page.
    """
    page = sp.user_playlist_tracks(username, playlist_id)
    collected = page['items']

    # 'next' is falsy on the last page; until then, ask the client for the
    # following page and append its items to the running list.
    while True:
        if not page['next']:
            return collected
        page = sp.next(page)
        collected.extend(page['items'])

# All playlist items (one dict per song) of playlist 1, across every page.
songs_dic = get_songs("spotify", "5ABHKGoOzxkaa28ttQV9sE")
#songs_dic

In [13]:
# Song titles and song ids of the playlist "Top 100 most streamed songs on Spotify",
# collected into two parallel lists that follow the order of songs_dic.

songs_id = []
songs_name = []

for entry in songs_dic:
    track_info = entry["track"]
    songs_name.append(track_info["name"])
    songs_id.append(track_info["id"])

In [14]:
len(songs_id)

100

In [15]:
len(songs_name)

100

In [None]:
# Alternatively, build the same lists with list comprehensions:

#songs_name = [item['track']['name'] for item in songs_dic]

# song ids:

#songs_id2 = [item['track']['id'] for item in songs_dic]
#songs_id2

In [16]:
# Getting a LIST with the name of the first-listed artist of every song

# artist_ holds, per song, the full list of artist dicts;
# artists_name keeps only the name of the first artist in each of those lists.
artist_ = [entry['track']['artists'] for entry in songs_dic]
artists_name = [credited[0]['name'] for credited in artist_]

In [17]:
len(artists_name)

100

In [None]:
# Function to get artist names and artist ids

#def get_artist_and_ids_from_playlist(playlist_id):
    
    #tracks_from_playlist = get_playlist_tracks("spotify", playlist_id)
    
    #artists = []
    #artists_ids = []
    
    #for track in tracks_from_playlist:
        #artists_info = track["track"]['artists']
        
        #for artist in artists_info:
            #artists.append(artist['name'])
            #artists_ids.append(artist['id'])
    
    #return list(set(artists)), list(set(artists_ids))
    
    
 #artist_names, artists_ids = get_artist_and_ids_from_playlist('12Wbv8sIx84T5uh6iOoJ7V')

In [18]:
# Getting Audio Features into a dataframe

# sp.audio_features accepts at most 100 track ids per call, so the features
# are requested in batches of 100 and stacked. This covers every song in
# songs_id (not only the first 100) and keeps the rows in the same order as
# songs_id, so they still line up with the title/artist lists built above.
audio_features_rows = []
for batch_start in range(0, len(songs_id), 100):
    audio_features_rows.extend(sp.audio_features(songs_id[batch_start:batch_start + 100]))

df1 = pd.DataFrame(audio_features_rows)
df1.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.514,0.73,1,-5.934,1,0.0598,0.00146,9.5e-05,0.0897,0.334,171.005,audio_features,0VjIjW4GlUZAMYd2vXMi3b,spotify:track:0VjIjW4GlUZAMYd2vXMi3b,https://api.spotify.com/v1/tracks/0VjIjW4GlUZA...,https://api.spotify.com/v1/audio-analysis/0VjI...,200040,4
1,0.825,0.652,1,-3.183,0,0.0802,0.581,0.0,0.0931,0.931,95.977,audio_features,7qiZfU4dY1lWllzX7mPBI3,spotify:track:7qiZfU4dY1lWllzX7mPBI3,https://api.spotify.com/v1/tracks/7qiZfU4dY1lW...,https://api.spotify.com/v1/audio-analysis/7qiZ...,233713,4
2,0.824,0.588,6,-6.4,0,0.0924,0.692,0.000104,0.149,0.513,98.027,audio_features,2XU0oxnq2qxCpomAAuJY8K,spotify:track:2XU0oxnq2qxCpomAAuJY8K,https://api.spotify.com/v1/tracks/2XU0oxnq2qxC...,https://api.spotify.com/v1/audio-analysis/2XU0...,209438,4
3,0.501,0.405,1,-5.679,1,0.0319,0.751,0.0,0.105,0.446,109.891,audio_features,7qEHsqek33rTcFNT9PFqLf,spotify:track:7qEHsqek33rTcFNT9PFqLf,https://api.spotify.com/v1/tracks/7qEHsqek33rT...,https://api.spotify.com/v1/audio-analysis/7qEH...,182161,4
4,0.76,0.479,2,-5.574,1,0.0466,0.556,0.0,0.0703,0.913,89.911,audio_features,3KkXRkHbMCARz0aVfEt68P,spotify:track:3KkXRkHbMCARz0aVfEt68P,https://api.spotify.com/v1/tracks/3KkXRkHbMCAR...,https://api.spotify.com/v1/audio-analysis/3KkX...,158040,4


In [19]:
# Combine the three parallel lists (title, id, artist) into one dataframe,
# one row per song.

df2 = pd.DataFrame(
    dict(
        song_title=songs_name,
        songs_id=songs_id,
        artist=artists_name,
    )
)
df2.head()

Unnamed: 0,song_title,songs_id,artist
0,Blinding Lights,0VjIjW4GlUZAMYd2vXMi3b,The Weeknd
1,Shape of You,7qiZfU4dY1lWllzX7mPBI3,Ed Sheeran
2,Dance Monkey,2XU0oxnq2qxCpomAAuJY8K,Tones And I
3,Someone You Loved,7qEHsqek33rTcFNT9PFqLf,Lewis Capaldi
4,Sunflower - Spider-Man: Into the Spider-Verse,3KkXRkHbMCARz0aVfEt68P,Post Malone


In [21]:
# Place the audio features (df1) and the title/id/artist columns (df2) side by
# side in a single dataframe. Rows are matched on the index.

df3 = pd.concat(objs=[df1, df2], axis="columns")
df3.head(5)

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,type,id,uri,track_href,analysis_url,duration_ms,time_signature,song_title,songs_id,artist
0,0.514,0.73,1,-5.934,1,0.0598,0.00146,9.5e-05,0.0897,0.334,...,audio_features,0VjIjW4GlUZAMYd2vXMi3b,spotify:track:0VjIjW4GlUZAMYd2vXMi3b,https://api.spotify.com/v1/tracks/0VjIjW4GlUZA...,https://api.spotify.com/v1/audio-analysis/0VjI...,200040,4,Blinding Lights,0VjIjW4GlUZAMYd2vXMi3b,The Weeknd
1,0.825,0.652,1,-3.183,0,0.0802,0.581,0.0,0.0931,0.931,...,audio_features,7qiZfU4dY1lWllzX7mPBI3,spotify:track:7qiZfU4dY1lWllzX7mPBI3,https://api.spotify.com/v1/tracks/7qiZfU4dY1lW...,https://api.spotify.com/v1/audio-analysis/7qiZ...,233713,4,Shape of You,7qiZfU4dY1lWllzX7mPBI3,Ed Sheeran
2,0.824,0.588,6,-6.4,0,0.0924,0.692,0.000104,0.149,0.513,...,audio_features,2XU0oxnq2qxCpomAAuJY8K,spotify:track:2XU0oxnq2qxCpomAAuJY8K,https://api.spotify.com/v1/tracks/2XU0oxnq2qxC...,https://api.spotify.com/v1/audio-analysis/2XU0...,209438,4,Dance Monkey,2XU0oxnq2qxCpomAAuJY8K,Tones And I
3,0.501,0.405,1,-5.679,1,0.0319,0.751,0.0,0.105,0.446,...,audio_features,7qEHsqek33rTcFNT9PFqLf,spotify:track:7qEHsqek33rTcFNT9PFqLf,https://api.spotify.com/v1/tracks/7qEHsqek33rT...,https://api.spotify.com/v1/audio-analysis/7qEH...,182161,4,Someone You Loved,7qEHsqek33rTcFNT9PFqLf,Lewis Capaldi
4,0.76,0.479,2,-5.574,1,0.0466,0.556,0.0,0.0703,0.913,...,audio_features,3KkXRkHbMCARz0aVfEt68P,spotify:track:3KkXRkHbMCARz0aVfEt68P,https://api.spotify.com/v1/tracks/3KkXRkHbMCAR...,https://api.spotify.com/v1/audio-analysis/3KkX...,158040,4,Sunflower - Spider-Man: Into the Spider-Verse,3KkXRkHbMCARz0aVfEt68P,Post Malone


In [22]:
df3.columns

Index(['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'type', 'id', 'uri', 'track_href', 'analysis_url', 'duration_ms',
       'time_signature', 'song_title', 'songs_id', 'artist'],
      dtype='object')

In [None]:
# Saving playlist 1's dataframe to a csv file (relative path, so it is created
# in the current working directory). to_csv is called with its defaults, so the
# row index is written as well, as an unnamed first column.

df3.to_csv('audio_features_spotify_playlist.csv')

## Playlist 2: "Playlist of the decade"

https://open.spotify.com/playlist/7KGPsYfZic7mD4EZtnVrJf

In [23]:
# Request the tracks of playlist 2. The response is a paging dict whose keys
# ('items', 'next', 'total', ...) are inspected in the following cells.
playlist2 = sp.user_playlist_tracks("spotify", "7KGPsYfZic7mD4EZtnVrJf") 
#playlist2

In [None]:
#pprint.pprint(playlist2)

In [24]:
playlist2["total"]

# 65 total songs

65

In [25]:
playlist2.keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [26]:
playlist2["items"][0].keys()

dict_keys(['added_at', 'added_by', 'is_local', 'primary_color', 'track', 'video_thumbnail'])

In [27]:
# All playlist items (one dict per song) of "Playlist of the decade",
# across every page.

songs_dic2 = get_songs("spotify", "7KGPsYfZic7mD4EZtnVrJf")
#songs_dic2

In [28]:
# Song titles and song ids of "Playlist of the decade", collected into two
# parallel lists that follow the order of songs_dic2.
songs_id2 = []
songs_name2 = []

for entry in songs_dic2:
    track_info = entry["track"]
    songs_name2.append(track_info["name"])
    songs_id2.append(track_info["id"])

In [29]:
len(songs_name2)

65

In [31]:
# Getting a LIST with the name of the first-listed artist of every song
# (only names are collected here, no artist ids).

# artist_2 holds, per song, the full list of artist dicts;
# artists_name2 keeps only the name of the first artist in each of those lists.
artist_2 = [entry['track']['artists'] for entry in songs_dic2]
artists_name2 = [credited[0]['name'] for credited in artist_2]

In [32]:
len(artists_name2)

65

In [33]:
# Getting audio features into a dataframe

# sp.audio_features accepts at most 100 track ids per call, so the features
# are requested in batches of 100 and stacked. This covers every song in
# songs_id2 (not only the first 100) and keeps the rows in the same order as
# songs_id2, so they still line up with the title/artist lists built above.
audio_features_rows2 = []
for batch_start in range(0, len(songs_id2), 100):
    audio_features_rows2.extend(sp.audio_features(songs_id2[batch_start:batch_start + 100]))

df_1 = pd.DataFrame(audio_features_rows2)
df_1.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.587,0.299,8,-7.365,1,0.0263,0.779,0.0,0.123,0.356,94.992,audio_features,1bhUWB0zJMIKr9yVPrkEuI,spotify:track:1bhUWB0zJMIKr9yVPrkEuI,https://api.spotify.com/v1/tracks/1bhUWB0zJMIK...,https://api.spotify.com/v1/audio-analysis/1bhU...,259550,3
1,0.501,0.405,1,-5.679,1,0.0319,0.751,0.0,0.105,0.446,109.891,audio_features,7qEHsqek33rTcFNT9PFqLf,spotify:track:7qEHsqek33rTcFNT9PFqLf,https://api.spotify.com/v1/tracks/7qEHsqek33rT...,https://api.spotify.com/v1/audio-analysis/7qEH...,182161,4
2,0.554,0.321,9,-8.251,1,0.028,0.893,0.0,0.0996,0.288,135.047,audio_features,6QPKYGnAW9QozVz2dSWqRg,spotify:track:6QPKYGnAW9QozVz2dSWqRg,https://api.spotify.com/v1/tracks/6QPKYGnAW9Qo...,https://api.spotify.com/v1/audio-analysis/6QPK...,285240,4
3,0.558,0.451,9,-6.163,1,0.0388,0.292,0.0,0.101,0.238,150.008,audio_features,76eBb8heDKtUg68WiKX9j1,spotify:track:76eBb8heDKtUg68WiKX9j1,https://api.spotify.com/v1/tracks/76eBb8heDKtU...,https://api.spotify.com/v1/audio-analysis/76eB...,265382,4
4,0.302,0.655,9,-5.325,1,0.035,0.166,0.0,0.117,0.271,104.918,audio_features,65QUgRcwZuYhJjgCQKeAqG,spotify:track:65QUgRcwZuYhJjgCQKeAqG,https://api.spotify.com/v1/tracks/65QUgRcwZuYh...,https://api.spotify.com/v1/audio-analysis/65QU...,251280,3


In [34]:
# Combine the second playlist's three parallel lists (title, id, artist) into
# one dataframe, one row per song.

df_2 = pd.DataFrame(
    dict(
        song_title=songs_name2,
        songs_id=songs_id2,
        artist=artists_name2,
    )
)
df_2.head(5)

Unnamed: 0,song_title,songs_id,artist
0,Perfect Duet (Ed Sheeran & Beyoncé),1bhUWB0zJMIKr9yVPrkEuI,Ed Sheeran
1,Someone You Loved,7qEHsqek33rTcFNT9PFqLf,Lewis Capaldi
2,Someone Like You,6QPKYGnAW9QozVz2dSWqRg,Adele
3,Take Me Home,76eBb8heDKtUg68WiKX9j1,Jess Glynne
4,The Writer,65QUgRcwZuYhJjgCQKeAqG,Ellie Goulding


In [35]:
# Place the audio features (df_1) and the title/id/artist columns (df_2) side
# by side in a single dataframe. Rows are matched on the index.

df_3 = pd.concat(objs=[df_1, df_2], axis="columns")
df_3.head(5)

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,type,id,uri,track_href,analysis_url,duration_ms,time_signature,song_title,songs_id,artist
0,0.587,0.299,8,-7.365,1,0.0263,0.779,0.0,0.123,0.356,...,audio_features,1bhUWB0zJMIKr9yVPrkEuI,spotify:track:1bhUWB0zJMIKr9yVPrkEuI,https://api.spotify.com/v1/tracks/1bhUWB0zJMIK...,https://api.spotify.com/v1/audio-analysis/1bhU...,259550,3,Perfect Duet (Ed Sheeran & Beyoncé),1bhUWB0zJMIKr9yVPrkEuI,Ed Sheeran
1,0.501,0.405,1,-5.679,1,0.0319,0.751,0.0,0.105,0.446,...,audio_features,7qEHsqek33rTcFNT9PFqLf,spotify:track:7qEHsqek33rTcFNT9PFqLf,https://api.spotify.com/v1/tracks/7qEHsqek33rT...,https://api.spotify.com/v1/audio-analysis/7qEH...,182161,4,Someone You Loved,7qEHsqek33rTcFNT9PFqLf,Lewis Capaldi
2,0.554,0.321,9,-8.251,1,0.028,0.893,0.0,0.0996,0.288,...,audio_features,6QPKYGnAW9QozVz2dSWqRg,spotify:track:6QPKYGnAW9QozVz2dSWqRg,https://api.spotify.com/v1/tracks/6QPKYGnAW9Qo...,https://api.spotify.com/v1/audio-analysis/6QPK...,285240,4,Someone Like You,6QPKYGnAW9QozVz2dSWqRg,Adele
3,0.558,0.451,9,-6.163,1,0.0388,0.292,0.0,0.101,0.238,...,audio_features,76eBb8heDKtUg68WiKX9j1,spotify:track:76eBb8heDKtUg68WiKX9j1,https://api.spotify.com/v1/tracks/76eBb8heDKtU...,https://api.spotify.com/v1/audio-analysis/76eB...,265382,4,Take Me Home,76eBb8heDKtUg68WiKX9j1,Jess Glynne
4,0.302,0.655,9,-5.325,1,0.035,0.166,0.0,0.117,0.271,...,audio_features,65QUgRcwZuYhJjgCQKeAqG,spotify:track:65QUgRcwZuYhJjgCQKeAqG,https://api.spotify.com/v1/tracks/65QUgRcwZuYh...,https://api.spotify.com/v1/audio-analysis/65QU...,251280,3,The Writer,65QUgRcwZuYhJjgCQKeAqG,Ellie Goulding


In [36]:
df3.shape

(100, 21)

In [38]:
# Concatenating both final dataframes from both playlists into 1 single dataframe with 165 songs in total (100 from Playlist 1 and 65 from Playlist 2)

# ignore_index=True renumbers the rows 0..n-1. Without it each frame keeps its
# own 0-based labels, so the combined frame carries duplicate index labels and
# a label lookup such as df4.loc[0] returns two rows.
# NOTE(review): a song present in both playlists still appears twice here
# (e.g. "Someone You Loved"); de-duplicate on "id" before clustering if that
# is not wanted.
df4 = pd.concat([df3, df_3], axis=0, ignore_index=True)
df4.head(5)

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,type,id,uri,track_href,analysis_url,duration_ms,time_signature,song_title,songs_id,artist
0,0.514,0.73,1,-5.934,1,0.0598,0.00146,9.5e-05,0.0897,0.334,...,audio_features,0VjIjW4GlUZAMYd2vXMi3b,spotify:track:0VjIjW4GlUZAMYd2vXMi3b,https://api.spotify.com/v1/tracks/0VjIjW4GlUZA...,https://api.spotify.com/v1/audio-analysis/0VjI...,200040,4,Blinding Lights,0VjIjW4GlUZAMYd2vXMi3b,The Weeknd
1,0.825,0.652,1,-3.183,0,0.0802,0.581,0.0,0.0931,0.931,...,audio_features,7qiZfU4dY1lWllzX7mPBI3,spotify:track:7qiZfU4dY1lWllzX7mPBI3,https://api.spotify.com/v1/tracks/7qiZfU4dY1lW...,https://api.spotify.com/v1/audio-analysis/7qiZ...,233713,4,Shape of You,7qiZfU4dY1lWllzX7mPBI3,Ed Sheeran
2,0.824,0.588,6,-6.4,0,0.0924,0.692,0.000104,0.149,0.513,...,audio_features,2XU0oxnq2qxCpomAAuJY8K,spotify:track:2XU0oxnq2qxCpomAAuJY8K,https://api.spotify.com/v1/tracks/2XU0oxnq2qxC...,https://api.spotify.com/v1/audio-analysis/2XU0...,209438,4,Dance Monkey,2XU0oxnq2qxCpomAAuJY8K,Tones And I
3,0.501,0.405,1,-5.679,1,0.0319,0.751,0.0,0.105,0.446,...,audio_features,7qEHsqek33rTcFNT9PFqLf,spotify:track:7qEHsqek33rTcFNT9PFqLf,https://api.spotify.com/v1/tracks/7qEHsqek33rT...,https://api.spotify.com/v1/audio-analysis/7qEH...,182161,4,Someone You Loved,7qEHsqek33rTcFNT9PFqLf,Lewis Capaldi
4,0.76,0.479,2,-5.574,1,0.0466,0.556,0.0,0.0703,0.913,...,audio_features,3KkXRkHbMCARz0aVfEt68P,spotify:track:3KkXRkHbMCARz0aVfEt68P,https://api.spotify.com/v1/tracks/3KkXRkHbMCAR...,https://api.spotify.com/v1/audio-analysis/3KkX...,158040,4,Sunflower - Spider-Man: Into the Spider-Verse,3KkXRkHbMCARz0aVfEt68P,Post Malone


In [47]:
print(df4.shape)
print("Audio Features Df Columns are: ",df4.columns)

(165, 21)
Audio Features Df Columns are:  Index(['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'type', 'id', 'uri', 'track_href', 'analysis_url', 'duration_ms',
       'time_signature', 'song_title', 'songs_id', 'artist'],
      dtype='object')


In [None]:
# Saving the combined dataframe of both playlists to a csv file (relative path,
# so it is created in the current working directory). to_csv is called with its
# defaults, so the row index is written as well, as an unnamed first column.

df4.to_csv("both_playlists_audio_features.csv")