# Lab | API wrappers 

**Create your collection of songs & audio features**

Instructions:

To move forward with the project, you need to create a collection of songs with their audio features - as large as possible!

These are the songs that we will cluster. Later, when the user inputs a song, we will find the cluster to which the song belongs and recommend a song from the same cluster. The more songs you have, the more accurate and diverse recommendations you'll be able to give. However, you might want to make sure the collected songs are "curated" in a certain way: try to find playlists that are diverse but that also meet certain standards.

The process of sending hundreds or thousands of requests can take some time - it's normal if you have to wait a few minutes (or, if you're ambitious, even hours) to get all the data you need.

## Loading the libraries

In [1]:
from bs4 import BeautifulSoup
import requests
import re
import pandas as pd

## Set up Spotify API

In [2]:
#!pip install spotipy

In [3]:
from config import *
import pandas as pd

In [4]:
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials


# Initialize spotipy with the application's client credentials.
# NOTE(review): client_id / client_secret are presumably provided by the
# `from config import *` star-import; confirm they are defined in config.py.
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id = client_id,
                                                           client_secret = client_secret))

## Get songs from a Spotify playlist

In [5]:
# Choose the playlist to harvest: its Spotify playlist ID and the account
# name, both of which are passed to get_playlist_songs below.
playlist_id = "7htu5ftbLBRFAwiuHVcUAg"
username = "spotify"

In [6]:
#extracting all information from the playlists' songs
def get_playlist_songs(username, playlist_id):
    """Fetch every item of a Spotify playlist, following pagination.

    Parameters
    ----------
    username : str
        Kept so existing calls keep working; it is not used, because the
        playlist endpoint identifies a playlist by its ID alone.
    playlist_id : str
        ID of the playlist to read.

    Returns
    -------
    list
        The playlist item dicts of all result pages, in playlist order.
    """
    # `user_playlist_tracks` is deprecated in spotipy; `playlist_items`
    # restricted to tracks is the documented replacement.
    page = sp.playlist_items(playlist_id, additional_types=("track",))
    playlist = list(page['items'])

    # Every response carries a `next` link until the last page is reached.
    while page['next']:
        page = sp.next(page)
        playlist.extend(page['items'])

    return playlist

# Download all items of the chosen playlist.
playlist_songs = get_playlist_songs(username, playlist_id)

# Number of playlist items that were retrieved.
len(playlist_songs)

9988

In [7]:
def get_playlist_info(tracks):
    """Map each track ID to its title followed by its artist names.

    Parameters
    ----------
    tracks : list of dict
        Playlist items; each is read through ``item["track"]`` and that
        track's ``"id"``, ``"name"`` and ``"artists"`` (a list of dicts
        with a ``"name"`` key).

    Returns
    -------
    dict
        ``{track_id: [title, artist_1, artist_2, ...]}``. A track that
        appears more than once keeps a single entry.
    """
    playlist_dict = {}

    for item in tracks:
        track = item.get("track")
        # A playlist item can carry no track payload (e.g. the track is no
        # longer available); skip it instead of failing on None["name"].
        if not track:
            continue

        artist_names = [artist["name"] for artist in track["artists"]]
        playlist_dict[track["id"]] = [track["name"], *artist_names]

    return playlist_dict

In [8]:
# One row per track ID; column 0 holds the title and the following columns
# hold the artist names in the order get_playlist_info listed them.
songs_df = pd.DataFrame.from_dict(get_playlist_info(playlist_songs), orient = 'index')

In [9]:
def clean_songs_df(df):
    """Reduce the raw playlist frame to a cleaned ``song`` / ``artist`` table.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by track ID whose column ``0`` is the song title and
        column ``1`` the first artist; any further columns are ignored.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``song`` (lower-cased, special
        characters removed, ``é``/``à`` replaced by ``e``/``a``) and
        ``artist``. The input frame is left untouched, so the function can
        be called repeatedly on the same data.
    """
    # Select the two wanted columns instead of dropping a hard-coded list of
    # column labels, which fails whenever the playlist yields a different
    # number of artist columns.
    cleaned = df[[0, 1]].rename(columns={0: 'song', 1: 'artist'})

    # Characters stripped from titles to improve search.
    spec_chars = ["!",'"',"#","%","(",")","*",",","-",".","/",":",";","<",
                  "=",">","?","@","[","\\","]","^","_","`","{","|","}","~","–"]

    # Accented vowels and their plain replacements.
    vowel_dict = {'é': 'e', 'à' : 'a'}

    # One literal translation table: special characters map to None (deleted),
    # accented vowels to their replacement. Being literal, it does not depend
    # on how the installed pandas interprets regex metacharacters.
    table = str.maketrans({**dict.fromkeys(spec_chars), **vowel_dict})

    return cleaned.assign(song=cleaned['song'].str.lower().str.translate(table))

In [10]:
# NOTE(review): this assignment rebinds the name `clean_songs_df` from the
# function to the DataFrame it returns. Running this cell a second time
# therefore tries to call a DataFrame and raises TypeError; re-run the cell
# that defines the function first, or give the result its own name (and
# update every later use of it).
clean_songs_df = clean_songs_df(songs_df)
clean_songs_df

Unnamed: 0,song,artist
2MvvoeRt8NcOXWESkxWn3g,ribs,Lorde
6UFivO2zqqPFPoQYsEMuCc,bags,Clairo
4Musyaro0NM5Awx8b5c627,the man who can't be moved,The Script
1CkvWZme3pRgbzaxZnTl5X,rolling in the deep,Adele
4lLtanYk6tkMvooU0tWzG8,grenade,Bruno Mars
...,...,...
2foI6dVPMRC7B3qwjca19u,dream a little dream,Robbie Williams
1ZfK0enTcbDvb3LznGG7Wl,can't take my eyes off you x make up for ever,Jessie J
1GE57bvEZMZijuQAbQve3H,штрихи,Burito
7jA5OcunWRSYXq98puhAX8,for the damaged coda,Blonde Redhead


## Get audio features for the songs

In [11]:
def get_audio_features(tracks, batch_size=100):
    """Download the Spotify audio features for the tracks of a playlist.

    Parameters
    ----------
    tracks : list of dict
        Playlist items; each is read through ``item["track"]["id"]`` and
        ``item["track"]["uri"]``.
    batch_size : int, optional
        Number of tracks requested per API call. Values above 100 are
        capped, as the audio-features endpoint takes at most 100 tracks
        per request.

    Returns
    -------
    dict
        Maps each track ID to a one-element list holding that track's
        feature dict (or ``None`` when the API returned no features for
        it), i.e. the same shape a single-track request produces.
    """
    # Collect each distinct track once, in first-seen order. Items without a
    # track payload cannot be looked up and are skipped.
    uri_by_id = {}
    for item in tracks:
        track = item.get("track")
        if not track:
            continue
        uri_by_id[track["id"]] = track["uri"]

    track_ids = list(uri_by_id)
    step = max(1, min(int(batch_size), 100))

    audio_features = {}

    # Request the features in batches rather than one call per track, which
    # cuts the number of HTTP requests by up to a factor of 100.
    for start in range(0, len(track_ids), step):
        chunk = track_ids[start:start + step]
        response = sp.audio_features([uri_by_id[track_id] for track_id in chunk])
        # Results are paired with the requested IDs by position.
        for track_id, features in zip(chunk, response):
            audio_features[track_id] = [features]

    return audio_features

# Fetch the audio features of every playlist track (network-bound step).
audio_features = get_audio_features(playlist_songs)

In [12]:
def df_for_features(song_features):
    """Turn the per-track audio-feature mapping into a DataFrame.

    Parameters
    ----------
    song_features : dict
        Maps a track ID to a list whose first element is that track's
        feature dict, or ``None`` when no features are available.

    Returns
    -------
    pandas.DataFrame
        One row per track that has features and one column per feature
        key, with a default integer index. Empty when no track has
        features.
    """
    # Tracks without features (None, or an empty list) are left out; indexing
    # into them would raise TypeError / IndexError.
    records = [
        entry[0]
        for entry in song_features.values()
        if entry and entry[0]
    ]

    # Building from a list of records replaces the column-by-column nested loop.
    return pd.DataFrame(records)

# Tabulate the downloaded audio features: one row per track, one column per feature.
feature_df = df_for_features(audio_features)
feature_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.511,0.472,4,-9.277,1,0.0910,0.534,0.614000,0.1100,0.0399,127.978,audio_features,2MvvoeRt8NcOXWESkxWn3g,spotify:track:2MvvoeRt8NcOXWESkxWn3g,https://api.spotify.com/v1/tracks/2MvvoeRt8NcO...,https://api.spotify.com/v1/audio-analysis/2Mvv...,258969,4
1,0.742,0.546,1,-7.694,1,0.0315,0.172,0.380000,0.1150,0.8680,104.996,audio_features,6UFivO2zqqPFPoQYsEMuCc,spotify:track:6UFivO2zqqPFPoQYsEMuCc,https://api.spotify.com/v1/tracks/6UFivO2zqqPF...,https://api.spotify.com/v1/audio-analysis/6UFi...,260520,4
2,0.609,0.629,10,-5.024,1,0.0264,0.425,0.000000,0.0978,0.3250,99.955,audio_features,4Musyaro0NM5Awx8b5c627,spotify:track:4Musyaro0NM5Awx8b5c627,https://api.spotify.com/v1/tracks/4Musyaro0NM5...,https://api.spotify.com/v1/audio-analysis/4Mus...,241467,4
3,0.729,0.756,8,-5.119,1,0.0294,0.131,0.000000,0.0527,0.5220,104.945,audio_features,1CkvWZme3pRgbzaxZnTl5X,spotify:track:1CkvWZme3pRgbzaxZnTl5X,https://api.spotify.com/v1/tracks/1CkvWZme3pRg...,https://api.spotify.com/v1/audio-analysis/1Ckv...,228293,4
4,0.706,0.558,2,-7.237,0,0.0593,0.146,0.000000,0.1180,0.2270,110.443,audio_features,4lLtanYk6tkMvooU0tWzG8,spotify:track:4lLtanYk6tkMvooU0tWzG8,https://api.spotify.com/v1/tracks/4lLtanYk6tkM...,https://api.spotify.com/v1/audio-analysis/4lLt...,223253,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9963,0.415,0.339,0,-10.278,1,0.0316,0.703,0.000000,0.1750,0.3400,131.765,audio_features,2foI6dVPMRC7B3qwjca19u,spotify:track:2foI6dVPMRC7B3qwjca19u,https://api.spotify.com/v1/tracks/2foI6dVPMRC7...,https://api.spotify.com/v1/audio-analysis/2foI...,213747,3
9964,0.494,0.531,7,-8.146,1,0.4180,0.586,0.000000,0.3250,0.2120,79.220,audio_features,1ZfK0enTcbDvb3LznGG7Wl,spotify:track:1ZfK0enTcbDvb3LznGG7Wl,https://api.spotify.com/v1/tracks/1ZfK0enTcbDv...,https://api.spotify.com/v1/audio-analysis/1ZfK...,170427,4
9965,0.534,0.802,7,-4.669,0,0.2190,0.154,0.000004,0.1710,0.7480,179.998,audio_features,1GE57bvEZMZijuQAbQve3H,spotify:track:1GE57bvEZMZijuQAbQve3H,https://api.spotify.com/v1/tracks/1GE57bvEZMZi...,https://api.spotify.com/v1/audio-analysis/1GE5...,195957,4
9966,0.499,0.468,0,-9.995,0,0.0396,0.795,0.880000,0.0899,0.0763,141.463,audio_features,7jA5OcunWRSYXq98puhAX8,spotify:track:7jA5OcunWRSYXq98puhAX8,https://api.spotify.com/v1/tracks/7jA5OcunWRSY...,https://api.spotify.com/v1/audio-analysis/7jA5...,157000,4


In [13]:
def clean_features_df(df):
    """Index the audio-feature table by track ID and drop metadata columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature table containing an ``id`` column plus the columns
        ``uri``, ``track_href``, ``analysis_url``, ``time_signature`` and
        ``type``.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by ``id`` without the five columns above. The
        input frame is left untouched, so calling the function again on the
        same data gives the same result instead of raising KeyError.
    """
    unused_columns = ['uri', 'track_href', 'analysis_url', 'time_signature', 'type']

    # Chained, non-mutating calls: each step returns a new frame.
    return df.set_index('id').drop(columns=unused_columns)

In [14]:
# Index the feature table by track ID and remove the columns that
# clean_features_df drops, so it can be joined to the song table.
clean_feature_df = clean_features_df(feature_df)
clean_feature_df

Unnamed: 0_level_0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2MvvoeRt8NcOXWESkxWn3g,0.511,0.472,4,-9.277,1,0.0910,0.534,0.614000,0.1100,0.0399,127.978,258969
6UFivO2zqqPFPoQYsEMuCc,0.742,0.546,1,-7.694,1,0.0315,0.172,0.380000,0.1150,0.8680,104.996,260520
4Musyaro0NM5Awx8b5c627,0.609,0.629,10,-5.024,1,0.0264,0.425,0.000000,0.0978,0.3250,99.955,241467
1CkvWZme3pRgbzaxZnTl5X,0.729,0.756,8,-5.119,1,0.0294,0.131,0.000000,0.0527,0.5220,104.945,228293
4lLtanYk6tkMvooU0tWzG8,0.706,0.558,2,-7.237,0,0.0593,0.146,0.000000,0.1180,0.2270,110.443,223253
...,...,...,...,...,...,...,...,...,...,...,...,...
2foI6dVPMRC7B3qwjca19u,0.415,0.339,0,-10.278,1,0.0316,0.703,0.000000,0.1750,0.3400,131.765,213747
1ZfK0enTcbDvb3LznGG7Wl,0.494,0.531,7,-8.146,1,0.4180,0.586,0.000000,0.3250,0.2120,79.220,170427
1GE57bvEZMZijuQAbQve3H,0.534,0.802,7,-4.669,0,0.2190,0.154,0.000004,0.1710,0.7480,179.998,195957
7jA5OcunWRSYXq98puhAX8,0.499,0.468,0,-9.995,0,0.0396,0.795,0.880000,0.0899,0.0763,141.463,157000


## Merge both DataFrames on the song ID

In [15]:
# Join song metadata and audio features on their shared index (the track ID).
# pd.merge defaults to an inner join, so only IDs present in both frames remain.
final_df = pd.merge(clean_songs_df, clean_feature_df, right_index=True, left_index=True)
final_df

Unnamed: 0,song,artist,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
2MvvoeRt8NcOXWESkxWn3g,ribs,Lorde,0.511,0.472,4,-9.277,1,0.0910,0.534,0.614000,0.1100,0.0399,127.978,258969
6UFivO2zqqPFPoQYsEMuCc,bags,Clairo,0.742,0.546,1,-7.694,1,0.0315,0.172,0.380000,0.1150,0.8680,104.996,260520
4Musyaro0NM5Awx8b5c627,the man who can't be moved,The Script,0.609,0.629,10,-5.024,1,0.0264,0.425,0.000000,0.0978,0.3250,99.955,241467
1CkvWZme3pRgbzaxZnTl5X,rolling in the deep,Adele,0.729,0.756,8,-5.119,1,0.0294,0.131,0.000000,0.0527,0.5220,104.945,228293
4lLtanYk6tkMvooU0tWzG8,grenade,Bruno Mars,0.706,0.558,2,-7.237,0,0.0593,0.146,0.000000,0.1180,0.2270,110.443,223253
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2foI6dVPMRC7B3qwjca19u,dream a little dream,Robbie Williams,0.415,0.339,0,-10.278,1,0.0316,0.703,0.000000,0.1750,0.3400,131.765,213747
1ZfK0enTcbDvb3LznGG7Wl,can't take my eyes off you x make up for ever,Jessie J,0.494,0.531,7,-8.146,1,0.4180,0.586,0.000000,0.3250,0.2120,79.220,170427
1GE57bvEZMZijuQAbQve3H,штрихи,Burito,0.534,0.802,7,-4.669,0,0.2190,0.154,0.000004,0.1710,0.7480,179.998,195957
7jA5OcunWRSYXq98puhAX8,for the damaged coda,Blonde Redhead,0.499,0.468,0,-9.995,0,0.0396,0.795,0.880000,0.0899,0.0763,141.463,157000
