In [1]:
import os
import requests
import pandas as pd
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
from pprint import pprint
from time import sleep
from bs4 import BeautifulSoup
import re
from ipywidgets import IntProgress
from IPython.display import display

In [2]:
# Spotify API keys
# SECURITY: credentials must never be stored in the notebook itself. They are
# read from the SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET environment variables,
# which SpotifyClientCredentials() is expected to pick up when called without
# arguments. Export both before starting Jupyter. The values that used to be
# hardcoded in this cell should be treated as leaked and rotated in the
# Spotify developer dashboard.
spotify_cliend_id = os.environ.get('SPOTIPY_CLIENT_ID')  # (sic) misspelled name kept for compatibility
spotify_client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET')
if not (spotify_cliend_id and spotify_client_secret):
    # Warn instead of raising: the Spotify client reports the missing
    # credentials itself as soon as it is created or used.
    print('WARNING: SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET must be set in the '
          'environment; Spotify API calls will fail without them.')

In [3]:
# Generate API Token
# SpotifyClientCredentials() is called without arguments, so the client id and
# secret are expected to be available through the SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET environment variables.
client_credentials_manager = SpotifyClientCredentials()
# Module-level Spotify client; get_features() uses it for every API call.
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [4]:
def _is_missing(value):
    """Return True when a search term is None, NaN, or a blank string."""
    if value is None:
        return True
    # NaN is the only float that is not equal to itself; pandas uses it for
    # empty CSV cells.
    if isinstance(value, float) and value != value:
        return True
    return isinstance(value, str) and not value.strip()


def get_features(title, artist):
    """Look up a track on Spotify and return its metadata and audio features.

    The track is searched with a ``track:<title> artist:<artist>`` query first;
    if that yields nothing (or no usable artist is given), a second search is
    made with the title alone. Only the first hit is used.

    Args:
        title: Track title to search for.
        artist: Artist name used to narrow the first search. May be missing
            (None, NaN or blank), in which case only the title is searched.

    Returns:
        A dict with the track URL, id, the audio features, album name, first
        album image URL (None if the album has no images), explicit flag and
        popularity; or None when the title is missing, no track is found, or
        Spotify has no audio features for the track.
    """
    # A missing title would otherwise be sent as the literal text 'nan'/'None'
    # and could match an unrelated track.
    if _is_missing(title):
        return None

    items = []
    if not _is_missing(artist):
        results = sp.search(q=f'track:{title} artist:{artist}', type='track', limit=1)
        items = results['tracks']['items']
    if not items:
        # Fallback: free-text search on the title only.
        results = sp.search(q=f'{title}', type='track', limit=1)
        items = results['tracks']['items']
    if not items:
        return None

    track_info = items[0]
    track_id = track_info['id']

    # Guard against an empty response as well as a None entry for the track.
    audio = sp.audio_features([track_id])
    features = audio[0] if audio else None
    if features is None:
        return None

    images = track_info['album']['images']
    features_as_dict = {
        'URL': track_info['external_urls']['spotify'],
        'TrackId': track_id,
        'Acousticness': features['acousticness'],
        'Danceability': features['danceability'],
        'Duration (ms)': features['duration_ms'],
        'Energy': features['energy'],
        'Instrumentalness': features['instrumentalness'],
        'Key': features['key'],
        'Liveness': features['liveness'],
        'Loudness': features['loudness'],
        'Mode': features['mode'],
        'Speechiness': features['speechiness'],
        'Tempo': features['tempo'],
        'Time Signature': features['time_signature'],
        'Valence': features['valence'],
        'Album': track_info['album']['name'],
        'Image': images[0]['url'] if images else None,
        'Explicit': track_info['explicit'],
        'Popularity': track_info['popularity']
    }
    return features_as_dict

In [5]:
# Sanity check: run get_features on a single title/artist pair and show the result.
get_features('Roadhouse Blues', 'Doors')

{'URL': 'https://open.spotify.com/track/1Q5kgpp4pmyGqPwNBzkSrw',
 'TrackId': '1Q5kgpp4pmyGqPwNBzkSrw',
 'Acousticness': 0.337,
 'Danceability': 0.612,
 'Duration (ms)': 243827,
 'Energy': 0.634,
 'Instrumentalness': 4.6e-05,
 'Key': 9,
 'Liveness': 0.147,
 'Loudness': -8.212,
 'Mode': 1,
 'Speechiness': 0.0319,
 'Tempo': 121.059,
 'Time Signature': 4,
 'Valence': 0.904,
 'Album': 'Morrison Hotel',
 'Image': 'https://i.scdn.co/image/34b1c4afdd8e576fb048e9e6c900c6c9fe33ea76',
 'Explicit': False,
 'Popularity': 71}

In [6]:
# Paths of the input CSV files and of the enriched CSV files written at the
# end of the notebook; everything lives in the sibling ../data directory.
_DATA_DIR = os.path.join('..', 'data')

GRAMMY_WINNERS = os.path.join(_DATA_DIR, 'grammy-winners.csv')
GRAMMY_WINNERS_ENRICHED = os.path.join(_DATA_DIR, 'grammy-winners-enriched.csv')

SONGS_RELEASED = os.path.join(_DATA_DIR, 'songs-released.csv')
SONGS_RELEASED_ENRICHED = os.path.join(_DATA_DIR, 'songs-released-enriched.csv')

TOP_100_SONGS = os.path.join(_DATA_DIR, 'top-100-songs.csv')
TOP_100_SONGS_ENRICHED = os.path.join(_DATA_DIR, 'top-100-songs-enriched.csv')

In [7]:
# Load the Grammy winners and bring them into the Year / Artist / Title layout
# shared by the other two datasets.
grammy_winners_df = (
    pd.read_csv(GRAMMY_WINNERS)
    .rename(columns={'annualGrammy': 'Year', 'awardFor': 'Title', 'name': 'Artist'})
    .drop(columns=['awardType', 'category'])
    # NOTE(review): annualGrammy appears to be the ceremony ordinal; adding 1957
    # maps ordinal 1 to the year 1958 - confirm against the data source.
    .assign(Year=lambda winners: winners['Year'] + 1957)
    # Dropping the award columns can leave identical rows behind.
    .drop_duplicates()
    [['Year', 'Artist', 'Title']]
)
grammy_winners_df.head()

Unnamed: 0,Year,Artist,Title
0,1958,Nel Blu Dipinto Di Blu (Volare),Nel Blu Dipinto Di Blu (Volare)
1,1958,Henry Mancini,The Music From Peter Gunn
3,1958,Ella Fitzgerald,Ella Fitzgerald Sings The Irving Berlin Song Book
4,1958,Catch A Falling Star,Catch A Falling Star
5,1958,Billy May's Big Fat Brass,Billy May's Big Fat Brass


In [8]:
# Load the released-songs CSV, drop exact duplicate rows, and keep only the
# Year / Artist / Title columns in that order.
songs_released_df = (
    pd.read_csv(SONGS_RELEASED)
    .drop_duplicates()
    [['Year', 'Artist', 'Title']]
)
songs_released_df.head()

Unnamed: 0,Year,Artist,Title
0,1958,Clyde McPhatter,A Lover's Question
1,1958,Pat Boone,A Wonderful Time Up There
2,1958,The Everly Brothers,All I Have to Do Is Dream
3,1958,Andy Williams,Are You Sincere
4,1958,The Playmates,Beep Beep


In [9]:
# Load the top-100 songs CSV and keep only the Year / Artist / Title columns.
# Duplicates are removed twice: on the full rows first, and again after the
# column selection, because rows can become identical once the other columns
# are gone.
top_100_songs_df = (
    pd.read_csv(TOP_100_SONGS)
    .drop_duplicates()
    [['Year', 'Artist', 'Title']]
    .drop_duplicates()
)
top_100_songs_df.head()

Unnamed: 0,Year,Artist,Title
0,1958,Domenico Modugno,Volare
1,1958,Everly Brothers,All I Have To Do Is Dream / Claudette
2,1958,Elvis Presley,Don't / I Beg of You
3,1958,David Seville,Witch Doctor
4,1958,Pérez Prado,Patricia


In [10]:
import time
def enrich_with_features(df, delay=4):
    """Add Spotify feature columns to ``df``, one lookup per row.

    For every row, ``get_features(row['Title'], row['Artist'])`` is called and
    each key of the returned dict is written into a column of the same name.
    Rows for which the lookup returns None keep missing values in those
    columns and are reported in the printed failure list.

    Note: ``df`` is modified in place and the same object is returned, so
    rows already processed are kept in ``df`` even if a long run is
    interrupted. Values are written with ``df.loc[index, column]``, which
    assumes the index labels of ``df`` are unique.

    Args:
        df: DataFrame with at least the columns 'Title' and 'Artist'.
        delay: Seconds to pause before each lookup to throttle API requests.
            Defaults to 4, the value that was previously hard-coded; pass 0 to
            disable the pause.

    Returns:
        The same DataFrame object, with the feature columns added.
    """
    progress = IntProgress(min=0, max=len(df))
    display(progress)
    failures = []
    for index, row in df.iterrows():
        time.sleep(delay)
        features = get_features(row['Title'], row['Artist'])
        progress.value += 1
        if features is None:
            failures.append((row['Title'], row['Artist']))
            continue
        # Cell-by-cell assignment creates each feature column on first use.
        for column, value in features.items():
            df.loc[index, column] = value

    print(f'Number of failures - {len(failures)}')
    print(failures)
    return df

In [11]:
# Number of rows to look up; enrich_with_features makes one throttled request per row.
grammy_winners_df.shape

(3952, 3)

In [12]:
# Long-running cell: enrich_with_features pauses before every lookup (4 s per
# row as written), so ~3952 rows spend roughly 4.4 hours sleeping alone.
# The function writes into grammy_winners_df in place and returns that same
# object, so both names refer to the enriched frame afterwards.
grammy_winners_enriched_df = enrich_with_features(grammy_winners_df)
grammy_winners_enriched_df.head()

IntProgress(value=0, max=3952)

Number of failures - 398
[('The Best Of The Stan Freberg Shows', 'The Best Of The Stan Freberg Shows'), ('Beethoven: Sonatas No. 21 In C (Waldstein) And No. 18 In E Flat', 'Beethoven: Sonatas No. 21 In C (Waldstein) And No. 18 In E Flat'), ('Conversations With The Guitar', 'Conversations With The Guitar'), ('The Spanish Guitars Of Laurindo Almeida', 'The Spanish Guitars Of Laurindo Almeida'), ('A Program Of Song - Leontyne Price Recital', 'A Program Of Song - Leontyne Price Recital'), ('FDR Speaks', 'FDR Speaks'), ('Ella Fitzgerald Sings The George And Ira Gershwin Songbook', 'Ella Fitzgerald Sings The George And Ira Gershwin Songbook'), ('Latin A La Lee', 'Latin A La Lee'), ('Stravinsky Conducts 1960: Le Sacre Du Printemps; Petrouchka', 'Stravinsky Conducts 1960: Le Sacre Du Printemps; Petrouchka'), ('Andre Previn Plays Harold Arlen', 'André Previn'), ('Belafonte Folk Singers At Home And Abroad', 'Belafonte Folk Singers At Home And Abroad'), ('Reverie For Spanish Guitar', 'Reverie For

Unnamed: 0,Year,Artist,Title,URL,TrackId,Acousticness,Danceability,Duration (ms),Energy,Instrumentalness,...,Loudness,Mode,Speechiness,Tempo,Time Signature,Valence,Album,Image,Explicit,Popularity
0,1958,Nel Blu Dipinto Di Blu (Volare),Nel Blu Dipinto Di Blu (Volare),https://open.spotify.com/track/006Ndmw2hHxvnLb...,006Ndmw2hHxvnLbJsBFnPx,0.987,0.518,216373.0,0.06,8e-06,...,-14.887,1.0,0.0441,127.87,4.0,0.336,Tutto Modugno (Mister Volare),https://i.scdn.co/image/5e8c49f7a8d161c1d65109...,False,35.0
1,1958,Henry Mancini,The Music From Peter Gunn,https://open.spotify.com/track/3BdPP6Xce6FUcfa...,3BdPP6Xce6FUcfaCFsnZIg,0.893,0.55,177733.0,0.318,0.881,...,-14.516,1.0,0.0313,138.037,4.0,0.62,The music from Peter Gunn,https://i.scdn.co/image/1ad2e8ce1f988c27678298...,False,16.0
3,1958,Ella Fitzgerald,Ella Fitzgerald Sings The Irving Berlin Song Book,https://open.spotify.com/track/5FY0EikZVSBOwpj...,5FY0EikZVSBOwpjQa9S5Ii,0.675,0.615,138320.0,0.186,0.0,...,-12.382,0.0,0.0508,73.007,4.0,0.749,Ella Fitzgerald Sings The Irving Berlin Song Book,https://i.scdn.co/image/3350581fb4712a44a6f6b5...,False,34.0
4,1958,Catch A Falling Star,Catch A Falling Star,https://open.spotify.com/track/0qu9P0DcFcgAycR...,0qu9P0DcFcgAycRsbWupnZ,0.259,0.726,176840.0,0.33,0.000309,...,-14.864,1.0,0.033,109.158,4.0,0.712,I Just Came Home To Count The Memories,https://i.scdn.co/image/848ede6cee3d8111533c7e...,False,39.0
5,1958,Billy May's Big Fat Brass,Billy May's Big Fat Brass,https://open.spotify.com/track/4fv9FQ1TNeAYw8z...,4fv9FQ1TNeAYw8zJrVMUi7,0.421,0.711,145907.0,0.254,4.5e-05,...,-12.661,0.0,0.0374,115.418,4.0,0.552,Billy May's Big Fat Brass,https://i.scdn.co/image/04fbb97c5d46c80d9fc9e1...,False,9.0


In [13]:
# Check that the row count is unchanged and the feature columns were added.
grammy_winners_enriched_df.shape

(3952, 22)

In [14]:
# Persist the enriched Grammy winners; index=False leaves the DataFrame index out of the CSV.
grammy_winners_enriched_df.to_csv(GRAMMY_WINNERS_ENRICHED, index=False)

In [15]:
# Number of rows to look up; enrich_with_features makes one throttled request per row.
songs_released_df.shape

(6074, 3)

In [16]:
# Long-running cell: enrich_with_features pauses before every lookup (4 s per
# row as written), so ~6074 rows spend roughly 6.7 hours sleeping alone.
# The function writes into songs_released_df in place and returns that same
# object, so both names refer to the enriched frame afterwards.
songs_released_enriched_df = enrich_with_features(songs_released_df)
songs_released_enriched_df.head()

IntProgress(value=0, max=6074)

retrying ...1secs
retrying ...1secs
retrying ...1secs
retrying ...1secs
retrying ...1secs
retrying ...1secs
Number of failures - 117
[('The Same Old Army', 'The Swinging Corporals'), ('Ya Mujabid Fe Sabil Allah', 'Umm Kulthum'), ("How Come There's No Dog Day?", 'Tommy Cooper'), ('Little Soldier Say Goodnight', 'Dave Sampson And The Hunters'), ("Pepy's Diary", 'Benny Hill'), ('The Pretty Little Filly With The Ponytail', 'Sheldon Allman'), ('Quietly Bonkers', 'Bernard Cribbins'), ("(Let's Dance) The Screw", 'The Crystals'), ('All I Want For Christmas Is A Beatle', 'Dora Bryan'), ('A Miss From Diss', 'The Singing Postman'), ('Bin Born A Long Time', 'The Singing Postman'), ('Clock In The Biscuit Tin', 'The Singing Postman'), ('Dommies In The Bar', 'The Singing Postman'), ('Edna The Barmaid', 'The Singing Postman'), ('First And Second Law Of Thermodynamics', 'Flanders & Swann'), ("Ha' Th' Bottom Dropped Out?", 'The Singing Postman'), ('Hev Yew Gotta Loight, Boy?', 'The Singing Postman'), ("

Unnamed: 0,Year,Artist,Title,URL,TrackId,Acousticness,Danceability,Duration (ms),Energy,Instrumentalness,...,Loudness,Mode,Speechiness,Tempo,Time Signature,Valence,Album,Image,Explicit,Popularity
0,1958,Clyde McPhatter,A Lover's Question,https://open.spotify.com/track/6HBgCYResmsdmOu...,6HBgCYResmsdmOufyHVoNB,0.887,0.67,150267.0,0.436,1.7e-05,...,-11.698,1.0,0.0554,83.023,4.0,0.951,Clyde,https://i.scdn.co/image/5f15ac3a4737e257e96c26...,False,43.0
1,1958,Pat Boone,A Wonderful Time Up There,https://open.spotify.com/track/2ddcdLTP0DHU86V...,2ddcdLTP0DHU86VYETlEwm,0.757,0.79,127427.0,0.441,5.6e-05,...,-14.047,1.0,0.0499,95.903,4.0,0.945,Pat Boone's Greatest Hits (Reissue),https://i.scdn.co/image/c3c266be0f8eef047d3fb5...,False,25.0
2,1958,The Everly Brothers,All I Have to Do Is Dream,https://open.spotify.com/track/7dblNGnRXEBrVJu...,7dblNGnRXEBrVJunazs2U5,0.839,0.534,143333.0,0.462,0.0,...,-8.172,1.0,0.0266,96.195,4.0,0.534,The Very Best of The Everly Brothers,https://i.scdn.co/image/fb7459c82a4c70cd707621...,False,64.0
3,1958,Andy Williams,Are You Sincere,https://open.spotify.com/track/6SW50CFLBpAWGDB...,6SW50CFLBpAWGDB0VtbTuW,0.868,0.488,168573.0,0.121,2.6e-05,...,-19.629,1.0,0.0286,104.434,4.0,0.367,Best Of Andy Williams,https://i.scdn.co/image/fb3ed68b30105fbf0dd0f2...,False,14.0
4,1958,The Playmates,Beep Beep,https://open.spotify.com/track/4uwounqJOKHVt0Y...,4uwounqJOKHVt0Y2Hu3dGE,0.719,0.519,150213.0,0.502,0.0,...,-6.289,0.0,0.274,76.545,4.0,0.759,Right Said Fred: A Barrel Of Laughs,https://i.scdn.co/image/93a777dfffbe54b67105b3...,False,30.0


In [17]:
# Check that the row count is unchanged and the feature columns were added.
songs_released_enriched_df.shape

(6074, 22)

In [18]:
# Persist the enriched released songs; index=False leaves the DataFrame index out of the CSV.
songs_released_enriched_df.to_csv(SONGS_RELEASED_ENRICHED, index=False)

In [19]:
# Number of rows to look up; enrich_with_features makes one throttled request per row.
top_100_songs_df.shape

(5978, 3)

In [20]:
# Long-running cell: enrich_with_features pauses before every lookup (4 s per
# row as written), so ~5978 rows spend roughly 6.6 hours sleeping alone.
# The function writes into top_100_songs_df in place and returns that same
# object, so both names refer to the enriched frame afterwards.
top_100_songs_enriched_df = enrich_with_features(top_100_songs_df)
top_100_songs_enriched_df.head()

IntProgress(value=0, max=5978)

retrying ...1secs
retrying ...1secs
Number of failures - 88


Unnamed: 0,Year,Artist,Title,URL,TrackId,Acousticness,Danceability,Duration (ms),Energy,Instrumentalness,...,Loudness,Mode,Speechiness,Tempo,Time Signature,Valence,Album,Image,Explicit,Popularity
0,1958,Domenico Modugno,Volare,https://open.spotify.com/track/4TVFmPiDtVtFe5P...,4TVFmPiDtVtFe5P9y5OwJ9,0.963,0.297,274867.0,0.208,2.2e-05,...,-15.611,0.0,0.0446,97.044,4.0,0.341,The 50 Greatest Vocal Hits,https://i.scdn.co/image/f769e1ec2c19cd07c9d333...,False,37.0
1,1958,Everly Brothers,All I Have To Do Is Dream / Claudette,,,,,,,,...,,,,,,,,,,
2,1958,Elvis Presley,Don't / I Beg of You,https://open.spotify.com/track/1VyH20PJxY6GgYP...,1VyH20PJxY6GgYPju3a7rq,0.994,0.358,98240.0,0.147,0.000302,...,-15.543,0.0,0.0428,135.258,3.0,0.0763,The Symbol of Faith - Russian Sacred and Class...,https://i.scdn.co/image/20120ac8e4e60790e76343...,False,0.0
3,1958,David Seville,Witch Doctor,https://open.spotify.com/track/48skOMMuEQSfwo0...,48skOMMuEQSfwo0JZpfqjk,0.455,0.6,142493.0,0.742,0.0,...,-4.752,1.0,0.0604,138.227,4.0,0.84,Alvin And The Chipmunks (Original Motion Pictu...,https://i.scdn.co/image/698c60bd6fd10a55dfdd76...,False,26.0
4,1958,Pérez Prado,Patricia,https://open.spotify.com/track/205EtD8tQnHDXMk...,205EtD8tQnHDXMkmtYiaHF,0.0862,0.704,140867.0,0.638,0.251,...,-9.522,1.0,0.0457,137.396,4.0,0.837,The Best Of Perez Prado: The Original Mambo #5,https://i.scdn.co/image/7700a28d06ef4e7ba51e2b...,False,37.0


In [21]:
# Check that the row count is unchanged and the feature columns were added.
top_100_songs_enriched_df.shape

(5978, 22)

In [22]:
# Persist the enriched top-100 songs; index=False leaves the DataFrame index out of the CSV.
top_100_songs_enriched_df.to_csv(TOP_100_SONGS_ENRICHED, index=False)