# Extracting Audio Features of Songs
In this notebook, we will use `Spotify's Web API` to scarpe the audio features of the songs we have selected from the top 200 songs.

In [90]:
import pandas as pd

from __future__ import print_function    # (at top of module)
from spotipy.oauth2 import SpotifyClientCredentials
import json
import spotipy
import time
import sys
import pathlib
import os
import csv
from tqdm import tqdm

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

In [47]:
client_credentials_manager = SpotifyClientCredentials(client_id="7dbe272b9d9b44278d84430e76374e88",
                                                      client_secret="14e94037bec24a3680b9249332a3d129")
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
sp.trace = False

In [61]:
sp.audio_features("7BKLCZ1jbUBVqRi2FVlTVw")

[{'danceability': 0.748,
  'energy': 0.524,
  'key': 8,
  'loudness': -5.599,
  'mode': 1,
  'speechiness': 0.0338,
  'acousticness': 0.414,
  'instrumentalness': 0,
  'liveness': 0.111,
  'valence': 0.661,
  'tempo': 95.01,
  'type': 'audio_features',
  'id': '7BKLCZ1jbUBVqRi2FVlTVw',
  'uri': 'spotify:track:7BKLCZ1jbUBVqRi2FVlTVw',
  'track_href': 'https://api.spotify.com/v1/tracks/7BKLCZ1jbUBVqRi2FVlTVw',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/7BKLCZ1jbUBVqRi2FVlTVw',
  'duration_ms': 244960,
  'time_signature': 4}]

In [62]:
# Define location of the data
data_dir = '../data'
filename = 'selected_regions_top_200_daily.csv'
data_path = os.path.join(data_dir, filename)

if not pathlib.Path(data_path).exists():
    raise FileNotFoundError('No file found at the location defined.')

In [58]:
# Load the dataset
filepath = pathlib.Path(data_path)

if filepath.exists():
    data_df = pd.read_csv(filepath, parse_dates=True)
else:
    data_df = pd.DataFrame()

# View the first 5 rows    
data_df.head()

Unnamed: 0,Position,Track Name,Artist,Streams,date,region,spotify_id
0,1.0,Starboy,The Weeknd,3135625.0,2017-01-01,global,5aAx2yezTd8zXrkmtKl66Z
1,2.0,Closer,The Chainsmokers,3015525.0,2017-01-01,global,7BKLCZ1jbUBVqRi2FVlTVw
2,3.0,Let Me Love You,DJ Snake,2545384.0,2017-01-01,global,4pdPtRcBmOSQDlJ3Fk945m
3,4.0,Rockabye (feat. Sean Paul & Anne-Marie),Clean Bandit,2356604.0,2017-01-01,global,5knuzwU65gJK7IF5yJsuaW
4,5.0,One Dance,Drake,2259887.0,2017-01-01,global,1xznGGDReH1oQq0xzbwXa3


In [86]:
track_ids = data_df["spotify_id"].unique()
track_ids, len(track_ids)

(array(['5aAx2yezTd8zXrkmtKl66Z', '7BKLCZ1jbUBVqRi2FVlTVw',
        '4pdPtRcBmOSQDlJ3Fk945m', ..., '7as7OL7cmgFZDADgVjQZjz',
        '1lVwFdNhP5q2HQdHoKX30A', '3rnso1KNGrkXfqNTlBXdPH'], dtype=object),
 22750)

## Extract Audio Features
In this section, the audio features of the tracks that have been in the top 200 are going to be extracted and saved to a CSV file named `spotify_top_tracks_audio_features.csv`.

> **Approximate time required**: `1 hour` and `20 minutes`.<sup>*</sup>

<sup>*</sup>The estimated times can be different based your computer's performance.

In [89]:
with open(os.path.join(data_dir, 'spotify_top_tracks_audio_features.csv'), 'w') as csvfile:
    fieldnames = list(sp.audio_features("7BKLCZ1jbUBVqRi2FVlTVw")[0].keys())
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    
    for track_id in tqdm(track_ids, desc="Saving Audio Features"):
        data_dict = sp.audio_features(track_id)[0]
        if data_dict is not None:
            writer.writerow(data_dict)

Saving Audio Features: 100%|██████████| 22750/22750 [50:13<00:00,  7.55it/s]  


In [158]:
track_dets = sp.track("7BKLCZ1jbUBVqRi2FVlTVw")

In [159]:
sp.artist(track_dets["artists"][0]["id"])["genres"]

['dance pop', 'edm', 'electropop', 'pop', 'pop dance', 'tropical house']

In [163]:
track = sp.track("7BKLCZ1jbUBVqRi2FVlTVw")
pprint(track["album"]["release_date"])

'2016-07-29'


In [172]:
with open(os.path.join(data_dir, 'spotify_top_tracks_release_dates.csv'), 'w') as csvfile:
    fieldnames = ["id", "release_date"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    
    for track_id in tqdm(track_ids, desc="Saving Release Dates"):
        data_dict = {
            "id": track_id,
            "release_date": sp.track(track_id)["album"]["release_date"]
        }
        writer.writerow(data_dict)

Saving Release Dates: 100%|██████████| 22750/22750 [51:14<00:00,  7.40it/s]  


In [139]:
data_df.head()

Unnamed: 0,Position,Track Name,Artist,Streams,date,region,spotify_id
0,1.0,Starboy,The Weeknd,3135625.0,2017-01-01,global,5aAx2yezTd8zXrkmtKl66Z
1,2.0,Closer,The Chainsmokers,3015525.0,2017-01-01,global,7BKLCZ1jbUBVqRi2FVlTVw
2,3.0,Let Me Love You,DJ Snake,2545384.0,2017-01-01,global,4pdPtRcBmOSQDlJ3Fk945m
3,4.0,Rockabye (feat. Sean Paul & Anne-Marie),Clean Bandit,2356604.0,2017-01-01,global,5knuzwU65gJK7IF5yJsuaW
4,5.0,One Dance,Drake,2259887.0,2017-01-01,global,1xznGGDReH1oQq0xzbwXa3


In [138]:
track_dets = data_df.iloc[0]

In [140]:
track = network.get_track(track_dets["Artist"], track_dets["Track Name"])

In [141]:
track.get_url()

'https://www.last.fm/music/the%2bweeknd/_/starboy'

In [177]:
data_dict = sp.search(q="year:2018", market="us", limit=50, type='artist')

In [183]:
data_dict["artists"].keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [198]:
for artist in data_dict["artists"]["items"]:
    print(artist["name"])
    track_dict = sp.search(q=f"year:2018 & artist:{artist['name']}", market="us", limit=50, type='track')
    pprint(track_dict)
    break

Drake
{'tracks': {'href': 'https://api.spotify.com/v1/search?query=year%3A2018+%26+artist%3ADrake&type=track&market=US&offset=0&limit=50',
            'items': [{'album': {'album_type': 'album',
                                 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/3TVXtAsR1Inumwj472S9r4'},
                                              'href': 'https://api.spotify.com/v1/artists/3TVXtAsR1Inumwj472S9r4',
                                              'id': '3TVXtAsR1Inumwj472S9r4',
                                              'name': 'Drake',
                                              'type': 'artist',
                                              'uri': 'spotify:artist:3TVXtAsR1Inumwj472S9r4'}],
                                 'external_urls': {'spotify': 'https://open.spotify.com/album/1ATL5GLyefJaxhQzSPVrLX'},
                                 'href': 'https://api.spotify.com/v1/albums/1ATL5GLyefJaxhQzSPVrLX',
                                 '

                                             'width': 300},
                                            {'height': 64,
                                             'url': 'https://i.scdn.co/image/ab67616d0000485148211b35de1773773619907f',
                                             'width': 64}],
                                 'name': "The Tide's Magnificence: Songs and "
                                         'Poems of Molly Drake',
                                 'release_date': '2018-02-23',
                                 'release_date_precision': 'day',
                                 'total_tracks': 26,
                                 'type': 'album',
                                 'uri': 'spotify:album:2foAaJeadpPstoqTuDss5Y'},
                       'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/1789Un2gEvELbBTnHoS9Rl'},
                                    'href': 'https://api.spotify.com/v1/artists/1789Un2gEvELbBTnHoS9Rl',
               

                                 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/1789Un2gEvELbBTnHoS9Rl'},
                                              'href': 'https://api.spotify.com/v1/artists/1789Un2gEvELbBTnHoS9Rl',
                                              'id': '1789Un2gEvELbBTnHoS9Rl',
                                              'name': 'Molly Drake',
                                              'type': 'artist',
                                              'uri': 'spotify:artist:1789Un2gEvELbBTnHoS9Rl'}],
                                 'external_urls': {'spotify': 'https://open.spotify.com/album/2foAaJeadpPstoqTuDss5Y'},
                                 'href': 'https://api.spotify.com/v1/albums/2foAaJeadpPstoqTuDss5Y',
                                 'id': '2foAaJeadpPstoqTuDss5Y',
                                 'images': [{'height': 640,
                                             'url': 'https://i.scdn.co/image/ab67616d0000b27348211

                       'preview_url': 'https://p.scdn.co/mp3-preview/4105cc98d7e7cc9c6388528f96a680b1a8387256?cid=7dbe272b9d9b44278d84430e76374e88',
                       'track_number': 23,
                       'type': 'track',
                       'uri': 'spotify:track:3tkTMijNviitNLpvJHsdrn'},
                      {'album': {'album_type': 'album',
                                 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/1789Un2gEvELbBTnHoS9Rl'},
                                              'href': 'https://api.spotify.com/v1/artists/1789Un2gEvELbBTnHoS9Rl',
                                              'id': '1789Un2gEvELbBTnHoS9Rl',
                                              'name': 'Molly Drake',
                                              'type': 'artist',
                                              'uri': 'spotify:artist:1789Un2gEvELbBTnHoS9Rl'}],
                                 'external_urls': {'spotify': 'https://open.spotify.c

                       'external_urls': {'spotify': 'https://open.spotify.com/track/5yOd6pJ6CmwUwz93tcqlQH'},
                       'href': 'https://api.spotify.com/v1/tracks/5yOd6pJ6CmwUwz93tcqlQH',
                       'id': '5yOd6pJ6CmwUwz93tcqlQH',
                       'is_local': False,
                       'is_playable': True,
                       'name': 'A Sound',
                       'popularity': 7,
                       'preview_url': 'https://p.scdn.co/mp3-preview/2afbeacea78e2be403a1e782ace9e8566a2a3bc4?cid=7dbe272b9d9b44278d84430e76374e88',
                       'track_number': 9,
                       'type': 'track',
                       'uri': 'spotify:track:5yOd6pJ6CmwUwz93tcqlQH'},
                      {'album': {'album_type': 'album',
                                 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/1789Un2gEvELbBTnHoS9Rl'},
                                              'href': 'https://api.spotify.com/v1/ar

In [215]:
for track in track_dict["tracks"]["items"]:
    artists = [data["name"] for data in track["artists"]]
    print(artists)
    break

['Drake', 'Static Major', 'Ty Dolla $ign']
