# Extracting Audio Features of Songs
In this notebook, we will use `Spotify's Web API` to scarpe the audio features of the songs we have selected from the top 200 songs.

In [None]:
import pandas as pd

from __future__ import print_function    # (at top of module)
from spotipy.oauth2 import SpotifyClientCredentials
import json
import spotipy
import time
import sys
import pathlib
import os
import csv
from tqdm import tqdm

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

In [47]:
client_credentials_manager = SpotifyClientCredentials(client_id="7dbe272b9d9b44278d84430e76374e88",
                                                      client_secret="14e94037bec24a3680b9249332a3d129")
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
sp.trace = False

In [61]:
sp.audio_features("7BKLCZ1jbUBVqRi2FVlTVw")

[{'danceability': 0.748,
  'energy': 0.524,
  'key': 8,
  'loudness': -5.599,
  'mode': 1,
  'speechiness': 0.0338,
  'acousticness': 0.414,
  'instrumentalness': 0,
  'liveness': 0.111,
  'valence': 0.661,
  'tempo': 95.01,
  'type': 'audio_features',
  'id': '7BKLCZ1jbUBVqRi2FVlTVw',
  'uri': 'spotify:track:7BKLCZ1jbUBVqRi2FVlTVw',
  'track_href': 'https://api.spotify.com/v1/tracks/7BKLCZ1jbUBVqRi2FVlTVw',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/7BKLCZ1jbUBVqRi2FVlTVw',
  'duration_ms': 244960,
  'time_signature': 4}]

In [62]:
# Define location of the data
data_dir = '../data'
filename = 'selected_regions_top_200_daily.csv'
data_path = os.path.join(data_dir, filename)

if not pathlib.Path(data_path).exists():
    raise FileNotFoundError('No file found at the location defined.')

In [58]:
# Load the dataset
filepath = pathlib.Path(data_path)

if filepath.exists():
    data_df = pd.read_csv(filepath, parse_dates=True)
else:
    data_df = pd.DataFrame()

# View the first 5 rows    
data_df.head()

Unnamed: 0,Position,Track Name,Artist,Streams,date,region,spotify_id
0,1.0,Starboy,The Weeknd,3135625.0,2017-01-01,global,5aAx2yezTd8zXrkmtKl66Z
1,2.0,Closer,The Chainsmokers,3015525.0,2017-01-01,global,7BKLCZ1jbUBVqRi2FVlTVw
2,3.0,Let Me Love You,DJ Snake,2545384.0,2017-01-01,global,4pdPtRcBmOSQDlJ3Fk945m
3,4.0,Rockabye (feat. Sean Paul & Anne-Marie),Clean Bandit,2356604.0,2017-01-01,global,5knuzwU65gJK7IF5yJsuaW
4,5.0,One Dance,Drake,2259887.0,2017-01-01,global,1xznGGDReH1oQq0xzbwXa3


In [86]:
track_ids = data_df["spotify_id"].unique()
track_ids, len(track_ids)

(array(['5aAx2yezTd8zXrkmtKl66Z', '7BKLCZ1jbUBVqRi2FVlTVw',
        '4pdPtRcBmOSQDlJ3Fk945m', ..., '7as7OL7cmgFZDADgVjQZjz',
        '1lVwFdNhP5q2HQdHoKX30A', '3rnso1KNGrkXfqNTlBXdPH'], dtype=object),
 22750)

## Extract Audio Features
In this section, the audio features of the tracks that have been in the top 200 are going to be extracted and saved to a CSV file named `spotify_top_tracks_audio_features.csv`.

> **Approximate time required**: `1 hour` and `20 minutes`.<sup>*</sup>

<sup>*</sup>The estimated times can be different based your computer's performance.

In [None]:
with open(os.path.join(data_dir, 'spotify_top_tracks_audio_features.csv'), 'w') as csvfile:
    fieldnames = list(sp.audio_features("7BKLCZ1jbUBVqRi2FVlTVw")[0].keys())
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    i = 0
    
    for track_id in tqdm(track_ids, desc="Saving Audio Features"):
        data_dict = sp.audio_features(track_id)[0]
        if data_dict is not None:
            writer.writerow(data_dict)

Saving Audio Features:  42%|████▏     | 9640/22750 [20:41<38:28,  5.68it/s]  

In [74]:
json_data = sp.search(q="year:2018", market="us", limit=50, offset=50)

In [50]:
json_data

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=year%3A2018&type=track&market=US&offset=50&limit=50',
  'items': [{'album': {'album_type': 'single',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/4MCBfE4596Uoi2O4DtmEMz'},
       'href': 'https://api.spotify.com/v1/artists/4MCBfE4596Uoi2O4DtmEMz',
       'id': '4MCBfE4596Uoi2O4DtmEMz',
       'name': 'Juice WRLD',
       'type': 'artist',
       'uri': 'spotify:artist:4MCBfE4596Uoi2O4DtmEMz'}],
     'external_urls': {'spotify': 'https://open.spotify.com/album/7zfsq2Hboh0SZMcWo2ASkD'},
     'href': 'https://api.spotify.com/v1/albums/7zfsq2Hboh0SZMcWo2ASkD',
     'id': '7zfsq2Hboh0SZMcWo2ASkD',
     'images': [{'height': 640,
       'url': 'https://i.scdn.co/image/ab67616d0000b273f9061a1abb0b9a27a78c9916',
       'width': 640},
      {'height': 300,
       'url': 'https://i.scdn.co/image/ab67616d00001e02f9061a1abb0b9a27a78c9916',
       'width': 300},
      {'height': 64,
       'url': 'https:

In [54]:
pd.read_json(json.dumps(json_data["tracks"]["items"]))

ValueError: Protocol not known: [{"album": {"album_type": "single", "artists": [{"external_urls": {"spotify": "https

In [53]:
!conda install fsspec --yes

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /Users/raj/Documents/Canada/Durham/DATA1200/Assignment/env

  added / updated specs:
    - fsspec


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    fsspec-0.8.7               |     pyhd3eb1b0_0          81 KB
    ------------------------------------------------------------
                                           Total:          81 KB

The following NEW packages will be INSTALLED:

  fsspec             pkgs/main/noarch::fsspec-0.8.7-pyhd3eb1b0_0

The following packages will be UPDATED:

  ca-certificates                      2021.1.19-hecd8cb5_0 --> 2021.1.19-hecd8cb5_1
  openssl                                 1.1.1i-h9ed2024_0 --> 1.1.1k-h9ed2024_0



Downloading and Extracting Packages
fsspec-0.8.7         | 81 KB     | ####################################