In [2]:
# import packages
import pandas as pd
import requests
import json
from api_spotify import client_id, client_secret

In [3]:
# load the processed billboard chart data from csv
billboard = pd.read_csv(
    filepath_or_buffer="../data/processed/billboard_charts_num_one.csv"
)

## Spotify data context

In this script, we will get the corresponding Spotify track data for the Billboard number one hits.

In [25]:
# pare the billboard data down to what the spotify search needs

## keep chart weeks from 1960 onwards so every decade we cover is close to complete
## NOTE(review): this is a string comparison, so it assumes chart_week is stored
## as ISO "YYYY-MM-DD" text -- confirm against the csv
is_post_1960 = billboard["chart_week"] >= "1960-01-01"

## a track only needs to be searched once, however many weeks it charted,
## so keep just the distinct (title, performer) pairs with a fresh 0..n-1 index
billboard_search = (
    billboard
    .loc[is_post_1960, ["title", "performer"]]
    .drop_duplicates(ignore_index=True)
)

In [9]:
# okay set up the authorization for the spotify API

## authorize: request an access token using the client_credentials grant
auth_url = 'https://accounts.spotify.com/api/token'
data = {
    'grant_type': 'client_credentials',
    'client_id': client_id,
    'client_secret': client_secret,
}
auth_response = requests.post(auth_url, data=data)

## stop here with the HTTP error if the token request was rejected, instead of
## failing below with a TypeError from "Bearer " + None
auth_response.raise_for_status()
access_token = auth_response.json()['access_token']

## header sent along with every later API request
headers = {"Authorization":"Bearer " + access_token}

In [23]:
# function to search spotify
def search_spotify(search_term, type, auth_headers, limit=1):
    '''
    Search spotify and return a flat summary of the first result.

    Parameters
    ----------
    search_term : str
        Free-text query. It is passed to requests as a query parameter, so
        characters such as "&", "#" or "+" in a title are encoded rather than
        corrupting the URL.
    type : str
        One of types = ["artist", "track", "album"].
    auth_headers : dict
        Headers to send with the request (the bearer-token Authorization header).
    limit : int, default 1
        Forwarded to the API as the "limit" query parameter. Only the first
        item of the response is used, whatever the limit.

    Returns
    -------
    dict
        Always has "type", "artist_name" and "artist_id".
        artist: also "popularity", "genres", "followers".
        album:  also "album_name", "album_id".
        track:  also "album_name", "album_id", "track_name", "track_id",
                "popularity", "duration_ms".

    Raises
    ------
    ValueError
        If type is not one of the supported types (checked before any request).
    requests.HTTPError
        If the API answers with an error status.
    IndexError
        If the search returns no items.
    '''
    supported_types = ("artist", "track", "album")
    if type not in supported_types:
        raise ValueError("type must be one of " + str(list(supported_types))
                         + ", got " + repr(type))

    # let requests build and encode the query string
    response = requests.get("https://api.spotify.com/v1/search",
                            params={"q": search_term,
                                    "type": type,
                                    "limit": limit},
                            headers=auth_headers)
    # surface auth / rate-limit failures as HTTP errors, not as a KeyError below
    response.raise_for_status()

    # results are nested under the plural of the type, e.g. "tracks"
    items = response.json()[type + "s"]["items"]
    if not items:
        # same exception type as indexing an empty list, but says what was missing
        raise IndexError("spotify returned no " + type
                         + " results for " + repr(search_term))
    output = items[0]

    if type == "artist":
        return {"type":type,
                "artist_name":output["name"],
                "artist_id":output["id"],
                "popularity":output["popularity"],
                "genres":output["genres"],
                "followers":output["followers"]["total"]}

    # albums and tracks both report their first listed artist
    first_artist = output["artists"][0]
    if type == "album":
        return {"type":type,
                "artist_name":first_artist["name"],
                "artist_id":first_artist["id"],
                "album_name":output["name"],
                "album_id":output["id"]}

    # only "track" is left
    return {"type":type,
            "artist_name":first_artist["name"],
            "artist_id":first_artist["id"],
            "album_name":output["album"]["name"],
            "album_id":output["album"]["id"],
            "track_name":output["name"],
            "track_id":output["id"],
            "popularity":output["popularity"],
            "duration_ms":output["duration_ms"]}

In [32]:
# cycle through all of our number ones!

## make our search term
billboard_search["search_term"] = billboard_search["title"] + " " + billboard_search["performer"]

## list to store the matches in, plus the searches that came back empty
spotify_list = []
spotify_not_found = []

## walk the rows themselves instead of positional labels, so the loop does not
## depend on billboard_search carrying a clean 0..n-1 index
for row in billboard_search.itertuples(index=False):
    try:
        output = search_spotify(row.search_term,
                                "track",
                                headers)
    except IndexError:
        ## search_spotify raises IndexError when the search returns no items;
        ## note the miss and carry on rather than aborting the whole run
        spotify_not_found.append({"title":row.title,
                                  "performer":row.performer})
        continue

    ## keep the billboard title / performer next to the spotify fields
    output.update({"title":row.title,
                   "performer":row.performer})

    spotify_list.append(output)

## quick summary so unmatched tracks do not go unnoticed
print(len(spotify_list), "tracks matched,", len(spotify_not_found), "not found on spotify")

In [34]:
## build one dataframe from the collected per-track dicts
spotify_data = pd.DataFrame(data=spotify_list)

In [37]:
## write the spotify track data out as csv, leaving the index out of the file
spotify_data.to_csv(
    path_or_buf="../data/processed/spotify_num_one_track_data.csv",
    index=False,
)