In [1]:
import requests
import base64
import pandas as pd
from urllib.parse import urlencode
import webbrowser
import numpy as np
from datetime import datetime

In [208]:
# Fill in the Client ID and Client Secret of your own app from the Spotify Developer Dashboard.
CLIENT_ID = ''
CLIENT_SECRET = ''

# Query-string parameters for the authorization request: ask for an
# authorization code ("code") with the "user-top-read" scope.
auth_headers = dict(
    client_id=CLIENT_ID,
    response_type="code",
    redirect_uri='http://localhost:8888/callback',
    scope="user-top-read",
)

# Open the authorization page in the browser; the call's boolean result is the cell output.
webbrowser.open("".join(("https://accounts.spotify.com/authorize?", urlencode(auth_headers))))

True

In [209]:
# Paste the authorization code from the previous step here: after authorizing,
# it is the value that follows "code=" in the URL shown in the browser.
auth_code = ""

In [207]:
# Base URL for the Spotify API. Endpoint paths are appended to it by plain string
# concatenation, so it ends with a trailing slash.
SPOTIFY_API_URL = 'https://api.spotify.com/v1/'

# Function to authenticate with Spotify and get an access token
def get_access_token(client_id, client_secret, auth_code,
                     redirect_uri='http://localhost:8888/callback', timeout=10):
    """Exchange an authorization code for a Spotify access token.

    Args:
        client_id: Client ID of the Spotify app.
        client_secret: Client secret of the Spotify app.
        auth_code: Authorization code obtained from the authorize step.
        redirect_uri: Redirect URI sent with the token request. It must be the
            same value that was sent in the authorization request, otherwise
            Spotify rejects the code.
        timeout: Seconds to wait for the token endpoint before `requests`
            raises a timeout error (previously the call could block forever).

    Returns:
        The access token string, or None (after printing the HTTP status code)
        when the token endpoint does not answer with status 200.
    """
    # The token endpoint expects "client_id:client_secret" base64-encoded as HTTP Basic auth.
    encoded_client_credentials = base64.b64encode(f'{client_id}:{client_secret}'.encode()).decode()

    headers = {
        'Authorization': f'Basic {encoded_client_credentials}',
    }

    # Parameters for the token request
    data = {
        'grant_type': 'authorization_code',
        'code': auth_code,
        # No redirect happens at this step; the value is only validated against
        # the one used when the authorization code was requested.
        'redirect_uri': redirect_uri,
    }

    response = requests.post(
        'https://accounts.spotify.com/api/token',
        headers=headers,
        data=data,
        timeout=timeout,
    )
    if response.status_code == 200:
        return response.json()['access_token']

    print(f'Failed to get access token. Error {response.status_code}')
    return None
    
# Function to make a GET request to the Spotify API
def get_spotify_data(endpoint, access_token, params=None, timeout=10):
    """Send an authenticated GET request to a Spotify API endpoint.

    Args:
        endpoint: Path relative to SPOTIFY_API_URL, e.g. 'me/top/tracks'.
            Leading slashes are ignored so '/me/top/tracks' works as well.
        access_token: Token sent as a Bearer token in the Authorization header.
        params: Optional dict of query-string parameters.
        timeout: Seconds to wait for the API before `requests` raises a
            timeout error (previously the call could block forever).

    Returns:
        The decoded JSON body when the response status is 200; otherwise None,
        after printing the HTTP status code.
    """
    headers = {
        'Authorization': f'Bearer {access_token}'
    }

    # SPOTIFY_API_URL already ends with '/', so drop any leading slash to avoid '//' in the URL.
    url = SPOTIFY_API_URL + endpoint.lstrip('/')
    response = requests.get(url, headers=headers, params=params, timeout=timeout)

    if response.status_code == 200:
        return response.json()

    print(f'Failed to retrieve data from the Spotify API. Error {response.status_code}')
    return None

# Function to get the user's top tracks (track=True) or top artists (track=False)
def get_user_top(access_token, time_range='medium_term', limit=10, track = True):
    """Request the user's top tracks or top artists.

    Args:
        access_token: Token forwarded to get_spotify_data.
        time_range: Sent as the 'time_range' query parameter
            ('long_term', 'medium_term' or 'short_term').
        limit: Sent as the 'limit' query parameter (number of results requested).
        track: True queries 'me/top/tracks', False queries 'me/top/artists'.

    Returns:
        Whatever get_spotify_data returns for that endpoint.
    """
    endpoint = 'me/top/tracks' if track else 'me/top/artists'
    query = dict(time_range=time_range, limit=limit)
    return get_spotify_data(endpoint, access_token, query)

# Helper: pick the album cover URL written to the 'image_link' column
def _album_image_url(album):
    """Return the URL of the album's second image, else the first, else None."""
    images = album.get('images') or []
    if len(images) > 1:
        return images[1]['url']
    return images[0]['url'] if images else None


# Function to get top tracks or artists in the long_term, medium_term or short_term and save them as a csv file
def get_top_to_csv(access_token, time_range='medium_term', limit=50, track=True):
    """Fetch the user's top tracks or artists and write them to a CSV file.

    The file is written to the current working directory as
    'top{limit}_tracks_{time_range}.csv' or 'top{limit}_artist_{time_range}.csv',
    encoded as utf-8-sig. Rows keep the order returned by get_user_top and get
    a 1-based 'ranking' column sized to the number of rows actually returned,
    so fewer than `limit` results (or a limit other than 50) no longer fail.

    For tracks, three columns are derived before the nested ones are dropped:
    'release_date' (the year of the album release date), 'image_link' (see
    _album_image_url) and 'artist_name' (name of the first listed artist).

    Args:
        access_token: Token forwarded to get_user_top.
        time_range: 'long_term', 'medium_term' or 'short_term'.
        limit: Number of results requested; also used in the file name.
        track: True exports top tracks, False exports top artists.

    Returns:
        A status message. When the request failed or returned no items,
        nothing is written and the message says so.
    """
    kind = 'tracks' if track else 'artists'
    response = get_user_top(access_token, time_range=time_range, limit=limit, track=track)

    # get_user_top yields None on a failed request; treat that like an empty result.
    items = (response or {}).get('items') or []
    if not items:
        return f'No top {kind} retrieved in the {time_range}'

    df = pd.DataFrame(items)
    if track:
        albums = df['album']
        artists = df['artists']
        # errors='ignore': do not fail when the API response lacks one of these columns.
        df = df.drop(columns=['album',
                              'artists',
                              'available_markets',
                              'disc_number',
                              'duration_ms',
                              'explicit',
                              'external_ids',
                              'external_urls',
                              'href',
                              'is_local',
                              'preview_url',
                              'track_number',
                              'type',
                              'uri'], errors='ignore')
        df['release_date'] = albums.apply(lambda album: pd.to_datetime(album['release_date']).year)
        df['image_link'] = albums.apply(_album_image_url)
        df['artist_name'] = artists.apply(lambda listed: listed[0]['name'])
        filename = f'top{limit}_tracks_{time_range}.csv'
    else:
        df = df.drop(columns=['external_urls',
                              'followers',
                              'href',
                              'images',
                              'type',
                              'uri'], errors='ignore')
        filename = f'top{limit}_artist_{time_range}.csv'

    # Rank by position in the response, for however many rows came back.
    df['ranking'] = np.arange(1, len(df) + 1)
    df.to_csv(filename, index=False, encoding='utf-8-sig')
    return f'Retrieved top {limit} {kind} in the {time_range}'

In [212]:
# Exchange the authorization code for a token, then export the top tracks and
# top artists for every time range.
access_token = get_access_token(CLIENT_ID, CLIENT_SECRET, auth_code)
time_range = ['long_term', 'medium_term', 'short_term']

if access_token is None:
    # get_access_token already printed the HTTP status; without a token every
    # request below would fail, so stop here with an actionable hint.
    print('No access token available; nothing exported. '
          'Check CLIENT_ID, CLIENT_SECRET and auth_code.')
else:
    for tr in time_range:
        # Tracks first, then artists, for each time range.
        for track in (True, False):
            # Show the status message that get_top_to_csv returns instead of discarding it.
            print(get_top_to_csv(access_token, time_range=tr, limit=50, track=track))