In [1]:
# https://www.last.fm/api/accounts

import json

# Load the Last.fm API key and username from a local JSON file that lives
# outside the notebook. The lookups below expect the layout
# {"last_fm": {"KEY": ..., "USERNAME": ...}}.
# The encoding is given explicitly so the file is decoded the same way on
# every platform instead of with the locale's default encoding.
with open("data/credentials.json", "r", encoding="utf-8") as file:
    credentials = json.load(file)

# the file handle is no longer needed once the JSON has been parsed
last_fm_cr = credentials['last_fm']
key = last_fm_cr['KEY']
username = last_fm_cr['USERNAME']

In [2]:
# how long to pause between consecutive API requests, in seconds
# (the value is handed to time.sleep before each page request in get_scrobbles)
pause_duration = 0.2

In [3]:
import requests
import json
import time
import pandas as pd

In [4]:
# request template for the Last.fm "user.get*" methods; the placeholders are filled
# positionally, in this order: method suffix, user, api_key, limit, extended, page
url = 'https://ws.audioscrobbler.com/2.0/?method=user.get{}&user={}&api_key={}&limit={}&extended={}&page={}&format=json'
limit = 200 #api lets you retrieve up to 200 records per call
extended = 0 #api lets you retrieve extended data for each track, 0=no, 1=yes
page = 1 #page of results to start retrieving at

In [5]:
# Fetch one page (up to `limit` records) of the user's top tracks,
# save it to CSV and preview the first rows.
method = 'toptracks'
request_url = url.format(method, username, key, limit, extended, page)

# fail loudly on an HTTP error instead of tripping over a KeyError or a
# JSON decoding error further down
http_response = requests.get(request_url)
http_response.raise_for_status()
response = http_response.json()

# pull the fields of interest out of every returned track
tracks = response[method]['track']
artist_names = [item['artist']['name'] for item in tracks]
track_names = [item['name'] for item in tracks]
play_counts = [item['playcount'] for item in tracks]

# build the frame in one go; dict order fixes the column order of the CSV
top_tracks = pd.DataFrame({
    'artist': artist_names,
    'track': track_names,
    'play_count': play_counts,
})
top_tracks.to_csv('data/lastfm_top_tracks.csv', index=False, encoding='utf-8')
top_tracks.head()

Unnamed: 0,artist,track,play_count
0,Opeth,Bleak,13
1,King Gizzard & The Lizard Wizard,Mars for the Rich,12
2,King Gizzard & The Lizard Wizard,Organ Farmer,12
3,Lucifer,Before the Sun,12
4,Opeth,Benighted,12


In [6]:
# Fetch one page (up to `limit` records) of the user's top artists,
# save it to CSV and preview the first rows.
method = 'topartists'
request_url = url.format(method, username, key, limit, extended, page)

# fail loudly on an HTTP error instead of tripping over a KeyError or a
# JSON decoding error further down
http_response = requests.get(request_url)
http_response.raise_for_status()
response = http_response.json()

# pull the fields of interest out of every returned artist
artists = response[method]['artist']
artist_names = [item['name'] for item in artists]
play_counts = [item['playcount'] for item in artists]

# build the frame in one go; dict order fixes the column order of the CSV
top_artists = pd.DataFrame({
    'artist': artist_names,
    'play_count': play_counts,
})
top_artists.to_csv('data/lastfm_top_artists.csv', index=False, encoding='utf-8')
top_artists.head()

Unnamed: 0,artist,play_count
0,Opeth,720
1,Between the Buried and Me,228
2,King Gizzard & The Lizard Wizard,203
3,Queens of the Stone Age,186
4,Lucifer,178


In [7]:
# Fetch one page (up to `limit` records) of the user's top albums,
# save it to CSV and preview the first rows.
method = 'topalbums'
request_url = url.format(method, username, key, limit, extended, page)

# fail loudly on an HTTP error instead of tripping over a KeyError or a
# JSON decoding error further down
http_response = requests.get(request_url)
http_response.raise_for_status()
response = http_response.json()

# pull the fields of interest out of every returned album
albums = response[method]['album']
artist_names = [item['artist']['name'] for item in albums]
album_names = [item['name'] for item in albums]
play_counts = [item['playcount'] for item in albums]

# build the frame in one go; dict order fixes the column order of the CSV
top_albums = pd.DataFrame({
    'artist': artist_names,
    'album': album_names,
    'play_count': play_counts,
})
top_albums.to_csv('data/lastfm_top_albums.csv', index=False, encoding='utf-8')
top_albums.head()

Unnamed: 0,artist,album,play_count
0,Between the Buried and Me,Coma Ecliptic,97
1,King Gizzard & The Lizard Wizard,Infest the Rats' Nest,95
2,Opeth,Blackwater Park,89
3,Gojira,From Mars to Sirius,80
4,The Oath,The Oath,79


In [8]:
def get_scrobbles(method='recenttracks', username=username, key=key, limit=200, extended=0, page=1, pages=0):
    '''
    Download a user's scrobbles from the Last.fm API and return them as a DataFrame.

    method: api method suffix, appended to "user.get" in the request url
    username/key: api credentials
    limit: api lets you retrieve up to 200 records per call
    extended: api lets you retrieve extended data for each track, 0=no, 1=yes
              NOTE(review): the parsing below reads artist/album names from the
              '#text' key; confirm the extended=1 payload keeps that shape
              before relying on it.
    page: page of results to start retrieving at
    pages: how many pages of results to retrieve. if 0, get as many as api can return.

    returns: DataFrame with one row per completed scrobble and the columns
             artist, artist_mbid, album, album_mbid, track, track_mbid,
             timestamp (exactly as returned by the api) and datetime
             (timestamp interpreted as seconds since the epoch).
    raises: requests.HTTPError if any request comes back with an error status.
    '''
    url = 'https://ws.audioscrobbler.com/2.0/?method=user.get{}&user={}&api_key={}&limit={}&extended={}&page={}&format=json'
    columns = ['artist', 'artist_mbid', 'album', 'album_mbid', 'track', 'track_mbid', 'timestamp']

    def fetch(page_number):
        # request a single page of results and return its decoded json payload
        response = requests.get(url.format(method, username, key, limit, extended, page_number))
        # surface HTTP failures here rather than as a KeyError while parsing
        response.raise_for_status()
        return response.json()

    # make first request, just to get the total number of pages
    total_pages = int(fetch(page)[method]['@attr']['totalPages'])

    # honour the requested starting page (previously the loop always began at
    # page 1) and cap the run at `pages` pages when a positive budget is given
    first_page = max(page, 1)
    last_page = total_pages
    if pages > 0:
        last_page = min(total_pages, first_page + pages - 1)
    page_numbers = range(first_page, last_page + 1)

    print('{} total pages to retrieve'.format(len(page_numbers)))

    # request each page of data one at a time
    payloads = []
    for page_number in page_numbers:
        if page_number % 10 == 0: print(page_number, end=' ')
        # pause between consecutive requests
        time.sleep(pause_duration)
        payloads.append(fetch(page_number))

    # parse the fields out of each scrobble in each page of scrobbles
    records = []
    for payload in payloads:
        for scrobble in payload[method]['track']:
            # only retain completed scrobbles (aka, with timestamp and not 'now playing')
            if 'date' in scrobble:
                records.append({
                    'artist': scrobble['artist']['#text'],
                    'artist_mbid': scrobble['artist']['mbid'],
                    'album': scrobble['album']['#text'],
                    'album_mbid': scrobble['album']['mbid'],
                    'track': scrobble['name'],
                    'track_mbid': scrobble['mbid'],
                    'timestamp': scrobble['date']['uts'],
                })

    # passing `columns` keeps the expected columns even when nothing was retrieved
    df = pd.DataFrame(records, columns=columns)
    df['datetime'] = pd.to_datetime(df['timestamp'].astype(int), unit='s')

    return df

In [9]:
# get all scrobbled tracks ever, in order of recency
# (pages=0 tells get_scrobbles to retrieve as many pages as the api can return)
scrobbles = get_scrobbles(pages=0)

45 total pages to retrieve
10 20 30 40 

In [10]:
# write the scrobble history to disk, report how many rows it holds,
# then preview the first few of them
output_path = 'data/lastfm_scrobbles.csv'
scrobbles.to_csv(output_path, index=None, encoding='utf-8')

row_count = len(scrobbles)
print('{:,} total rows'.format(row_count))

scrobbles.head()

8,805 total rows


Unnamed: 0,artist,artist_mbid,album,album_mbid,track,track_mbid,timestamp,datetime
0,Subvision,7704f820-b48c-44ea-a83a-7fa8e337308a,So Far So Noir,5449233f-8be9-46a8-b0d4-88c964970d51,Room 611,6b8d5e98-b229-43d3-8716-62a1f6c1693d,1633870827,2021-10-10 13:00:27
1,Artifex Pereo,9cbdf028-da04-4095-a6f7-349fcf82992c,Passengers,9ce51dd1-be64-410b-aa73-e3345909f992,Paper Ruled All,4ed19189-c527-46e6-a68c-73a18508b47b,1633870572,2021-10-10 12:56:12
2,Judas Priest,6b335658-22c8-485d-93de-0bc29a1d0349,Firepower,2bedb078-b357-48fa-ba6e-bc59c7791b8b,Lightning Strike,25513122-daf2-486f-8256-224259b25960,1633870276,2021-10-10 12:51:16
3,Electric Citizen,f0e56cc2-3a42-4d20-a0c4-1f83e7f56e70,Higher Time,b9ca933a-d25a-4f1d-baca-1f9c19d15001,Ghost Of Me,0595c3e3-e556-4bfc-b75b-c76e45bb8ed9,1633825259,2021-10-10 00:20:59
4,Joan Jett and the Blackhearts,46e63d3b-d91b-4791-bb73-e9f638a45ea0,Good Music,705f3db2-59a3-4765-8afb-1d16a2a0984e,Roadrunner,04db7e7d-bf13-397c-aa26-7856b0791cf0,1633825038,2021-10-10 00:17:18
