Importing the data

In [74]:
from pathlib import Path
import json
import pandas as pd
import requests
import time
from typing import Dict, Optional
from datetime import datetime

INPUT_GLOB = "data/*.json"
OG_CSV = "data/og_data.csv"
CLEANED_CSV = "data/cleaned_data.csv"
ENRICHED_CSV = "data/music_data.csv"

The data I obtained from spotify was in 4 json files of the same structure, so I extract it and after preprocessing I'll produce a csv file

In [46]:
from glob import glob

files = sorted(glob(INPUT_GLOB))
if not files:
    raise FileNotFoundError(f"No files matched {INPUT_GLOB}")

frames = []
for fp in files:
    with open(fp, "r", encoding="utf-8") as f:
        data = json.load(f)
        if not isinstance(data, list):
            print(f"Skipping {fp}: top-level JSON is not a list")
            continue
        df = pd.json_normalize(data)
        frames.append(df)

merged = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
print(f"Loaded {len(files)} files → {len(merged):,} rows × {merged.shape[1]} cols")
merged.tail()


Loaded 4 files → 58,319 rows × 23 cols


Unnamed: 0,ts,platform,ms_played,conn_country,ip_addr,master_metadata_track_name,master_metadata_album_artist_name,master_metadata_album_album_name,spotify_track_uri,episode_name,...,audiobook_uri,audiobook_chapter_uri,audiobook_chapter_title,reason_start,reason_end,shuffle,skipped,offline,offline_timestamp,incognito_mode
58314,2025-10-10T21:40:06Z,android,215813,FR,193.52.24.53,Gonna Love Ya,Avicii,Stories,spotify:track:1rDETmcizen79JghAEm1Se,,...,,,,trackdone,trackdone,True,False,False,1760132000.0,False
58315,2025-10-10T21:43:10Z,android,184013,FR,193.52.24.53,Chemical,Post Malone,Chemical,spotify:track:5w40ZYhbBMAlHYNDaVJIUu,,...,,,,trackdone,trackdone,True,False,False,1760132000.0,False
58316,2025-10-10T21:47:45Z,android,273765,FR,193.52.24.53,Virus (How About Now),Martin Garrix,Virus (How About Now),spotify:track:3ukWpmRHvpuDATCJkgLEkF,,...,,,,trackdone,trackdone,True,False,False,1760133000.0,False
58317,2025-10-10T21:51:27Z,android,221273,FR,193.52.24.53,Reload - Radio Edit,Sebastian Ingrosso,Reload,spotify:track:5jyUBKpmaH670zrXrE0wmO,,...,,,,trackdone,trackdone,True,False,False,1760133000.0,False
58318,2025-10-10T21:53:46Z,android,139384,FR,193.52.24.53,Gatluak - Bakermat Remix,Kronan,Gatluak,spotify:track:0GT2hQQ6mdHeW0AZDl5NlJ,,...,,,,trackdone,trackdone,True,False,False,1760133000.0,False


Outputting the original, non-modified dataset in csv format (og_data.csv)

In [47]:
def export_to_csv(df, output_csv):
    Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_csv, index=False)
    print(f"Wrote {len(df):,} rows to {output_csv}")

In [49]:
export_to_csv(merged, OG_CSV)

Wrote 58,319 rows to data/og_data.csv


Checking the structure of the dataset I've obtained:

In [50]:
def summary_func(df):
    summary = pd.DataFrame({
        "dtype": df.dtypes,
        "non_null": df.notna().sum(),
        "nulls": df.isna().sum(),
    })
    summary["pct_null"] = summary["nulls"] / len(df)
    summary["nunique_dropna"] = df.nunique(dropna=True)
    summary["nunique_incl_na"] = df.nunique(dropna=False)
    summary = summary.sort_values(["nunique_incl_na","pct_null","dtype"]).reset_index().rename(columns={"index":"column"})
    return summary

In [51]:
summary_func(merged).head(23)

Unnamed: 0,column,dtype,non_null,nulls,pct_null,nunique_dropna,nunique_incl_na
0,audiobook_title,object,0,58319,1.0,0,1
1,audiobook_uri,object,0,58319,1.0,0,1
2,audiobook_chapter_uri,object,0,58319,1.0,0,1
3,audiobook_chapter_title,object,0,58319,1.0,0,1
4,shuffle,bool,58319,0,0.0,2,2
5,skipped,bool,58319,0,0.0,2,2
6,offline,bool,58319,0,0.0,2,2
7,incognito_mode,bool,58319,0,0.0,2,2
8,platform,object,58319,0,0.0,7,7
9,reason_start,object,58319,0,0.0,9,9


I noticed that there are columns which in my case might be empty or have too few non-null values (for example, I usually don't listen to audiobooks or podcasts on Spotify, so all of the rows and columns related to those topics have to be removed). I also remove the ip address and incognito mode columns for data privacy purposes.

In [52]:
music_only = merged[merged["spotify_track_uri"].notna()].reset_index(drop=True)

In [53]:
drop_cols = ["episode_show_name", "episode_name", "spotify_episode_uri", "audiobook_title", "audiobook_uri", "audiobook_chapter_uri", "audiobook_chapter_title", "ip_addr", "incognito_mode", "offline_timestamp", "shuffle"]
music_only = music_only.drop(columns=drop_cols)
music_only.shape

(58297, 12)

In [54]:
summary_func(music_only).head(14)


Unnamed: 0,column,dtype,non_null,nulls,pct_null,nunique_dropna,nunique_incl_na
0,skipped,bool,58297,0,0.0,2,2
1,offline,bool,58297,0,0.0,2,2
2,platform,object,58297,0,0.0,7,7
3,reason_start,object,58297,0,0.0,9,9
4,reason_end,object,58297,0,0.0,10,10
5,conn_country,object,58297,0,0.0,14,14
6,master_metadata_album_artist_name,object,58297,0,0.0,5406,5406
7,master_metadata_album_album_name,object,58297,0,0.0,9497,9497
8,master_metadata_track_name,object,58297,0,0.0,12100,12100
9,spotify_track_uri,object,58297,0,0.0,14074,14074


In [55]:
music_only.head()

Unnamed: 0,ts,platform,ms_played,conn_country,master_metadata_track_name,master_metadata_album_artist_name,master_metadata_album_album_name,spotify_track_uri,reason_start,reason_end,skipped,offline
0,2022-12-23T14:38:28Z,android,155986,UA,The Fusion,Omnia,A State Of Trance Year Mix 2012,spotify:track:6Rd4QKdxu1mxigZrL3f2Go,trackdone,trackdone,False,False
1,2022-12-23T14:40:52Z,android,143272,UA,Absolut,Bjornberg,Absolut,spotify:track:7jBLucQFTopT38PDxm0s0V,trackdone,trackdone,False,False
2,2022-12-23T14:43:57Z,android,184508,UA,Rio De Janeiro,Maison & Dragen,EDM Experience 001,spotify:track:7c7WEErzg7TWLUUY8llgEs,trackdone,trackdone,False,False
3,2022-12-23T14:44:08Z,android,1280,UA,J'ai Envie De Toi,GAIA,J'ai Envie De Toi,spotify:track:3bupH9r8ZlCeLGMvlyL7rE,trackdone,endplay,True,False
4,2022-12-23T14:46:01Z,android,113265,UA,Let It Snow! Let It Snow! Let It Snow!,Olivier Abeille,Xmas Memories,spotify:track:6EHw6eo7BwCiBDUro7YNdp,clickrow,trackdone,False,False


In [56]:
export_to_csv(music_only, CLEANED_CSV)

Wrote 58,297 rows to data/cleaned_data.csv


I like the data I've got, but to do more discoveries about my music taste and habits, I'd like to enrich it with additional info about artists. For that, I will use Spotify API:

I see that now I don't have the artist URI column in my dataset, but it is crucial for fetching additional data about the artists. So I'll use Spotify API and track URIs:

In [None]:
CLIENT_ID = '416eb70cbfc84c79a0626aa40568d86a'
CLIENT_SECRET = 'here_goes_my_secret'

def get_spotify_token(client_id: str, client_secret: str) -> str:
    """Get Spotify API access token"""
    auth_url = 'https://accounts.spotify.com/api/token'
    auth_response = requests.post(auth_url, {
        'grant_type': 'client_credentials',
        'client_id': client_id,
        'client_secret': client_secret,
    })
    auth_response.raise_for_status()
    return auth_response.json()['access_token']

token = get_spotify_token(CLIENT_ID, CLIENT_SECRET)
print("✓ Successfully connected to Spotify API!")

✓ Successfully connected to Spotify API!


As Spotify has limits on the amount of requests I can send, in order to minimize it I will firsty get the unique track IDs and artists I have been listening to:

In [None]:
def extract_track_id(spotify_uri):
    """Extract track ID from spotify:track:XXXXX format"""
    if pd.isna(spotify_uri) or not isinstance(spotify_uri, str):
        return None
    parts = spotify_uri.split(':')
    if len(parts) == 3 and parts[0] == 'spotify' and parts[1] == 'track':
        return parts[2]
    return None

music_only['track_id'] = music_only['spotify_track_uri'].apply(extract_track_id)

unique_tracks = music_only[music_only['track_id'].notna()]['track_id'].unique()

print(f"Total rows: {len(music_only):,}")
print(f"Unique tracks: {len(unique_tracks):,}")
print(f"Reduction: {len(music_only) - len(unique_tracks):,} duplicate track plays")
print(f"\nEstimated API calls needed: ~{len(unique_tracks) // 50 + 1} for tracks") #50 artists can be processed per API call

Total rows: 58,297
Unique tracks: 14,074
Reduction: 44,223 duplicate track plays

Estimated API calls needed: ~282 for tracks


In [62]:
def get_tracks_batch(track_ids, token):
    """Get multiple tracks in a single API call (up to 50)"""
    headers = {'Authorization': f'Bearer {token}'}
    track_ids_str = ','.join(track_ids[:50])
    url = f'https://api.spotify.com/v1/tracks?ids={track_ids_str}'
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    return response.json()

#to store track_id -> artist_id mapping
track_to_artist = {}

batch_size = 50
total_batches = len(unique_tracks) // batch_size + 1

print(f"Fetching track data in {total_batches} batches...")
print("This will take approximately {:.1f} minutes".format(total_batches * 0.05 / 60))

for i in range(0, len(unique_tracks), batch_size):
    batch = unique_tracks[i:i+batch_size].tolist()
    
    try:
        result = get_tracks_batch(batch, token)
        
        for track in result.get('tracks', []):
            if track and track.get('artists'):
                track_id = track['id']
                artist_id = track['artists'][0]['id']
                artist_name = track['artists'][0]['name']
                
                track_to_artist[track_id] = {
                    'artist_id': artist_id,
                    'artist_name': artist_name
                }
        
        if (i // batch_size + 1) % 10 == 0:
            print(f"Progress: {i + batch_size}/{len(unique_tracks)} tracks processed")
        
        time.sleep(0.05)
        
    except Exception as e:
        print(f"Error in batch {i//batch_size + 1}: {e}")
        time.sleep(1)

print(f"\n✓ Fetched data for {len(track_to_artist):,} tracks")

with open('track_to_artist_mapping.json', 'w') as f:
    json.dump(track_to_artist, f)
print("✓ Saved intermediate mapping to 'track_to_artist_mapping.json'")

Fetching track data in 282 batches...
This will take approximately 0.2 minutes
Progress: 500/14074 tracks processed
Progress: 1000/14074 tracks processed
Progress: 1500/14074 tracks processed
Progress: 2000/14074 tracks processed
Progress: 2500/14074 tracks processed
Progress: 3000/14074 tracks processed
Progress: 3500/14074 tracks processed
Progress: 4000/14074 tracks processed
Progress: 4500/14074 tracks processed
Progress: 5000/14074 tracks processed
Progress: 5500/14074 tracks processed
Progress: 6000/14074 tracks processed
Progress: 6500/14074 tracks processed
Progress: 7000/14074 tracks processed
Progress: 7500/14074 tracks processed
Progress: 8000/14074 tracks processed
Progress: 8500/14074 tracks processed
Progress: 9000/14074 tracks processed
Progress: 9500/14074 tracks processed
Progress: 10000/14074 tracks processed
Progress: 10500/14074 tracks processed
Progress: 11000/14074 tracks processed
Progress: 11500/14074 tracks processed
Progress: 12000/14074 tracks processed
Progr

In [64]:
#get unique artists
unique_artist_ids = set()
unique_artist_names = set()

for track_data in track_to_artist.values():
    unique_artist_ids.add(track_data['artist_id'])
    unique_artist_names.add(track_data['artist_name'])

print(f"Unique tracks: {len(unique_tracks):,}")
print(f"Unique artists: {len(unique_artist_ids):,}")
print(f"Reduction: {len(unique_tracks) - len(unique_artist_ids):,} repeated artists")
print(f"\nEstimated API calls needed for artists: {len(unique_artist_ids):,}")
print(f"Estimated time: ~{len(unique_artist_ids) * 0.05 / 60:.1f} minutes")

Unique tracks: 14,074
Unique artists: 5,406
Reduction: 8,668 repeated artists

Estimated API calls needed for artists: 5,406
Estimated time: ~4.5 minutes


In [69]:
def get_artists_batch(artist_ids, token, max_retries=5):
    """
    Get multiple artists in a single API call (up to 50)
    With exponential backoff for rate limiting
    """
    headers = {'Authorization': f'Bearer {token}'}
    artist_ids_str = ','.join(artist_ids[:50])
    url = f'https://api.spotify.com/v1/artists?ids={artist_ids_str}'
    
    for attempt in range(max_retries):
        try:
            response = requests.get(url, headers=headers)
            
            if response.status_code == 429:
                retry_after = int(response.headers.get('Retry-After', 5))
                print(f"  ⚠ Rate limited. Waiting {retry_after} seconds...")
                time.sleep(retry_after + 1)
                continue
            
            response.raise_for_status()
            return response.json()
            
        except requests.exceptions.RequestException as e:
            if attempt < max_retries - 1:
                wait_time = 2 ** attempt  #exponential backoff: 1, 2, 4, 8, 16 seconds
                print(f"  ⚠ Error (attempt {attempt + 1}/{max_retries}): {e}")
                print(f"  Waiting {wait_time} seconds before retry...")
                time.sleep(wait_time)
            else:
                print(f"  ✗ Failed after {max_retries} attempts: {e}")
                return None
    
    return None

try:
    with open('artist_data.json', 'r') as f:
        artist_data = json.load(f)
    print(f"Loaded existing cache with {len(artist_data)} artists")
except FileNotFoundError:
    artist_data = {}
    print("No existing cache found, starting fresh")

unique_artists_list = list(unique_artist_ids)
total_artists = len(unique_artists_list)

artists_to_fetch = [aid for aid in unique_artists_list if aid not in artist_data]

print(f"\nTotal unique artists: {total_artists:,}")
print(f"Already cached: {len(artist_data):,}")
print(f"To fetch: {len(artists_to_fetch):,}")

batch_size = 50
total_batches = len(artists_to_fetch) // batch_size + (1 if len(artists_to_fetch) % batch_size else 0)

print(f"Batches needed: {total_batches}")
print(f"Estimated time: ~{total_batches * 0.5 / 60:.1f} minutes\n")

#process in batches
for i in range(0, len(artists_to_fetch), batch_size):
    batch = artists_to_fetch[i:i+batch_size]
    batch_num = i // batch_size + 1
    
    print(f"Batch {batch_num}/{total_batches}: Fetching {len(batch)} artists...")
    
    result = get_artists_batch(batch, token)
    
    if result and result.get('artists'):
        for artist in result['artists']:
            if artist:  # artist can be None if not found
                artist_data[artist['id']] = {
                    'name': artist.get('name'),
                    'genres': ', '.join(artist.get('genres', [])[:5]),
                    'popularity': artist.get('popularity'),
                    'followers': artist.get('followers', {}).get('total'),
                }
        
        print(f"  ✓ Batch {batch_num} complete ({len(artist_data)}/{total_artists} total)")
    else:
        print(f"  ✗ Batch {batch_num} failed")
        #add empty entries for failed artists to avoid re-fetching
        for artist_id in batch:
            if artist_id not in artist_data:
                artist_data[artist_id] = {
                    'name': None,
                    'genres': None,
                    'popularity': None,
                    'followers': None,
                }
    
    if batch_num % 5 == 0:
        with open('artist_data.json', 'w') as f:
            json.dump(artist_data, f)
        print(f"  💾 Progress saved")
    
    time.sleep(0.5)

with open('artist_data.json', 'w') as f:
    json.dump(artist_data, f)

print(f"\n✓ Fetched data for {len(artist_data):,} artists")
print(f"✓ Saved artist data to 'artist_data.json'")

No existing cache found, starting fresh

Total unique artists: 5,406
Already cached: 0
To fetch: 5,406
Batches needed: 109
Estimated time: ~0.9 minutes

Batch 1/109: Fetching 50 artists...
  ✓ Batch 1 complete (50/5406 total)
Batch 2/109: Fetching 50 artists...
  ✓ Batch 2 complete (100/5406 total)
Batch 3/109: Fetching 50 artists...
  ✓ Batch 3 complete (150/5406 total)
Batch 4/109: Fetching 50 artists...
  ✓ Batch 4 complete (200/5406 total)
Batch 5/109: Fetching 50 artists...
  ✓ Batch 5 complete (250/5406 total)
  💾 Progress saved
Batch 6/109: Fetching 50 artists...
  ✓ Batch 6 complete (300/5406 total)
Batch 7/109: Fetching 50 artists...
  ✓ Batch 7 complete (350/5406 total)
Batch 8/109: Fetching 50 artists...
  ✓ Batch 8 complete (400/5406 total)
Batch 9/109: Fetching 50 artists...
  ✓ Batch 9 complete (450/5406 total)
Batch 10/109: Fetching 50 artists...
  ✓ Batch 10 complete (500/5406 total)
  💾 Progress saved
Batch 11/109: Fetching 50 artists...
  ✓ Batch 11 complete (550/5406

In [71]:
print("Mapping artist data back to original dataframe...")
music_only['artist_id'] = None
music_only['artist_name_spotify'] = None
music_only['artist_genres'] = None
music_only['artist_popularity'] = None
music_only['artist_followers'] = None

#map using track_id
for idx, row in music_only.iterrows():
    track_id = row['track_id']
    
    if pd.notna(track_id) and track_id in track_to_artist:
        artist_id = track_to_artist[track_id]['artist_id']
        
        music_only.at[idx, 'artist_id'] = artist_id
        
        if artist_id in artist_data:
            music_only.at[idx, 'artist_name_spotify'] = artist_data[artist_id]['name']
            music_only.at[idx, 'artist_genres'] = artist_data[artist_id]['genres']
            music_only.at[idx, 'artist_popularity'] = artist_data[artist_id]['popularity']
            music_only.at[idx, 'artist_followers'] = artist_data[artist_id]['followers']
    
    if (idx + 1) % 5000 == 0:
        print(f"Mapped {idx + 1}/{len(music_only)} rows")

print("\n✓ Mapping complete!")
print(f"Rows with artist data: {music_only['artist_id'].notna().sum():,}")

Mapping artist data back to original dataframe...
Mapped 5000/58297 rows
Mapped 10000/58297 rows
Mapped 15000/58297 rows
Mapped 20000/58297 rows
Mapped 25000/58297 rows
Mapped 30000/58297 rows
Mapped 35000/58297 rows
Mapped 40000/58297 rows
Mapped 45000/58297 rows
Mapped 50000/58297 rows
Mapped 55000/58297 rows

✓ Mapping complete!
Rows with artist data: 58,297


In [72]:
music_only

Unnamed: 0,ts,platform,ms_played,conn_country,master_metadata_track_name,master_metadata_album_artist_name,master_metadata_album_album_name,spotify_track_uri,reason_start,reason_end,skipped,offline,track_id,artist_id,artist_name_spotify,artist_genres,artist_popularity,artist_followers
0,2022-12-23T14:38:28Z,android,155986,UA,The Fusion,Omnia,A State Of Trance Year Mix 2012,spotify:track:6Rd4QKdxu1mxigZrL3f2Go,trackdone,trackdone,False,False,6Rd4QKdxu1mxigZrL3f2Go,0XZkeCAlpiO5qcIlAJzZaA,Omnia,"trance, progressive trance",39,48581
1,2022-12-23T14:40:52Z,android,143272,UA,Absolut,Bjornberg,Absolut,spotify:track:7jBLucQFTopT38PDxm0s0V,trackdone,trackdone,False,False,7jBLucQFTopT38PDxm0s0V,2UvXv8S4pzzh6ze3ztLIRh,Bjornberg,,11,2719
2,2022-12-23T14:43:57Z,android,184508,UA,Rio De Janeiro,Maison & Dragen,EDM Experience 001,spotify:track:7c7WEErzg7TWLUUY8llgEs,trackdone,trackdone,False,False,7c7WEErzg7TWLUUY8llgEs,07JTRGL3SO0v492XOpYhGN,Maison & Dragen,,18,5861
3,2022-12-23T14:44:08Z,android,1280,UA,J'ai Envie De Toi,GAIA,J'ai Envie De Toi,spotify:track:3bupH9r8ZlCeLGMvlyL7rE,trackdone,endplay,True,False,3bupH9r8ZlCeLGMvlyL7rE,3Jkc5q9qBSNOTf3IvAyJW9,GAIA,"trance, progressive trance",45,108662
4,2022-12-23T14:46:01Z,android,113265,UA,Let It Snow! Let It Snow! Let It Snow!,Olivier Abeille,Xmas Memories,spotify:track:6EHw6eo7BwCiBDUro7YNdp,clickrow,trackdone,False,False,6EHw6eo7BwCiBDUro7YNdp,43d1D4ncfmUYZCkZsxvfEy,Olivier Abeille,lo-fi,14,437
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58292,2025-10-10T21:40:06Z,android,215813,FR,Gonna Love Ya,Avicii,Stories,spotify:track:1rDETmcizen79JghAEm1Se,trackdone,trackdone,False,False,1rDETmcizen79JghAEm1Se,1vCWHaC5f2uS3yhpwWbIA6,Avicii,edm,78,23420376
58293,2025-10-10T21:43:10Z,android,184013,FR,Chemical,Post Malone,Chemical,spotify:track:5w40ZYhbBMAlHYNDaVJIUu,trackdone,trackdone,False,False,5w40ZYhbBMAlHYNDaVJIUu,246dkjvS1zLTtiykXe5h60,Post Malone,,86,47656151
58294,2025-10-10T21:47:45Z,android,273765,FR,Virus (How About Now),Martin Garrix,Virus (How About Now),spotify:track:3ukWpmRHvpuDATCJkgLEkF,trackdone,trackdone,False,False,3ukWpmRHvpuDATCJkgLEkF,60d24wfXkVzDSfLS6hyCjZ,Martin Garrix,"edm, electronica, progressive house",74,15121775
58295,2025-10-10T21:51:27Z,android,221273,FR,Reload - Radio Edit,Sebastian Ingrosso,Reload,spotify:track:5jyUBKpmaH670zrXrE0wmO,trackdone,trackdone,False,False,5jyUBKpmaH670zrXrE0wmO,6hyMWrxGBsOx6sWcVj1DqP,Sebastian Ingrosso,edm,67,1007675


In [76]:
(music_only['master_metadata_album_artist_name'] == music_only['artist_name_spotify']).all()


False

In [80]:
(music_only['master_metadata_album_artist_name'] != music_only['artist_name_spotify']).sum()


257

In [81]:
music_only[music_only['master_metadata_album_artist_name'] != music_only['artist_name_spotify']]

Unnamed: 0,ts,platform,ms_played,conn_country,master_metadata_track_name,master_metadata_album_artist_name,master_metadata_album_album_name,spotify_track_uri,reason_start,reason_end,skipped,offline,track_id,artist_id,artist_name_spotify,artist_genres,artist_popularity,artist_followers
185,2022-12-28T08:40:29Z,android,0,UA,Keep Me Close,Jeffrey Sutorius,Keep Me Close,spotify:track:6GifPmBMc0n2Gs3zfRkB5D,fwdbtn,fwdbtn,True,False,6GifPmBMc0n2Gs3zfRkB5D,2XGg454n1pSdgoqrfcSDbq,JEFFREY SUTORIUS,"progressive house, big room",28,17774
560,2023-01-01T02:03:23Z,windows,240298,UA,Стань,Antytila,MLNL,spotify:track:2eRMKybvQJYHBZdbEKKDtz,fwdbtn,trackdone,False,False,2eRMKybvQJYHBZdbEKKDtz,5sc9td6C7xxPa3mOmmvXPu,АНТИТІЛА,,42,392136
713,2023-01-04T02:30:52Z,windows,110314,UA,Can't Hold Us (feat. Ray Dalton),Macklemore & Ryan Lewis,The Heist,spotify:track:22skzmqfdWrjJylampe0kt,trackdone,logout,False,False,22skzmqfdWrjJylampe0kt,3JhNCzhSMTxs9WLGJJxWOY,Macklemore,,74,2814815
744,2023-01-05T20:44:11Z,windows,85266,UA,Carol of the Bells,Mykola Dmytrovych Leontovych,Home Alone (Original Motion Picture Soundtrack...,spotify:track:4tHqQMWSqmL6YjXwsqthDI,trackdone,trackdone,False,False,4tHqQMWSqmL6YjXwsqthDI,3dRfiJ2650SZu6GbydcHNb,John Williams,soundtrack,69,1598950
2239,2023-02-05T22:31:24Z,windows,90770,UA,Never Forget,Jónsi,In the Silence,spotify:track:40sYYZZN8BLWs8Qmwamngw,playbtn,endplay,True,False,40sYYZZN8BLWs8Qmwamngw,1WCD4cddyV9Yk4zk7H9eju,Greta Salóme,,13,3771
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56103,2025-07-05T23:24:39Z,android,228720,BE,"Така, як ти",Svyatoslav Vakarchuk,Вночі,spotify:track:1hsANSvcIkut1BRVIHXtLO,trackdone,trackdone,False,False,1hsANSvcIkut1BRVIHXtLO,2xEuSZSodyOIuUAxBM3I5I,Sviatoslav Vakarchuk,,40,78198
56138,2025-07-06T01:36:14Z,android,213374,BE,Tdme,Antytila,Hello,spotify:track:2Pa6q36FP2oAM5q1hFjVSG,trackdone,trackdone,False,False,2Pa6q36FP2oAM5q1hFjVSG,5sc9td6C7xxPa3mOmmvXPu,АНТИТІЛА,,42,392136
56139,2025-07-06T01:40:27Z,android,251874,BE,Люди як кораблі,Antytila,Сонце,spotify:track:6iXggLYvLeiccxZq61pl2i,trackdone,trackdone,False,False,6iXggLYvLeiccxZq61pl2i,5sc9td6C7xxPa3mOmmvXPu,АНТИТІЛА,,42,392136
56668,2025-08-05T22:21:14Z,android,546,BE,Madame,blond,Pour la vie entière,spotify:track:0Slo6AUP4sR8gE1wdQBiRM,unknown,endplay,True,False,0Slo6AUP4sR8gE1wdQBiRM,6bAfbEF8yCMBTtXEBFLh2x,Blond,"french indie pop, french pop",28,5646


In [82]:
music_only = music_only.drop(columns='artist_name_spotify')

In [83]:
export_to_csv(music_only, ENRICHED_CSV)

Wrote 58,297 rows to data/music_data.csv
