In [1]:
import os
import json
import pandas as pd

# Collect every streaming-history record from the JSON exports in ./data/.
data = []
folder_path = "./data/"
# os.listdir returns *all* entries in arbitrary order. Sorting makes the load
# order (and thus which duplicate row comes first later on) reproducible, and
# the filter skips stray non-JSON entries (e.g. .DS_Store, .ipynb_checkpoints)
# that would otherwise make open()/json.load() fail.
for filename in sorted(os.listdir(folder_path)):
    file_path = os.path.join(folder_path, filename)
    if not (filename.lower().endswith(".json") and os.path.isfile(file_path)):
        continue
    with open(file_path, 'r', encoding='utf-8') as file:
        # Each file is expected to hold a JSON array of play records
        # (data.extend requires an iterable of records).
        data.extend(json.load(file))

df = pd.DataFrame(data)
columns_to_keep = ['ms_played', 'master_metadata_track_name', 'master_metadata_album_artist_name',
                   'spotify_track_uri']

df = df[columns_to_keep]
# Convert milliseconds to minutes (60000 ms per minute), rounded to 4 decimals;
# the column is renamed to "mins_played" on the next line.
df['ms_played'] = (df['ms_played'] / 60000).round(4)
df.columns = ["mins_played", "song", "artist", "uri"]
# Keep only the bare track id by dropping the literal "spotify:track:" prefix.
df["uri"] = df["uri"].str.replace("spotify:track:", "", regex=False)

In [2]:
# Collapse the play log to one row per (song, artist) pair.
key_cols = ['song', 'artist']

# Total minutes played for each pair.
df_unique = df.groupby(key_cols, as_index=False)['mins_played'].sum()

# One URI per pair: the one on the first row where the pair appears.
df_uri = df[key_cols + ['uri']].drop_duplicates(subset=key_cols)

# Attach the URI to the totals, then order from most- to least-played.
df_unique = df_unique.merge(df_uri, on=key_cols, how='left')
df = df_unique.sort_values(by='mins_played', ascending=False)

In [3]:
# Keep only rows with at least 10 minutes of total play time,
# then renumber the rows from 0.
at_least_ten_minutes = df['mins_played'].ge(10)
df = df.loc[at_least_ten_minutes].reset_index(drop=True)

In [4]:
# Show the filtered, re-indexed table as the cell's output.
df

Unnamed: 0,song,artist,mins_played,uri
0,Way down We Go,KALEO,382.7891,0y1QJc3SJVPKJ1OvFmFqe6
1,Streets,Doja Cat,347.0222,60ynsPSSKe6O3sfwRnIBRf
2,Superman,Eminem,342.6987,4woTEX1wYOTGDqNXuavlRC
3,Supermassive Black Hole,Muse,315.2990,3lPr8ghNDBLc2uZovNyLs9
4,Little Dark Age,MGMT,289.9186,2Y0iGXY6m6immVb2ktbseM
...,...,...,...,...
449,Save Your Tears (Remix) (with Ariana Grande) -...,The Weeknd,10.1145,1oFAF1hdPOickyHgbuRjyX
450,I WANNA BE YOUR SLAVE,Måneskin,10.0888,4pt5fDVTg5GhEvEtlz9dKk
451,Yerli Plaka,Ceza,10.0322,6qLGmqg7yfTVotHvxVClZ8
452,BIBI PHONK BR,Bibi Babydoll,10.0289,1ytfbcVbSMHIgzijgt4z2e


In [5]:
import re 

# Characters stripped from names before they are used as filenames:
# backslash, slash, asterisk, question mark, colon, double quote,
# angle brackets and pipe.
_FORBIDDEN_FILENAME_CHARS = re.compile(r'[\\/*?:"<>|]')


def sanitize_filename(name):
    """Return ``name`` with every forbidden filename character removed.

    Args:
        name: The string to clean.

    Returns:
        A copy of ``name`` with each occurrence of ``\\ / * ? : " < > |``
        deleted; all other characters are left untouched.
    """
    return _FORBIDDEN_FILENAME_CHARS.sub("", name)

In [None]:
from tqdm import tqdm
from downloader import SpotiDownloader

downloader = SpotiDownloader()

# Row position to resume from; rows before it are skipped by this cell
# (presumably handled in an earlier run - adjust or set to 0 as needed).
START_ROW = 307
pending = df.iloc[START_ROW:]

try:
    # total is taken from the slice itself, so it stays correct (never
    # negative) even when df has fewer than START_ROW rows.
    for idx, row in tqdm(pending.iterrows(), total=len(pending)):
        # Bind the names before the inner try so the error message always
        # refers to the current row, even when sanitizing itself fails
        # (e.g. a missing, non-string song name).
        song, artist = row['song'], row['artist']
        try:
            song = sanitize_filename(song.lower())
            artist = sanitize_filename(artist.lower())
            uri = row['uri']
            downloader.download_song(song, artist, uri)
        except Exception as exc:
            # Best effort: report the failure and move on to the next row.
            # Catching Exception instead of a bare except lets
            # KeyboardInterrupt / SystemExit stop the loop.
            print(f"An error occurred while downloading {song} by {artist}! ({exc!r})")
finally:
    # Always call close(), even if the loop is interrupted.
    downloader.close()

In [10]:
from tqdm import tqdm
from downloader import SpotiDownloader

downloader = SpotiDownloader()

try:
    for idx, row in tqdm(df.iterrows(), total=len(df)):
        # Bind the names before the inner try so the error message always
        # refers to the current row, even when sanitizing itself fails
        # (e.g. a missing, non-string song name).
        song, artist = row['song'], row['artist']
        try:
            song = sanitize_filename(song.lower())
            artist = sanitize_filename(artist.lower())
            downloader.download_cover(name=song, author=artist, uri=row['uri'])
        except Exception as exc:
            # Best effort: report the failure and move on to the next row.
            # Catching Exception instead of a bare except lets
            # KeyboardInterrupt / SystemExit stop the loop.
            print(f"An error occurred while downloading {song} by {artist}! ({exc!r})")
finally:
    # Always call close(), even if the loop is interrupted.
    downloader.close()

100%|██████████| 454/454 [09:31<00:00,  1.26s/it]
