In [59]:
import pandas as pd
import os

# Root directory that holds the exported dataset tables.
database_path = "/mnt/L-HDD/dataset/"

# Resolve each table's CSV path and load it straight away, so every
# path/DataFrame pair sits side by side.
beatmaps_csv = os.path.join(database_path, "beatmaps.csv")
beatmaps_df = pd.read_csv(beatmaps_csv)

hit_objects_csv = os.path.join(database_path, "hit_objects.csv")
hit_objects_df = pd.read_csv(hit_objects_csv)

time_points_csv = os.path.join(database_path, "timing_points.csv")
time_points_df = pd.read_csv(time_points_csv)


In [60]:
import requests
from dotenv import load_dotenv

# Let python-dotenv populate the process environment, then read the OAuth
# client credentials from it (either value is None when the variable is unset).
load_dotenv()
CLIENT_ID = os.environ.get("CLIENT_ID")
CLIENT_SECRET = os.environ.get("CLIENT_SECRET")

def get_access_token(timeout=30):
    """Request an OAuth access token via the client-credentials grant.

    Uses the module-level ``CLIENT_ID`` and ``CLIENT_SECRET`` values.

    Args:
        timeout: Seconds to wait for the token endpoint before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        The ``access_token`` string from the JSON response.

    Raises:
        requests.RequestException: On transport errors, timeouts, or a
            non-2xx HTTP response.
        RuntimeError: If the response body carries no ``access_token``.
    """
    url = "https://osu.ppy.sh/oauth/token"
    headers = {"Content-Type": "application/x-www-form-urlencoded"}
    data = {
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "grant_type": "client_credentials",
        "scope": "public"
    }

    response = requests.post(url, headers=headers, data=data, timeout=timeout)
    # Fail here with the real HTTP error instead of handing back None and
    # letting every later API call fail with an unrelated-looking 401.
    response.raise_for_status()

    token = response.json().get("access_token")
    if not token:
        raise RuntimeError("OAuth token response did not contain an access_token")
    return token

def get_beatmapset_status(beatmapset_id, access_token, timeout=30):
    """Fetch the status and ranked date of one beatmapset from the API.

    Args:
        beatmapset_id: Identifier interpolated into the beatmapset URL.
        access_token: Bearer token sent in the ``Authorization`` header.
        timeout: Seconds to wait for the API before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        On HTTP 200, a tuple ``(status, ranked_date)`` where ``status``
        defaults to ``"Unknown"`` and ``ranked_date`` to ``None`` when the
        key is absent from the response. On any failure, a string starting
        with ``"Error: "``; callers must therefore be prepared for either
        a tuple or a string.
    """
    url = f"https://osu.ppy.sh/api/v2/beatmapsets/{beatmapset_id}"
    headers = {"Authorization": f"Bearer {access_token}", "Accept": "application/json"}

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Report transport failures through the same "Error: ..." channel as
        # HTTP failures so one network blip does not abort a long crawl.
        return f"Error: {exc}"

    if response.status_code != 200:
        return f"Error: {response.status_code} - {response.text}"

    beatmapset = response.json()
    ranked_date = beatmapset.get("ranked_date")
    status = beatmapset.get("status", "Unknown")
    return (status, ranked_date)

# Fetch one token up front; it is reused for every API call made below.
access_token = get_access_token()


In [61]:
# Keep the part of each "ID" before the first "-" and de-duplicate.
# NOTE(review): presumably IDs look like "<beatmapset_id>-<suffix>" — confirm.
beatmapset_ids = set(beatmaps_df["ID"].str.split("-").str[0])

In [62]:
import time
from tqdm import tqdm
# Query the API once per beatmapset ID and store whatever
# get_beatmapset_status returns (a (status, ranked_date) tuple or an
# "Error: ..." string) keyed by that ID.
# NOTE: `results` is reset on every run of this cell, so an interrupted run
# leaves only a partial mapping behind.
results = {}
for beatmapset_id in tqdm(beatmapset_ids):
    results[beatmapset_id] = get_beatmapset_status(beatmapset_id, access_token)
    # Pause between requests to throttle the crawl.
    time.sleep(0.2)

 39%|██████████████████████████████████████████████████████████████████▏                                                                                                        | 823/2128 [09:39<15:19,  1.42it/s]


KeyboardInterrupt: 

In [41]:
results

{'580215': ('ranked', '2017-06-22T18:00:36Z'),
 '2274433': ('ranked', '2024-11-23T11:22:11Z'),
 '1633799': ('ranked', '2022-03-25T20:43:55Z'),
 '703957': ('ranked', '2018-02-19T04:21:39Z'),
 '2261067': ('ranked', '2025-01-20T05:25:27Z'),
 '1050479': ('ranked', '2019-10-19T18:02:58Z'),
 '842562': ('ranked', '2018-11-06T21:40:34Z'),
 '948225': ('loved', '2020-12-07T08:39:23Z'),
 '895579': ('ranked', '2020-08-23T07:48:20Z'),
 '2186776': ('ranked', '2024-07-03T14:24:19Z'),
 '2235479': ('ranked', '2024-09-02T03:22:32Z'),
 '2175092': ('ranked', '2024-12-08T04:41:46Z'),
 '662892': ('ranked', '2018-02-26T04:00:14Z'),
 '2269930': ('ranked', '2024-11-29T21:22:35Z'),
 '97902': ('ranked', '2014-12-27T20:02:32Z'),
 '2188075': ('ranked', '2025-01-07T00:44:54Z'),
 '1112435': ('ranked', '2020-06-25T05:44:18Z'),
 '25931': ('ranked', '2011-05-14T20:21:47Z'),
 '292599': ('ranked', '2024-06-13T07:06:00Z'),
 '881753': ('ranked', '2019-01-16T18:00:21Z'),
 '310499': ('ranked', '2015-09-05T02:21:24Z'),
 '5385

In [45]:
# Keep only the entries whose first element is exactly 'ranked'. Error
# strings fall out naturally because their first character is not 'ranked'.
ranked_beatmaps = {
    set_id: info
    for set_id, info in results.items()
    if info[0] == 'ranked'
}
ranked_beatmaps

{'580215': ('ranked', '2017-06-22T18:00:36Z'),
 '2274433': ('ranked', '2024-11-23T11:22:11Z'),
 '1633799': ('ranked', '2022-03-25T20:43:55Z'),
 '703957': ('ranked', '2018-02-19T04:21:39Z'),
 '2261067': ('ranked', '2025-01-20T05:25:27Z'),
 '1050479': ('ranked', '2019-10-19T18:02:58Z'),
 '842562': ('ranked', '2018-11-06T21:40:34Z'),
 '895579': ('ranked', '2020-08-23T07:48:20Z'),
 '2186776': ('ranked', '2024-07-03T14:24:19Z'),
 '2235479': ('ranked', '2024-09-02T03:22:32Z'),
 '2175092': ('ranked', '2024-12-08T04:41:46Z'),
 '662892': ('ranked', '2018-02-26T04:00:14Z'),
 '2269930': ('ranked', '2024-11-29T21:22:35Z'),
 '97902': ('ranked', '2014-12-27T20:02:32Z'),
 '2188075': ('ranked', '2025-01-07T00:44:54Z'),
 '1112435': ('ranked', '2020-06-25T05:44:18Z'),
 '25931': ('ranked', '2011-05-14T20:21:47Z'),
 '292599': ('ranked', '2024-06-13T07:06:00Z'),
 '881753': ('ranked', '2019-01-16T18:00:21Z'),
 '310499': ('ranked', '2015-09-05T02:21:24Z'),
 '53850': ('ranked', '2012-07-23T00:24:34Z'),
 '2231

In [58]:
import os
import pandas as pd

def _load_ranked_rows(csv_path, ranked_ids):
    """Read one dataset CSV and keep only rows whose ID prefix is in ``ranked_ids``."""
    table = pd.read_csv(csv_path)
    # The text before the first "-" of "ID" is the key matched against ranked_ids.
    table["beatmap_id"] = table["ID"].str.split("-").str[0]
    # .copy() gives an independent frame, so adding columns later does not
    # write into a slice of `table` (SettingWithCopyWarning on pandas < 3).
    return table[table["beatmap_id"].isin(ranked_ids)].copy()


def filter_ranked_maps(database_path, ranked_beatmaps, output_path):
    """Write a copy of the dataset CSVs restricted to the given beatmap IDs.

    Reads ``beatmaps.csv``, ``hit_objects.csv`` and ``timing_points.csv``
    from ``database_path``, keeps the rows whose ``ID`` prefix (text before
    the first ``-``) is a key of ``ranked_beatmaps``, and writes the three
    filtered tables under the same file names into ``output_path``.

    Every output table gains a ``beatmap_id`` column holding that prefix;
    ``beatmaps.csv`` additionally gains a ``ranked_date`` column.

    Args:
        database_path: Directory containing the three source CSV files.
        ranked_beatmaps: Mapping of ID prefix -> sequence whose element at
            index 1 is the ranked date, e.g. ``(status, ranked_date)``.
        output_path: Directory to write the filtered CSVs into; created
            (including parents) if it does not exist.
    """
    ranked_ids = set(ranked_beatmaps)
    ranked_dates = {key: info[1] for key, info in ranked_beatmaps.items()}

    # Load and filter all three tables before writing anything.
    beatmaps_df = _load_ranked_rows(
        os.path.join(database_path, "beatmaps.csv"), ranked_ids
    )
    hit_objects_df = _load_ranked_rows(
        os.path.join(database_path, "hit_objects.csv"), ranked_ids
    )
    time_points_df = _load_ranked_rows(
        os.path.join(database_path, "timing_points.csv"), ranked_ids
    )

    # Reuse the already-extracted prefix instead of splitting "ID" again.
    beatmaps_df["ranked_date"] = beatmaps_df["beatmap_id"].map(ranked_dates)

    # to_csv does not create missing directories, so make sure the target exists.
    os.makedirs(output_path, exist_ok=True)

    beatmaps_df.to_csv(os.path.join(output_path, "beatmaps.csv"), index=False)
    hit_objects_df.to_csv(os.path.join(output_path, "hit_objects.csv"), index=False)
    time_points_df.to_csv(os.path.join(output_path, "timing_points.csv"), index=False)

# Write the ranked-only subset of the three dataset CSVs to a sibling directory.
filter_ranked_maps("/mnt/L-HDD/dataset/", ranked_beatmaps, "/mnt/L-HDD/ranked_dataset/")


In [30]:
import shutil

def get_ranked_audios(audio_folder, ids, output_folder):
    """Copy the sub-folders of ``audio_folder`` whose names are in ``ids``.

    Args:
        audio_folder: Directory whose immediate entries are scanned.
        ids: Container of folder names to keep (a set gives the fastest
            membership checks).
        output_folder: Directory under which each matching folder is
            recreated with the same name.

    Raises:
        FileExistsError: If a destination folder already exists
            (``shutil.copytree`` default behaviour).
    """
    for folder in tqdm(os.listdir(audio_folder)):
        # Use the `ids` argument rather than a notebook global, so the
        # function works on a fresh kernel and honours what the caller passes.
        if folder in ids:
            shutil.copytree(
                os.path.join(audio_folder, folder),
                os.path.join(output_folder, folder),
            )

# `ranked_beatmaps_ids` was never defined in the notebook (leftover kernel
# state); derive it from the keys of `ranked_beatmaps` so the cell runs on a
# fresh kernel.
# NOTE(review): presumably audio folders are named after these IDs — confirm.
ranked_beatmaps_ids = set(ranked_beatmaps)
get_ranked_audios("/mnt/L-HDD/dataset/audio/", ranked_beatmaps_ids, "/mnt/L-HDD/ranked_dataset/audio/")


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1926/1926 [02:30<00:00, 12.78it/s]
