In [1]:
import zipfile
def extract_osz(osz_file, output_folder):
    """Unpack an ``.osz`` beatmap archive (a zip file) into a folder.

    Args:
        osz_file: Path of the ``.osz`` archive to read.
        output_folder: Directory that receives every member of the archive.

    Prints a one-line confirmation once extraction has completed.
    """
    archive = zipfile.ZipFile(osz_file, mode="r")
    with archive:
        archive.extractall(path=output_folder)
    print(f"Extracted {osz_file} to {output_folder}")

In [2]:
# Unpack the example beatmap set into its working folder.
extract_osz(
    osz_file="example/Favorite Liar.osz",
    output_folder="example/beatmapset-2269930",
)

Extracted example/Favorite Liar.osz to example/beatmapset-2269930


In [3]:
import os

class BeatmapProcessor:
    """Parse one ``.osu`` difficulty file into plain Python structures.

    On construction the file ``<beatmapset_folder>/<osu_file>`` is read and
    four attributes are filled in:

    * ``hit_objects``   - list of dicts (circles, sliders, spinners)
    * ``timing_points`` - list of dicts holding the raw string columns
    * ``metadata``      - ``{key: value}`` from the ``[Metadata]`` section
    * ``difficulty``    - ``{key: value}`` from the ``[Difficulty]`` section

    Args:
        beatmapset_folder: Directory containing the extracted beatmap set.
        osu_file: File name of the difficulty inside that directory.
    """

    # Column labels given to the first eight comma-separated timing-point fields.
    # NOTE(review): the osu! file-format documentation lists these columns as
    # time, beatLength, meter, sampleSet, sampleIndex, volume, uninherited,
    # effects - the labels from "time_signature" onwards look shifted by one.
    # They are kept unchanged because exported CSVs already use them; confirm
    # before renaming.
    _TIMING_POINT_KEYS = (
        "offset",
        "ms_per_beat",
        "time_signature",
        "meter",
        "sample_set",
        "sample_index",
        "volume",
        "effects",
    )

    def __init__(self, beatmapset_folder, osu_file):
        self.beatmapset_folder = beatmapset_folder
        self.osu_file = osu_file
        self.hit_objects = self.parse_hit_objects()
        self.timing_points = self.parse_timing_points()
        self.metadata = self.parse_metadata()
        self.difficulty = self.parse_difficulty()

    def _read_sections(self):
        """Return ``{"[Section]": [stripped non-empty lines]}`` for the file.

        The result is cached per file path so the four ``parse_*`` methods
        share a single read. Blank lines and ``//`` comment lines are dropped,
        and lines appearing before the first section header are ignored.
        """
        path = os.path.join(self.beatmapset_folder, self.osu_file)
        cached = getattr(self, "_sections_cache", None)
        if cached is not None and cached[0] == path:
            return cached[1]

        sections = {}
        current = None
        with open(path, "r", encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line or line.startswith("//"):
                    continue
                if line.startswith("["):
                    # Any header closes the previous section, for every parser.
                    current = line
                    sections.setdefault(current, [])
                    continue
                if current is not None:
                    sections[current].append(line)

        self._sections_cache = (path, sections)
        return sections

    def _parse_key_values(self, section):
        """Parse the ``Key:Value`` lines of ``section`` into a dict.

        Only the first ``:`` separates key from value, so values may contain
        colons. Lines without any ``:`` are skipped instead of raising.
        """
        pairs = {}
        for line in self._read_sections().get(section, []):
            key, separator, value = line.partition(":")
            if not separator:
                continue
            pairs[key.strip()] = value.strip()
        return pairs

    def parse_hit_objects(self):
        """Return the ``[HitObjects]`` entries as a list of dicts.

        Every dict carries ``type``, ``x``, ``y``, ``time`` and ``hit_sound``;
        sliders add ``path`` and ``repeat``, spinners add ``end_time``. The
        type is chosen from the bit flags of the fourth column, tested in the
        order 1 (circle), 2 (slider), 8 (spinner); lines matching none of
        these are omitted from the result.

        Raises:
            ValueError: if one of the first five columns is not an integer.
        """
        hit_objects = []
        for line in self._read_sections().get("[HitObjects]", []):
            parts = line.split(",")
            x, y, time_ms, obj_type, hit_sound = map(int, parts[:5])
            object_data = parts[5:]

            hit_object = {
                "x": x,
                "y": y,
                "time": time_ms,
                "hit_sound": hit_sound,
            }
            if obj_type & 1:
                hit_objects.append({"type": "circle", **hit_object})
            elif obj_type & 2:
                hit_objects.append(
                    {
                        "type": "slider",
                        **hit_object,
                        "path": object_data[0],
                        "repeat": int(object_data[1]),
                    }
                )
            elif obj_type & 8:
                hit_objects.append(
                    {
                        "type": "spinner",
                        **hit_object,
                        "end_time": int(object_data[0]),
                    }
                )

        return hit_objects

    def parse_timing_points(self):
        """Return the ``[TimingPoints]`` rows as dicts of raw strings.

        Rows with fewer than eight columns are padded with empty strings;
        columns beyond the eighth are ignored.
        """
        timing_points = []
        for line in self._read_sections().get("[TimingPoints]", []):
            parts = line.split(",")
            parts += [""] * (len(self._TIMING_POINT_KEYS) - len(parts))
            timing_points.append(dict(zip(self._TIMING_POINT_KEYS, parts)))
        return timing_points

    def parse_metadata(self):
        """Return the ``[Metadata]`` section as a ``{key: value}`` dict."""
        return self._parse_key_values("[Metadata]")

    def parse_difficulty(self):
        """Return the ``[Difficulty]`` section as a ``{key: value}`` dict."""
        return self._parse_key_values("[Difficulty]")

    def get_data(self):
        """Bundle the four parsed attributes into one dictionary."""
        return {
            "hit_objects": self.hit_objects,
            "timing_points": self.timing_points,
            "metadata": self.metadata,
            "difficulty": self.difficulty,
        }


In [4]:
import time
import pygame

# Names (without extension) of the stock hitsound samples: every combination of
# sample set and sample name, plus the two spinner sounds.
DEFAULT_SOUNDS = {
    f"{sample_set}-{sample_name}"
    for sample_set in ("drum", "normal", "soft")
    for sample_name in (
        "hitclap",
        "hitfinish",
        "hitnormal",
        "hitwhistle",
        "sliderslide",
        "slidertick",
        "sliderwhistle",
    )
} | {"spinnerbonus", "spinnerspin"}

class BeatmapPlayer:
    """Play a beatmap's song with pygame and trigger hitsounds on each object.

    Custom ``.wav`` samples found in the beatmap folder are mapped onto the
    names of the default samples; the audio actually played for a hit is
    loaded from ``default_hitsounds/<default name>.wav``.
    """

    def __init__(self):
        pass

    def list_audio_files(self, beatmapset_folder):
        """Map every ``.wav`` file in ``beatmapset_folder`` to a default sound name.

        The extension check is case-insensitive.
        """
        files = [
            f for f in os.listdir(beatmapset_folder) if f.lower().endswith(".wav")
        ]
        return {file: self.convert_to_default_sound(file) for file in files}

    @staticmethod
    def _decode_hit_sound(hit_sound):
        """Translate the hit-sound bit flags into a list of sound names.

        Bits 2, 4 and 8 mean whistle, finish and clap; when none of them is
        set the result is ``["normal"]``.
        """
        sounds = []
        if hit_sound & 2:
            sounds.append("whistle")
        if hit_sound & 4:
            sounds.append("finish")
        if hit_sound & 8:
            sounds.append("clap")
        return sounds if sounds else ["normal"]

    def match_hit_objects_to_sounds(self, hit_objects, audios):
        """Attach ``hit_sounds`` and ``files`` entries to every hit object.

        Args:
            hit_objects: List of dicts with a ``hit_sound`` bit-flag entry;
                each dict is modified in place.
            audios: ``{file name: default sound name}`` as returned by
                ``list_audio_files``.

        A sample matches when its default name ends in ``hit<sound>``. Matching
        on the hit type (rather than on any substring of the file name) keeps
        the ``normal-`` sample-set prefix from matching a plain hit and keeps
        ``sliderwhistle`` from matching a whistle. Each default name is listed
        once, so two custom files mapping to the same sample do not make it
        play twice.

        NOTE(review): sample-set selection is not modelled here, so matching
        samples from every available set are returned together.
        """
        for obj in hit_objects:
            sound_names = self._decode_hit_sound(obj["hit_sound"])
            suffixes = tuple("hit" + name for name in sound_names)
            matched = [
                default_name
                for default_name in audios.values()
                if default_name.endswith(suffixes)
            ]
            obj["hit_sounds"] = sound_names
            # dict.fromkeys drops duplicates while keeping first-seen order.
            obj["files"] = list(dict.fromkeys(matched))

    def convert_to_default_sound(self, sound_file):
        """Return the default sample name that ``sound_file`` corresponds to.

        The file name is lower-cased and its ``.wav`` suffix removed. A name
        that is already a default sample is returned unchanged. Otherwise the
        sample set (soft / drum / normal) and the sample kind are guessed from
        substrings of the name, falling back to ``normal`` and ``hitnormal``.
        """
        filename = os.path.basename(sound_file).lower()
        if filename.endswith(".wav"):
            filename = filename[: -len(".wav")]

        if filename in DEFAULT_SOUNDS:
            return filename

        # Spinner samples carry no sample-set prefix.
        for spinner_sound in ("spinnerbonus", "spinnerspin"):
            if spinner_sound in filename:
                return spinner_sound

        if "soft" in filename:
            prefix = "soft"
        elif "drum" in filename:
            prefix = "drum"
        else:
            prefix = "normal"

        # Slider samples are tested first: "sliderwhistle" also contains
        # "whistle" and would otherwise be classified as a hit whistle.
        keyword_to_type = (
            ("sliderslide", "sliderslide"),
            ("slidertick", "slidertick"),
            ("sliderwhistle", "sliderwhistle"),
            ("whistle", "hitwhistle"),
            ("finish", "hitfinish"),
            ("clap", "hitclap"),
        )
        for keyword, hit_type in keyword_to_type:
            if keyword in filename:
                return f"{prefix}-{hit_type}"

        return f"{prefix}-hitnormal"

    def play(self, beatmap):
        """Play the beatmap's first ``.mp3`` and fire hitsounds in time with it.

        Args:
            beatmap: Object exposing ``beatmapset_folder`` and ``hit_objects``
                (for example a ``BeatmapProcessor``). Its hit-object dicts
                gain ``hit_sounds`` and ``files`` entries.

        Returns:
            None. Prints a message and returns early when the folder has no
            ``.mp3`` file.

        Blocks until the song has finished. Playback is stopped if the loop
        is interrupted.
        """
        beatmapset_folder = beatmap.beatmapset_folder
        hit_objects = beatmap.hit_objects
        audios = self.list_audio_files(beatmapset_folder)

        self.match_hit_objects_to_sounds(hit_objects, audios)

        mp3_files = [
            f for f in os.listdir(beatmapset_folder) if f.lower().endswith(".mp3")
        ]

        if not mp3_files:
            print("No .mp3 file found in the directory.")
            return None

        mp3_path = os.path.join(beatmapset_folder, mp3_files[0])

        pygame.mixer.init()

        # Load every sample BEFORE the song starts: loading takes time, and a
        # missing sample should fail here rather than with the music running.
        sound_cache = {
            file: pygame.mixer.Sound(os.path.join("default_hitsounds", file + ".wav"))
            for file in audios.values()
        }

        pygame.mixer.music.load(mp3_path)
        pygame.mixer.music.play()
        # Monotonic clock taken right after playback starts, so scheduled hit
        # times line up with the song and are immune to wall-clock changes.
        start_time = time.perf_counter()

        try:
            for obj in hit_objects:
                while (time.perf_counter() - start_time) * 1000 < obj["time"]:
                    time.sleep(0.001)

                for sound_file in obj["files"]:
                    if sound_file in sound_cache:
                        sound_cache[sound_file].play()

                print(f"Played sound for {obj['type']} at {obj['time']} ms")

            while pygame.mixer.music.get_busy():
                time.sleep(0.1)
        finally:
            # Also reached on KeyboardInterrupt (kernel interrupt): do not
            # leave the song playing in the background.
            pygame.mixer.music.stop()

        print("Simulation finished!")

pygame 2.6.1 (SDL 2.28.4, Python 3.13.2)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [5]:
# Pick one difficulty of the extracted set and parse it.
beatmapset_folder = "example/beatmapset-2269930"
osu_file = "The Wrecks - Favorite Liar (Cut Ver.) (PikAqours) [Amats' Insane].osu"

processor = BeatmapProcessor(beatmapset_folder=beatmapset_folder, osu_file=osu_file)

# Last expression of the cell, so the parsed dictionary is displayed.
processor.get_data()

{'hit_objects': [{'type': 'slider',
   'x': 96,
   'y': 132,
   'time': 8262,
   'hit_sound': 0,
   'path': 'P|149:123|203:116',
   'repeat': 1},
  {'type': 'slider',
   'x': 282,
   'y': 97,
   'time': 8643,
   'hit_sound': 0,
   'path': 'P|302:146|288:197',
   'repeat': 1},
  {'type': 'circle', 'x': 212, 'y': 230, 'time': 9021, 'hit_sound': 2},
  {'type': 'slider',
   'x': 285,
   'y': 343,
   'time': 9273,
   'hit_sound': 0,
   'path': 'P|338:334|392:327',
   'repeat': 1},
  {'type': 'circle', 'x': 475, 'y': 350, 'time': 9655, 'hit_sound': 0},
  {'type': 'slider',
   'x': 475,
   'y': 350,
   'time': 9783,
   'hit_sound': 0,
   'path': 'P|466:296|459:242',
   'repeat': 1},
  {'type': 'slider',
   'x': 332,
   'y': 132,
   'time': 10274,
   'hit_sound': 0,
   'path': 'P|340:185|347:239',
   'repeat': 1},
  {'type': 'slider',
   'x': 284,
   'y': 291,
   'time': 10638,
   'hit_sound': 0,
   'path': 'P|234:311|183:298',
   'repeat': 1},
  {'type': 'circle', 'x': 127, 'y': 237, 'time': 

In [6]:
# Play the song of the parsed difficulty together with its hitsounds
# (blocks until playback has finished).
player = BeatmapPlayer()
player.play(beatmap=processor)

Played sound for slider at 8262 ms
Played sound for slider at 8643 ms
Played sound for circle at 9021 ms
Played sound for slider at 9273 ms
Played sound for circle at 9655 ms
Played sound for slider at 9783 ms
Played sound for slider at 10274 ms
Played sound for slider at 10638 ms
Played sound for circle at 11002 ms
Played sound for slider at 11259 ms
Played sound for slider at 12289 ms
Played sound for slider at 12650 ms
Played sound for circle at 13011 ms
Played sound for slider at 13252 ms
Played sound for circle at 13646 ms
Played sound for slider at 13778 ms
Played sound for slider at 14280 ms
Played sound for slider at 14645 ms
Played sound for circle at 15011 ms
Played sound for slider at 15255 ms
Played sound for slider at 16024 ms
Played sound for slider at 16280 ms
Played sound for circle at 16650 ms
Played sound for circle at 16897 ms
Played sound for slider at 17020 ms
Played sound for slider at 17267 ms
Played sound for circle at 18023 ms
Played sound for slider at 18275 m

In [7]:
import csv
import os


class DataExporter:
    """Append parsed beatmap data to three CSV files inside one folder.

    The folder receives ``beatmaps.csv``, ``hit_objects.csv`` and
    ``timing_points.csv``. Each file gets its header row the first time it is
    created; afterwards rows are only ever appended, so exporting the same
    beatmap twice produces duplicate rows.
    """

    def __init__(self, dataset_folder):
        """Create ``dataset_folder`` if needed and make sure the CSVs exist."""
        os.makedirs(dataset_folder, exist_ok=True)
        (
            self.beatmaps_file,
            self.hit_objects_file,
            self.timing_points_file,
        ) = (
            os.path.join(dataset_folder, csv_name)
            for csv_name in ("beatmaps.csv", "hit_objects.csv", "timing_points.csv")
        )

        self.init_csv()

    def init_csv(self):
        """Write the header row of every CSV file that does not exist yet."""
        header_by_path = {
            self.beatmaps_file: [
                "ID",
                "Title",
                "Artist",
                "Creator",
                "Version",
                "HPDrainRate",
                "CircleSize",
                "OverallDifficulty",
                "ApproachRate",
                "SliderMultiplier",
                "SliderTickRate",
            ],
            self.hit_objects_file: [
                "ID",
                "Time",
                "Type",
                "X",
                "Y",
                "HitSound",
                "Extra",
            ],
            self.timing_points_file: [
                "ID",
                "offset",
                "ms_per_beat",
                "time_signature",
                "meter",
                "sample_set",
                "sample_index",
                "volume",
                "effects",
            ],
        }
        for path, columns in header_by_path.items():
            if os.path.exists(path):
                # Never overwrite a file that already holds data.
                continue
            with open(path, "w", newline="", encoding="utf-8") as handle:
                csv.writer(handle).writerow(columns)

    def write_data(self, data, id):
        """Export one parsed beatmap under the identifier ``id``.

        ``data`` must provide the keys ``hit_objects``, ``timing_points``,
        ``metadata`` and ``difficulty``.
        """
        # Look up all four entries first so a missing key fails before any write.
        objects = data["hit_objects"]
        points = data["timing_points"]
        meta = data["metadata"]
        diff = data["difficulty"]

        self.save_beatmap(id, meta, diff)
        self.save_hit_objects(id, objects)
        self.save_timing_points(id, points)

    def save_beatmap(self, id, metadata, difficulty):
        """Append one row to ``beatmaps.csv``; missing values become ``""``."""
        metadata_columns = ("Title", "Artist", "Creator", "Version")
        difficulty_columns = (
            "HPDrainRate",
            "CircleSize",
            "OverallDifficulty",
            "ApproachRate",
            "SliderMultiplier",
            "SliderTickRate",
        )
        row = [id]
        row.extend(metadata.get(column, "") for column in metadata_columns)
        row.extend(difficulty.get(column, "") for column in difficulty_columns)

        with open(self.beatmaps_file, "a", newline="", encoding="utf-8") as handle:
            csv.writer(handle).writerow(row)

    def save_hit_objects(self, id, hit_objects):
        """Append one row per hit object to ``hit_objects.csv``.

        The ``Extra`` column holds the object's ``path`` when it has one,
        otherwise its ``end_time``, otherwise an empty string.
        """

        def as_row(entry):
            if "path" in entry:
                extra = entry["path"]
            else:
                extra = entry.get("end_time", "")
            return [
                id,
                entry["time"],
                entry["type"],
                entry["x"],
                entry["y"],
                entry["hit_sound"],
                extra,
            ]

        with open(self.hit_objects_file, "a", newline="", encoding="utf-8") as handle:
            csv.writer(handle).writerows(as_row(entry) for entry in hit_objects)

    def save_timing_points(self, id, timing_points):
        """Append one row per timing point to ``timing_points.csv``."""
        columns = (
            "offset",
            "ms_per_beat",
            "time_signature",
            "meter",
            "sample_set",
            "sample_index",
            "volume",
            "effects",
        )
        with open(self.timing_points_file, "a", newline="", encoding="utf-8") as handle:
            csv.writer(handle).writerows(
                [id, *(point.get(column) for column in columns)]
                for point in timing_points
            )


In [8]:
# Folder that collects the exported CSV files (created on first use).
database_path = "example/dataset"
data_exporter = DataExporter(dataset_folder=database_path)

In [9]:
# Export every difficulty (.osu file) of the set to the CSV dataset.
# Rows are appended, so re-running this cell duplicates them in the CSV files.
osu_files = [
    file for file in os.listdir(beatmapset_folder) if file.endswith(".osu")
]
# The set id is the text after the last "-" of the folder's own name; only the
# final path component is inspected so hyphens in parent directories cannot
# shift it.
beatmapsetId = os.path.basename(os.path.normpath(beatmapset_folder)).rsplit("-", 1)[-1]

for index, osu_file in enumerate(osu_files):
    # Named beatmap_id rather than id so the builtin id() stays usable in
    # later cells.
    beatmap_id = beatmapsetId + "-" + str(index)
    processor = BeatmapProcessor(beatmapset_folder, osu_file)
    data = processor.get_data()
    data_exporter.write_data(data, beatmap_id)


In [10]:
import shutil

# Copy one audio file of the set into <database_path>/audio/<set id>/.
audio_files = [
    f
    for f in os.listdir(beatmapset_folder)
    if f.lower().endswith((".mp3", ".ogg"))
]
# Take the id from the final path component only (text after its last "-"), so
# hyphens in parent directories cannot shift it.
audio_set_id = os.path.basename(os.path.normpath(beatmapset_folder)).rsplit("-", 1)[-1]
audio_folder = os.path.join(database_path, "audio", audio_set_id)
os.makedirs(audio_folder, exist_ok=True)

if audio_files:
    # NOTE(review): the first match in directory-listing order is copied; if
    # the folder holds several .mp3/.ogg files this is not guaranteed to be the
    # song itself - confirm against the beatmap's audio file name.
    first_audio = audio_files[0]
    shutil.copy2(
        os.path.join(beatmapset_folder, first_audio),
        os.path.join(audio_folder, first_audio)
    )

In [11]:
import pandas as pd

# Read the exported beatmap table back from the dataset folder configured
# above, rather than repeating the path as a second hard-coded literal.
df = pd.read_csv(os.path.join(database_path, "beatmaps.csv"))
df

Unnamed: 0,ID,Title,Artist,Creator,Version,HPDrainRate,CircleSize,OverallDifficulty,ApproachRate,SliderMultiplier,SliderTickRate
0,2269930-0,Favorite Liar (Cut Ver.),The Wrecks,PikAqours,Kuroame's Favorite Liar,5.5,3.8,9.6,9.7,1.8,1.0
1,2269930-1,Favorite Liar (Cut Ver.),The Wrecks,PikAqours,Harumi's Normal,3.0,3.2,3.8,5.8,0.9,1.0
2,2269930-2,Favorite Liar (Cut Ver.),The Wrecks,PikAqours,Amats' Insane,5.0,3.6,8.0,9.0,1.55,1.0
3,2269930-3,Favorite Liar (Cut Ver.),The Wrecks,PikAqours,Kuki's Extra,5.0,3.8,9.2,9.4,1.8,1.0
4,2269930-4,Favorite Liar (Cut Ver.),The Wrecks,PikAqours,Parad0xa's Expert,5.2,3.6,8.8,9.2,1.6,0.5
5,2269930-5,Favorite Liar (Cut Ver.),The Wrecks,PikAqours,Fuxi's Extra,5.0,3.8,9.3,9.4,1.82,1.0
6,2269930-6,Favorite Liar (Cut Ver.),The Wrecks,PikAqours,My Favorite Liar,5.0,3.8,9.5,9.6,1.81,1.0
7,2269930-7,Favorite Liar (Cut Ver.),The Wrecks,PikAqours,Armada's Easy,2.0,2.0,2.0,4.0,0.9,1.0
8,2269930-8,Favorite Liar (Cut Ver.),The Wrecks,PikAqours,Hard,4.0,3.6,7.0,8.0,1.31,1.0
