In [2]:
# Make packages located one directory above the working directory importable
import sys
from pathlib import Path

# absolute, resolved path of the current working directory's parent
parent_dir = Path().resolve().parents[0]
# inserted at index 0 so it is searched before every other entry
sys.path.insert(0, str(parent_dir))

# Convert OSU files to the Intermediate Beatmap Format

Data! Preprocess! It's all in the code!

If you wanna train me, here's the road:

All the steps I guide you through, to succeed it's true,

But fear not, you'll master this too, let's begin anew!

In [None]:
# Directory that is scanned for .osz archives to convert
input_path = "songs/"
# Conversion output directory: receives one sub-folder per archive, and is
# later scanned for the .ibf beatmap files and the .mp3 audio
output_path = "ibfs/"
# Pickle file the assembled dataset (for training / evaluation) is written to
dataset_file = "datasets/example.pkl"

First, gather your data, make sure it's clean and neat,

Each sample's features, labels complete.

Then preprocess, normalize, and scale,

To ensure our model won't fail.

In [9]:
# Convert all OSZ files and OSU files within to the Intermediate Beatmap Format
import os
from tqdm import tqdm
from beatlearning.utils import IntermediateBeatmapFormat
from beatlearning.converters import OsuBeatmapConverter

# Run every OSZ archive found in input_path through the converter
converter = OsuBeatmapConverter()
for entry in tqdm(os.listdir(input_path)):
    # anything that is not an OSZ archive (extension compared case-insensitively) is skipped
    if not entry.lower().endswith(".osz"):
        continue
    # one archive can hold several OSU beatmaps; they all end up in a single
    # folder named after the archive (file name without its extension)
    archive_name = os.path.splitext(os.path.basename(entry))[0]
    source_archive = os.path.join(input_path, entry)
    target_folder = os.path.join(output_path, archive_name)
    # the same folder is passed for both remaining arguments
    # NOTE(review): presumably beatmap output and audio output locations - confirm
    # against OsuBeatmapConverter.convert
    converter.convert(source_archive, target_folder, target_folder)


But wait! Before we tokenize, an intermediate format we create,

Between raw data and tokens, it's great.

A custom format, designed for our ease,

A metadata dictionary and a Pandas DataFrame, oh please!

In [None]:
# Select model (you will need to use the EXACT same config / tokenizer throughout training and inference)
from beatlearning.tokenizers import BEaRTTokenizer
from beatlearning.configs import QuaverBEaRT

# The tokenizer is constructed from the config object, so the two always go together.
# NOTE(review): presumably tokens produced under a different config are not
# interchangeable - confirm before mixing datasets built with other configs.
config = QuaverBEaRT()
tokenizer = BEaRTTokenizer(config)

Now, for generating data, let's make it diverse,

With augmentation techniques, we'll immerse.

Rotate, flip, and shift, or add some noise,

To ensure our model's robust, that's our choice.

In [None]:
# generate a dataset from the converted beatmaps
import numpy as np
import dill as pickle
from beatlearning.utils import BEaRTDataset

# Build one dataset from every converted beatmap folder in output_path and
# serialize it to dataset_file.
dataset = BEaRTDataset(tokenizer,     # pass the tokenizer
                       augment=True)  # set to False for evaluation sets
# Seed NumPy's global RNG before any sample is added.
# NOTE(review): presumably the augmentation draws from it - confirm in BEaRTDataset.
np.random.seed(1234567)
# os.listdir returns entries in arbitrary order; sorting keeps the order in which
# samples are added identical between runs and machines, which the fixed seed
# alone cannot guarantee.
for folder in tqdm(sorted(os.listdir(output_path))):
    folder_path = os.path.join(output_path, folder)
    if not os.path.isdir(folder_path):
        # stray files next to the beatmap folders (e.g. .DS_Store) cannot be listed
        continue
    # find all IntermediateBeatmapFormat files and the mp3 audio
    ibfs, mp3 = [], None
    for file in sorted(os.listdir(folder_path)):
        # extensions are compared case-insensitively, like the .osz check during conversion
        lowered = file.lower()
        # multiple IBF files can share the same audio
        if lowered.endswith(".ibf"):
            ibfs.append(IntermediateBeatmapFormat(os.path.join(folder_path, file)))
        elif lowered.endswith(".mp3"):
            # if a folder holds several mp3 files, the last one in sorted order is used
            mp3 = os.path.join(folder_path, file)
    # folders lacking either beatmaps or audio contribute nothing
    if ibfs and mp3:
        # NOTE: you can add offsets += [0.01, 0.02 ... 0.09] for ADDITIONAL augmentation purposes
        # this will nudge the timing in a way, that 10ms quantized events will result in different tokens
        offsets = [0.0]
        dataset.add(ibfs, mp3,
                    offsets=offsets,
                    ignore_lead_in=True)
# write dataset; open() does not create missing folders, so make sure the
# target directory exists first (dirname is empty for a bare file name)
dataset_dir = os.path.dirname(dataset_file)
if dataset_dir:
    os.makedirs(dataset_dir, exist_ok=True)
with open(dataset_file, "wb") as f:
    pickle.dump(dataset, f)
# report the size of the written file and the number of rows
print(f"{os.path.getsize(dataset_file) / (1024 ** 3):.3f}GB ({len(dataset)} rows)")
# drop the reference so the in-memory dataset can be reclaimed
del dataset

That's it for today

Good job! You can move on to the next stage now!