In [1]:
# Install (and upgrade) the packages listed in requirements.txt. The %pip magic
# installs into the environment of the running kernel; a kernel restart may be
# needed before the updated packages can be imported.
%pip install -r requirements.txt --upgrade

Note: you may need to restart the kernel to use updated packages.


In [2]:
from pathlib import Path
from os.path import basename, splitext
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import csv
import math
import json

from gpt_mini.config import DEFAULT_DEVICE, CONFIG
import gpt_mini.midi_encoder as midi_enc

In [3]:
# Load the dataset index: a tab-separated file without a header row whose single
# column (named "data" here) holds one MIDI file path per line.
df = pd.read_csv(
    CONFIG["preprocess"]["new_dataset_index"],
    sep="\t",
    header=None,
    names=["data"],
)
display(df.head())

Unnamed: 0,data
0,drums/1.mid
1,drums/2.mid
2,drums/3.mid
3,drums/4.mid
4,drums/5.mid


In the provided example, the rhythmic complexity score is calculated as the average of the standard deviations and entropy values of the inter-onset intervals (IOIs) and the note durations:

$$ \text{Rhythmic Complexity} = \frac{\mathrm{std\_ioi} + \mathrm{std\_duration} + \mathrm{entropy\_ioi} + \mathrm{entropy\_duration}}{4} $$

- Standard Deviation: Typically ranges from 0 upwards. The actual range depends on the musical content.
- Entropy: Ranges from 0 to $\log_2(n)$, where $n$ is the number of bins used in the histogram. For example, if you use 10 bins, the maximum entropy is $\log_2(10) \approx 3.32$.

Practical Considerations

- Normalization: If you want the complexity score to be on a specific scale (e.g., 0 to 1), you can normalize the individual components before averaging them.
- Interpretation: The complexity score is relative and should be interpreted in the context of the dataset. Comparing scores across different pieces of music can provide insights into their relative rhythmic complexity.

In [4]:
# Upper bound of the entropy of a 10-bin histogram, in bits.
max_entropy = np.log2(10)


def normalize(value, min_val, max_val):
    """Linearly rescale ``value`` relative to the range ``[min_val, max_val]``.

    ``min_val`` maps to 0 and ``max_val`` maps to 1; inputs outside the range
    are not clipped and therefore map outside ``[0, 1]``.

    Args:
        value: The quantity to rescale.
        min_val: Lower end of the reference range.
        max_val: Upper end of the reference range.

    Returns:
        ``(value - min_val) / (max_val - min_val)``.

    Note:
        There is no guard for ``max_val == min_val``; a zero-width range
        results in a division by zero.
    """
    offset = value - min_val
    span = max_val - min_val
    return offset / span

In [5]:
# Spot-check the scorer on the first entry of the index: the result is indexed
# by "total" (the complexity score) and "bpm".
first_path = df["data"][0]
complexity = midi_enc.calculate_rhythmic_complexity(first_path)
print(first_path, complexity["total"], complexity["bpm"])

drums/1.mid 0.6701770297239691 106


In [6]:
# Score every file exactly once and reuse the result for both derived columns,
# instead of invoking the scorer separately for each column.
# NOTE(review): assumes calculate_rhythmic_complexity returns the same result
# for repeated calls on the same path — confirm against gpt_mini.midi_encoder.
scores = df["data"].map(midi_enc.calculate_rhythmic_complexity)

# Each score is indexed by "total" (complexity) and "bpm".
df["complexity"] = scores.map(lambda result: result["total"])
df["bpm"] = scores.map(lambda result: result["bpm"])

# Show a random sample of rows as a quick sanity check of the new columns.
df.sample(30)

Unnamed: 0,data,complexity,bpm
4765,drums/4766.mid,0.488778,120
6608,drums/1699.mid,0.240225,131
3637,drums/3638.mid,0.962892,32
11818,drums/1999.mid,0.43699,195
3020,drums/3021.mid,0.950402,125
6387,drums/1478.mid,0.60423,152
6305,drums/1396.mid,0.305047,120
13213,drums/3394.mid,0.590278,196
8549,drums/3640.mid,0.828102,139
8159,drums/3250.mid,0.997747,229
