In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Root folder of the E-GMD dataset; audio `file_path` values are joined onto it when clips are read.
dataset_dir = 'dataset/e-gmd-v1.0.0'

# Load every derived CSV table in one pass, in the same order as the names on the left.
slim_metadata_df, note_occurrences_slim_df, labels_df, chopped_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'dataset/e-gmd-v1.0.0-slim.csv',      # See `create_slim_metadata.py` for details.
        'dataset/note_occurrences_slim.csv',  # See `create_label_mappings.py` for details.
        'dataset/label_mapping.csv',          # See `create_label_mappings.py` for details.
        'dataset/chopped.csv',                # See `chop_dataset.py` for details.
    )
)

# Note occurrences in slim dataset

In [None]:
# Peek at the first five rows of the per-note occurrence table.
note_occurrences_slim_df.head(n=5)

In [None]:
# Bar chart of how often each drum note occurs in the slim dataset.
plt.bar(note_occurrences_slim_df['name'], note_occurrences_slim_df['occurrences'])
plt.title('Note Occurrences')
plt.ylabel('Occurrences')
# Rotate the tick labels only after the bars are drawn, so the rotation is applied to the
# actual category labels rather than to the placeholder ticks of a still-empty axes.
# Assigning to `_` suppresses the (locs, labels) repr in the cell output.
_ = plt.xticks(rotation='vertical')

# Label mappings

The top-5 most frequently occurring drum instrument types are used for training.

The label mappings contain a row for each training drum instrument, with the following columns:
- `id`: Used for one-hot encoding during training. Corresponds to the instrument's occurrence frequency rank in the slim dataset, with the smallest value corresponding to the most common.
- `note`: The MIDI note of the drum instrument.
- `name`: The human-readable name of the drum instrument.

In [None]:
# Display the full label mapping table (columns `id`, `note`, `name` as described above).
labels_df

In [None]:
def get_name(label):
    """Return the human-readable instrument name for a training label.

    The row is selected by matching ``label`` against the ``id`` column of
    ``labels_df`` rather than by row position, so the result does not depend
    on the order in which the rows were written to the CSV.

    Args:
        label: A label value as stored in the ``label`` column of the chopped
            dataset (an ``id`` from ``labels_df``).

    Returns:
        The value of the ``name`` column for the matching row.

    Raises:
        IndexError: If no row of ``labels_df`` has ``id == label``.
    """
    matching_names = labels_df.loc[labels_df['id'] == label, 'name']
    if matching_names.empty:
        raise IndexError(f'no row in labels_df has id == {label!r}')
    return matching_names.iloc[0]

# Chopped dataset

The "chopped" dataset is the final, processed dataset used for training.

It consists of a row per "drum hit", which is composed of one or more simultaneously sounding drum instruments, and it has the following columns:
- `file_path`: The path to the audio file in the E-GMD dataset.
- `begin_frame`: The frame (sample index) of the beginning of the hit.
- `num_frames`: The length, in frames, of the hit.
- `label`: A drum instrument label, corresponding to the `id` column in the `dataset/label_mapping.csv` file generated by the `create_label_mappings.py` script.
- `slim_id`: The session ID (index in `e-gmd-v1.0.0-slim.csv`) in which this hit was found, for access to any other metadata.

In [None]:
# Peek at the first five drum-hit rows of the chopped dataset.
chopped_df.head(n=5)

In [None]:
# Count the hits per label, then swap the numeric label ids for instrument names
# so the bars are readable.
label_counts = chopped_df['label'].value_counts()
label_counts.index = label_counts.index.map(get_name)

label_counts.plot.bar()
plt.gca().set(
    title='Label occurrences in "chopped" dataset',
    xlabel='',
    ylabel='Occurrences',
)
_ = plt.xticks(rotation='vertical')

In [None]:
from IPython.display import Audio
from scipy.io import wavfile

def preview_record(row):
    """Plot the waveform of one "chopped" drum hit and return a player for it.

    The clip is cut out of the WAV file at ``dataset_dir/row.file_path`` using
    ``row.begin_frame`` and ``row.num_frames``. The plot legend shows the
    instrument name for ``row.label`` and the kit name of the session
    ``row.slim_id``.

    Args:
        row: A row of ``chopped_df`` (any object exposing ``file_path``,
            ``begin_frame``, ``num_frames``, ``label`` and ``slim_id``).

    Returns:
        An ``IPython.display.Audio`` object that plays the clip.
    """
    audio_file_path = f'{dataset_dir}/{row.file_path}'
    sample_rate, samples = wavfile.read(audio_file_path)
    clip_samples = samples[row.begin_frame:row.begin_frame + row.num_frames]
    # Scale to roughly [-1, 1). NumPy true division yields float64.
    # Assumes the file holds 16-bit PCM samples, as the 2**15 divisor implies - TODO confirm.
    clip_data = clip_samples / (2**15)

    num_samples = clip_data.shape[0]
    length = num_samples / sample_rate
    # Sample i occurs at i / sample_rate, so the end point must be excluded; including it
    # would stretch the time axis by a factor of n / (n - 1).
    time = np.linspace(0, length, num_samples, endpoint=False)

    name = get_name(row.label)
    kit_name = slim_metadata_df.iloc[row.slim_id].kit_name

    plt.plot(time, clip_data, label=f'{name} ({kit_name})')
    plt.legend()
    plt.xlabel('Time (s)')
    plt.ylabel('Amplitude')
    plt.xlim([0, length])
    plt.show()

    # `Audio` expects multi-channel data shaped (channels, samples), whereas `wavfile.read`
    # returns (samples, channels). `.T` fixes that and is a no-op for 1-D mono clips.
    return Audio(clip_data.T, rate=sample_rate)


In [None]:
# Preview one arbitrary example: the hit at position 44 among those labelled 3.
preview_record(chopped_df.loc[chopped_df['label'] == 3].iloc[44])