In [2]:
import matplotlib.pyplot as plt
import numpy as np
import scipy
from IPython.display import Audio
from tqdm import tqdm
from pathlib import Path
import random
from enum import Enum
from keras import Sequential 
from keras.layers import Conv1D, Conv2D, MaxPooling1D, MaxPooling2D, Flatten, Dense, Dropout, InputLayer
import tensorflow as tf

In [15]:
SAMPLE_RATE = 44100
WINDOW_SIZE = 512

# Load samples
samples_path = Path('dataset/cut_filtered/')


class BeatType(Enum):
    KICK = 0
    SNARE = 1

def glob_samples(pattern):
	samples = []
	for sample_path in samples_path.glob(pattern):
		sample_rate, wave = scipy.io.wavfile.read(sample_path)
		if(sample_rate != SAMPLE_RATE):
			wave = scipy.signal.resample(wave, int(len(wave) * SAMPLE_RATE / sample_rate))
		
		if len(wave.shape) > 1:
			wave = wave.mean(axis=1)

		start_index = np.argmax(wave > 0.005)
		samples.append(wave[start_index:])
	return samples

kick_samples = glob_samples('kck*/*.wav')
snare_samples = glob_samples('snr*/*.wav')

N = min(len(kick_samples), len(snare_samples))
split = int(N*0.8)

random.shuffle(kick_samples)
random.shuffle(snare_samples)

kick_samples_train = kick_samples[:split]
snare_samples_train = snare_samples[:split]

kick_samples_test = kick_samples[split:N-1]
snare_samples_test = snare_samples[split:N-1]


print(f"test snare: {len(snare_samples_test)}, test kick: {len(kick_samples_test)}, train snare: {len(snare_samples_train)}, train kick: {len(kick_samples_train)}")


train_dataset = [(wave, BeatType.KICK) for wave in kick_samples_train] + [(wave, BeatType.SNARE) for wave in snare_samples_train]
test_dataset = [(wave, BeatType.KICK) for wave in kick_samples_test] + [(wave, BeatType.SNARE) for wave in snare_samples_test]

def augment_amplitude(wave, r=(0.5, 1.5)):
	wave = wave * random.uniform(*r) * random.choice([-1, 1])
	return wave

def augment_noise(wave, r=(0, 0.001)):
	noise = np.random.normal(0, random.uniform(*r), len(wave))
	wave = wave + noise
	return wave

def augment_pitch(wave, r=(-0.2, 0.2)):
	target_len = int(len(wave) * random.uniform(1+r[0], 1+r[1]))
	if target_len <= WINDOW_SIZE:
		return wave
	wave = scipy.signal.resample(wave, target_len)
	return wave

def augment_translate(wave, r=(0, 32)):
	offset_wave = wave[random.randint(*r):]
	if len(offset_wave) <= WINDOW_SIZE:
		return wave
	return offset_wave


def augment(dataset, N=100):
	augmented_dataset = []
	print(f"augmenting {len(dataset)} samples")

	for _ in range(N):
		for wave, label in dataset:
			# augmented_dataset.append((wave[:WINDOW_SIZE], label)) # keep the original
			# wave = augment_amplitude(wave) # 70 it/s
			# wave = augment_noise(wave) # 10 it/s
			# wave = augment_pitch(wave) # 3 it/s
			wave = augment_translate(wave, r=(0, 300))
			augmented_dataset.append((wave[:WINDOW_SIZE], label))

	return augmented_dataset


train_dataset = augment(train_dataset, N=118)
test_dataset = augment(test_dataset, N=15)

random.shuffle(train_dataset)
random.shuffle(test_dataset)

print(f"augmented train: {len(train_dataset)}, augmented test: {len(test_dataset)}")

def conv_preprocess(wave):
	wave = wave.copy()
	wave = wave[:WINDOW_SIZE]
	m = np.max(np.abs(wave))
	wave /= m

	wave *= random.choice([-1, 1])
	return wave

X_train = np.array([conv_preprocess(wave) for wave, label in train_dataset])
y_train = np.array([label.value for freq, label in train_dataset])

X_test = np.array([conv_preprocess(wave) for wave, label in test_dataset])
y_test = np.array([label.value for freq, label in test_dataset])

# reshape input for conv2d
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1], 1)

test snare: 52, test kick: 52, train snare: 211, train kick: 211
augmenting 422 samples
augmenting 104 samples
augmented train: 49796, augmented test: 1560


In [16]:
from keras import Sequential 
from keras.layers import Conv1D, Conv2D, MaxPooling1D, MaxPooling2D, Flatten, Dense, Dropout, InputLayer
import tensorflow as tf

model = Sequential()
model.add(InputLayer(input_shape=(1, X_train.shape[2], 1)))
model.add(Conv2D(4, (1, 3), activation='relu'))
model.add(MaxPooling2D((1, 2)))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(16, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x1e12badf910>

In [14]:
model.predict(X_test)



array([[4.5070895e-11],
       [6.7900637e-06],
       [1.0000000e+00],
       ...,
       [9.9999976e-01],
       [9.9999952e-01],
       [6.5466104e-09]], dtype=float32)