In [1]:
import os
import librosa
from keras.layers import *
from keras.metrics import *
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict

In [2]:
DATA_PATH = os.path.normpath(os.path.abspath('../Data'))
CATS = list(range(1, 6))
RATE = 48000
MEAN_FRAMES = 177090  # from metadata analysis

# Build {category: [dB spectrogram, ...]}.
# Each category lives in a sub-directory of DATA_PATH named after its id.
# Every clip is resampled to RATE, mixed down to mono and time-stretched to
# exactly MEAN_FRAMES samples so that all spectrograms share one shape.
data = defaultdict(list)
for cat in CATS:
    print(f'Processing category {cat}')
    cat_dir = os.path.join(DATA_PATH, str(cat))

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any later seeded shuffle/split) reproducible.
    for file in sorted(os.listdir(cat_dir)):

        # processing only wav files (extension matched case-insensitively)
        abs_path = os.path.join(cat_dir, file)
        if not (os.path.isfile(abs_path) and file.lower().endswith('.wav')):
            continue

        x, _ = librosa.load(abs_path, sr=RATE, mono=True)

        # A zero-length clip would yield a stretch rate of 0, which is invalid.
        if len(x) == 0:
            print(f'Skipping empty clip: {abs_path}')
            continue

        # rate > 1 shortens the clip, rate < 1 lengthens it; this ratio maps
        # len(x) samples onto MEAN_FRAMES samples.
        x_ = librosa.effects.time_stretch(x, rate=len(x) / MEAN_FRAMES)

        # Verify the length before storing, so a bad sample never enters `data`.
        assert x_.shape[0] == MEAN_FRAMES

        # TODO: settle the choice of n_fft
        x_db = librosa.amplitude_to_db(np.abs(librosa.stft(x_, n_fft=2048)))

        data[cat].append(x_db)

Processing category 1
Processing category 2
Processing category 3
Processing category 4
Processing category 5


In [3]:
# Sanity check: shape of the first spectrogram stored for category 1.
data[1][0].shape

(1025, 346)

In [4]:
# One-hot label vector for every category: position k is 1 exactly when the
# category equals CATS[k]. Comparing against the CATS entries themselves
# (rather than against `index + 1`) keeps the encoding valid even if the
# category ids are not the contiguous range 1..N.
one_hot_cats = {
    cat: [1 if cat == other else 0 for other in CATS]
    for cat in CATS
}
one_hot_cats

{1: [1, 0, 0, 0, 0],
 2: [0, 1, 0, 0, 0],
 3: [0, 0, 1, 0, 0],
 4: [0, 0, 0, 1, 0],
 5: [0, 0, 0, 0, 1]}

In [5]:
# Flatten the per-category dict into one list of
# (dB spectrogram, one-hot label) pairs.
l_data = [
    (spectrogram, one_hot_cats[category])
    for category, spectrograms in data.items()
    for spectrogram in spectrograms
]

In [6]:
# Total number of (spectrogram, label) pairs across all categories.
len(l_data)

859

In [7]:
from sklearn.model_selection import train_test_split
# Split the (spectrogram, label) pairs 80% / 20% into train and test;
# the fixed random_state makes the shuffle repeatable for a given input order.
train, test = train_test_split(
    l_data,
    train_size=0.8,
    random_state=42,
)

In [8]:
def split_on_data_and_label(d):
    """Split an iterable of (sample, label) pairs into two stacked arrays.

    Args:
        d: iterable of 2-tuples; the first element of each tuple is the
            sample, the second is its label.

    Returns:
        A tuple ``(samples, labels)`` of numpy arrays: all first elements
        stacked along a new leading axis, then all second elements stacked
        the same way.
    """
    samples, targets = [], []
    for sample, target in d:
        samples.append(np.array(sample))
        targets.append(np.array(target))
    return np.array(samples), np.array(targets)

# Turn each split into a stacked sample array and a stacked label array.
(train_data, train_labels), (test_data, test_labels) = (
    split_on_data_and_label(train),
    split_on_data_and_label(test),
)

def reshape(a: np.ndarray):
    """Return `a` with one extra trailing axis of length 1.

    An input of shape ``(d0, ..., dk)`` comes back with shape
    ``(d0, ..., dk, 1)``; the element values are unchanged.
    """
    return a[..., np.newaxis]

# Give every spectrogram a trailing single-channel axis, which is the
# per-sample shape later passed to the model's Input layer.
train_data, test_data = reshape(train_data), reshape(test_data)

print(f'{train_data.shape} and {train_labels.shape}\n{test_data.shape} and {test_labels.shape}')

(687, 1025, 346, 1) and (687, 5)
(172, 1025, 346, 1) and (172, 5)


In [9]:
from keras.models import *
from keras.losses import CategoricalCrossentropy
from keras.optimizers import Adam, SGD

# Small CNN classifier over single-channel dB spectrograms:
# two Conv2D + AveragePooling2D stages, then a dense classification layer.

# Number of output classes, taken from the width of the one-hot label vectors.
n_classes = train_labels.shape[1]

model = Sequential(name='CNN')

model.add(Input(shape=train_data[0].shape, name='Input'))

# First stage: large strided kernel to shrink the spectrogram aggressively.
model.add(Conv2D(filters=30, kernel_size=(20, 10), strides=(11, 3), activation='relu', name='Conv_1'))
model.add(AveragePooling2D(pool_size=(6, 3), name='Pool_1'))

model.add(Conv2D(filters=10, kernel_size=(3, 5), activation='relu', name='Conv_2'))
model.add(AveragePooling2D(pool_size=(6, 3), name='Pool_2'))

model.add(Flatten(name='Flatten'))

# Each sample belongs to exactly one category (one-hot targets), and
# CategoricalCrossentropy expects the predictions to form a probability
# distribution over classes, so the output activation is softmax rather
# than independent per-class sigmoids.
model.add(Dense(n_classes, activation='softmax', name='Dense_1'))

model.compile(
    optimizer=SGD(learning_rate=0.05),
    loss=CategoricalCrossentropy(),
    # `metrics` is documented as a list of metrics; pass it as one.
    metrics=[CategoricalAccuracy()],
)

print(f'One unit shape: {train_data[0].shape}')
model.summary()

One unit shape: (1025, 346, 1)
Model: "CNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Conv_1 (Conv2D)              (None, 92, 113, 30)       6030      
_________________________________________________________________
Pool_1 (AveragePooling2D)    (None, 15, 37, 30)        0         
_________________________________________________________________
Conv_2 (Conv2D)              (None, 13, 33, 10)        4510      
_________________________________________________________________
Pool_2 (AveragePooling2D)    (None, 2, 11, 10)         0         
_________________________________________________________________
Flatten (Flatten)            (None, 220)               0         
_________________________________________________________________
Dense_1 (Dense)              (None, 5)                 1105      
Total params: 11,645
Trainable params: 11,645
Non-trainable params: 0
____________________________

In [11]:
# Mini-batch size for training. A batch size of 1 performs one optimizer step
# per sample, which makes every epoch very slow and the gradient very noisy.
BATCH_SIZE = 32

# NOTE: the held-out test split doubles as validation data here, so the
# validation curve below is measured on the test set.
history = model.fit(
    x=train_data,
    y=train_labels,
    epochs=40,
    verbose=2,
    validation_data=(test_data, test_labels),
    batch_size=BATCH_SIZE,
)

# Learning curves: categorical accuracy per epoch on both splits.
fig, ax = plt.subplots()
ax.plot(history.history['categorical_accuracy'], label='train')
ax.plot(history.history['val_categorical_accuracy'], label='validation')
ax.set(title='CNN categorical accuracy', xlabel='Epoch', ylabel='Categorical accuracy')
ax.legend()
plt.show()

Epoch 1/40


KeyboardInterrupt: 