In [14]:
import matplotlib.pyplot as plt
import numpy as np
import librosa
from librosa.display import specshow
import os
import pandas as pd
from scipy.io import wavfile
from itertools import islice
import xgboost as xgb
from tqdm import tqdm

import keras
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, Conv1D, MaxPool2D, MaxPool1D, Flatten, LSTM, TimeDistributed, BatchNormalization, GlobalAveragePooling2D
from keras.metrics import categorical_accuracy

In [2]:
# Directory holding the training clips.
train_path = 'train/train/'
# Read one clip only to learn the sampling rate; the decoded samples are discarded.
# The path is derived from train_path so the directory is spelled out in one place.
# NOTE(review): assumes every clip in the dataset shares this rate -- confirm.
sample_rate, _ = wavfile.read(os.path.join(train_path, '100001.wav'))
print(f'sample rate: {sample_rate}')

sample rate: 16000


In [3]:
# Training manifest: a header-less CSV with one row per clip.
# The two columns are named here: 'sample' (wav file name) and 'label' (class id).
train_df = pd.read_csv('train.txt', header=None)
train_df.columns = ['sample', 'label']
# Preview the first rows as the cell output.
train_df.head()

Unnamed: 0,sample,label
0,102333.wav,1
1,103981.wav,1
2,100805.wav,0
3,105528.wav,1
4,104159.wav,0


In [4]:
validation_path = 'validation/validation/'
validation_df = pd.read_csv('validation.txt', header=None)
validation_df.columns = ['sample', 'label']
validation_df.head()

Unnamed: 0,sample,label
0,200104.wav,1
1,200866.wav,1
2,200922.wav,1
3,200336.wav,0
4,200070.wav,1


In [5]:
def get_signals(df, path, sr=sample_rate):
    """Lazily load the audio clips listed in ``df``.

    Args:
        df: DataFrame with a 'sample' column (file name) and a 'label' column.
        path: Directory the file names are joined onto.
        sr: Sampling rate handed to ``librosa.load``.

    Yields:
        ``(waveform, label)`` pairs, one per row of ``df``, in row order.
    """
    for _, record in df.iterrows():
        wav_file = os.path.join(path, record['sample'])
        # librosa.load returns (waveform, sampling_rate); only the waveform is needed.
        waveform, _loaded_sr = librosa.load(wav_file, sr=sr)
        yield waveform, record['label']
        
def get_signals_like_label(df, path, signal_label, sr=sample_rate):
    """Yield only the clips whose label equals ``signal_label``.

    Args:
        df: DataFrame with a 'sample' column (file name) and a 'label' column.
        path: Directory the file names are joined onto.
        signal_label: Label value to keep.
        sr: Sampling rate forwarded to ``get_signals``.

    Yields:
        ``(waveform, label)`` pairs for the matching rows, in row order.
    """
    # Filter the manifest before loading so clips with other labels are never
    # read and decoded only to be thrown away.
    matching_rows = df[df['label'] == signal_label]
    yield from get_signals(matching_rows, path, sr)

def get_fft(sample, sr=sample_rate):
    """Return the magnitude of the real FFT of ``sample``, scaled by its length.

    Args:
        sample: Real-valued signal; presumably 1-D, since the scaling uses
            ``len(sample)`` -- confirm against callers.
        sr: Not used by the computation; kept as part of the signature.

    Returns:
        ``|rfft(sample) / len(sample)|`` as a NumPy array.
    """
    spectrum = np.fft.rfft(sample)
    scaled_spectrum = spectrum / len(sample)
    return np.abs(scaled_spectrum)

In [28]:
def salvare_predictii(y_pred, nume_model):
    """Save predictions for the test clips to ``predictions_<nume_model>.txt``.

    Stores ``y_pred`` in the 'label' column of the notebook-level ``test_df``
    (overwriting the previous values) and writes a ``name,label`` CSV with one
    line per row of ``test_df``.

    Args:
        y_pred: One predicted label per row of ``test_df``, in row order.
        nume_model: Model name used as the suffix of the output file name.
    """
    test_df['label'] = y_pred

    with open('predictions_' + nume_model + '.txt', 'w') as fout:
        fout.write('name,label\n')
        # Pick the two columns by name rather than unpacking whole rows, so the
        # output does not depend on test_df having exactly these two columns in
        # exactly this order.
        fout.writelines(
            f'{sample},{label}\n'
            for sample, label in zip(test_df['sample'], test_df['label'])
        )

In [19]:
# Build the training set: one transposed mel spectrogram per clip, plus its label.
X_train, y_train = [], []
signals_gen_train = get_signals(train_df, train_path)

# The generator has no length, so pass total= to give tqdm a real progress bar.
for signal, label in tqdm(signals_gen_train, total=len(train_df)):
    # The audio must be passed as y=...: librosa >= 0.10 made it keyword-only,
    # and the keyword form also works on older releases.
    # .T puts the frame axis first and the mel-band axis second.
    sample = librosa.feature.melspectrogram(y=signal, sr=sample_rate).T
    X_train.append(sample)
    y_train.append(label)

# NOTE(review): stacking into one dense array assumes every clip yields the
# same number of frames -- confirm all clips have equal duration.
X_train, y_train = np.array(X_train), np.array(y_train)

8000it [03:12, 41.62it/s]


In [20]:
# Build the validation set: one transposed mel spectrogram per clip, plus its label.
X_val, y_val = [], []
signals_gen_val = get_signals(validation_df, validation_path)

# The generator has no length, so pass total= to give tqdm a real progress bar.
for signal, label in tqdm(signals_gen_val, total=len(validation_df)):
    # The audio must be passed as y=...: librosa >= 0.10 made it keyword-only,
    # and the keyword form also works on older releases.
    # .T puts the frame axis first and the mel-band axis second.
    sample = librosa.feature.melspectrogram(y=signal, sr=sample_rate).T
    X_val.append(sample)
    y_val.append(label)

# NOTE(review): stacking into one dense array assumes every clip yields the
# same number of frames -- confirm all clips have equal duration.
X_val, y_val = np.array(X_val), np.array(y_val)

1000it [00:23, 42.72it/s]


In [21]:
# Directory holding the test clips and the manifest listing them (file names only).
test_path = 'test/test/'
test_df = pd.read_csv('test.txt', header=None)
test_df.columns = ['sample']
# Dummy label, only so that the same get_signals function can be reused.
test_df['label'] = [1] * len(test_df)

X_test = []
signals_gen_test = get_signals(test_df, test_path)

# The generator has no length, so pass total= to give tqdm a real progress bar.
# The yielded label is the dummy value from above and is ignored.
for signal, _label in tqdm(signals_gen_test, total=len(test_df)):
    # The audio must be passed as y=...: librosa >= 0.10 made it keyword-only,
    # and the keyword form also works on older releases.
    # .T puts the frame axis first and the mel-band axis second.
    sample = librosa.feature.melspectrogram(y=signal, sr=sample_rate).T
    X_test.append(sample)

# NOTE(review): stacking into one dense array assumes every clip yields the
# same number of frames -- confirm all clips have equal duration.
X_test = np.array(X_test)

3000it [00:18, 158.77it/s]


In [22]:
# Conv2D expects a channel axis, so append a trailing axis of size 1.
X_train_cnn = np.expand_dims(X_train, axis=-1)
# One-hot encode the labels over the two classes.
y_train_categ = to_categorical(y_train, num_classes=2)

# Show the resulting input shape as the cell output.
X_train_cnn.shape

(8000, 32, 128, 1)

In [23]:
# Convolutional classifier over the single-channel mel-spectrogram input.
cnn = Sequential()

# Feature extractor: four 3x3 'same'-padded convolutions with growing filter counts,
# followed by a single 2x2 max-pool and dropout.
cnn.add(Conv2D(16, (3, 3), activation='relu', strides=(1, 1), padding='same', input_shape=X_train_cnn.shape[1:]))
cnn.add(Conv2D(32, (3, 3), activation='relu', strides=(1, 1), padding='same'))
cnn.add(Conv2D(64, (3, 3), activation='relu', strides=(1, 1), padding='same'))
cnn.add(Conv2D(128, (3, 3), activation='relu', strides=(1, 1), padding='same'))
cnn.add(MaxPool2D((2, 2)))
cnn.add(Dropout(0.5))

# Classifier head: flatten, then progressively narrower dense layers.
# NOTE(review): with only one pooling stage the flattened vector is very wide,
# so the first Dense layer holds most of the parameters -- a likely source of
# overfitting; consider more pooling or GlobalAveragePooling2D.
cnn.add(Flatten())
cnn.add(Dense(128, activation='relu'))
cnn.add(Dropout(0.5))
cnn.add(Dense(64, activation='relu'))
cnn.add(Dense(16, activation='relu'))
cnn.add(Dense(8, activation='relu'))
# Two-way softmax, matching the one-hot targets.
cnn.add(Dense(2, activation='softmax'))
cnn.summary()

# A softmax output with one-hot targets pairs with categorical cross-entropy;
# binary cross-entropy is meant for independent sigmoid outputs.
cnn.compile(loss='categorical_crossentropy', optimizer='adam', metrics=[categorical_accuracy])

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 128, 16)       160       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 128, 32)       4640      
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 32, 128, 64)       18496     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 32, 128, 128)      73856     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 16, 64, 128)       0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 16, 64, 128)       0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 131072)           

In [24]:
# Stop automatically once the held-out loss stops improving, instead of
# interrupting the kernel by hand, and keep the weights of the best epoch.
# patience=3 tolerates a few noisy epochs before giving up.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=3, restore_best_weights=True
)

cnn.fit(
    X_train_cnn,
    y_train_categ,
    epochs=20,  # upper bound; early stopping may end training sooner
    batch_size=128,
    validation_split=0.1,  # fraction of the training arrays held out for val_loss
    shuffle=True,
    callbacks=[early_stopping],
)

Train on 7200 samples, validate on 800 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
 256/7200 [>.............................] - ETA: 3:23 - loss: 0.3984 - categorical_accuracy: 0.8125

KeyboardInterrupt: 

Am oprit antrenarea manual pentru că modelul părea că face overfitting.

In [25]:
# Give the validation spectrograms the same trailing channel axis the CNN input uses.
X_val_cnn = np.expand_dims(X_val, axis=-1)
# One-hot encode the validation labels over the two classes.
y_val_categ = to_categorical(y_val, num_classes=2)

In [26]:
# Score the model on the separate validation set; the result lists the loss
# followed by the metric given to compile() (categorical accuracy).
cnn.evaluate(x=X_val_cnn, y=y_val_categ)



[0.6838393650054931, 0.6349999904632568]

In [29]:
# Add the trailing channel axis the CNN input expects.
X_test_cnn = np.expand_dims(X_test, axis=-1)
# Per-class softmax outputs; the predicted class is the index of the largest one.
test_probabilities = cnn.predict(X_test_cnn)
y_pred_cnn = np.argmax(test_probabilities, axis=1)
# Write the predictions file for this model.
salvare_predictii(y_pred_cnn, 'cnn_melspectogram')