In [1]:
import os
import glob
import shutil
import pathlib
#import cv2
import librosa
import librosa.display
import pandas as pd 
import matplotlib.pyplot as plt
import matplotlib.image as mimg
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from sklearn.utils import class_weight
from collections import Counter
from scipy.io import wavfile
import tensorflow as tf
import tensorflow_io as tfio
from IPython.display import Audio
#tensorflow
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.applications import mobilenet_v2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten, SeparableConv2D
from tensorflow.keras.layers import GlobalMaxPooling2D, BatchNormalization, Concatenate
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical


seed_number = 24
tf.random.set_seed(seed_number)
np.random.seed(seed_number)

In [2]:
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=config)

In [3]:
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  1


In [4]:
root = "d:/datasets/sound/"
input_dir = os.path.join(root,"data/")
train_dir = os.path.join(input_dir, 'Training_Data/')
val_dir = os.path.join(input_dir, 'val/')
test_dir = os.path.join(input_dir, 'Testing_Data/')
train_img_dir = os.path.join(root, 'img_data/')
val_img_dir = os.path.join(root, 'img_data_val/')

In [None]:
dataset_dir = [dir for dir in sorted(os.listdir(input_dir)) if os.path.isdir(os.path.join(input_dir, dir))]
label_name = [subdir for subdir in sorted(os.listdir(train_dir)) if os.path.isdir(os.path.join(train_dir, subdir))]

# информация о папках
print(f"Main directories\t: {os.listdir(root)}")
print(f"Dataset sub-directories\t: {dataset_dir}")
print(f"Train set directory\t: {label_name}")

In [3]:
#CONST
SAMPLE_RATE = 16000 # Sampling rate
#duration = 5
#hop_length = 512
FMIN = 20
FMAX = SAMPLE_RATE // 2
#n_mels = 128
N_FFT = 512
Fs = 16000 
#samples =SAMPLE_RATE * duration

In [8]:
#Цикл, который перебирает wav файлы и сохраняет их как спектограммы 
def wav_to_mel(name_classes):
    classes = name_classes.split()
    for g in classes:
        pathlib.Path(f'img_data/{g}').mkdir(parents=True, exist_ok=True)
        for filename in os.listdir(input_dir + f'Training_Data/{g}'):
            wav_file_name = input_dir + f'Training_Data/{g}/{filename}'
            y, sr = librosa.load(wav_file_name, mono=True, duration=5)
            print(y.shape)
            plt.specgram(y, NFFT=N_FFT, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB');
            plt.axis('off');
            plt.savefig(f'img_data/{g}/{filename[:-3].replace(".", "")}.png')
            plt.clf()
    return(print('Done'))
            
            

In [None]:
wav_to_mel('human spoof')

In [None]:
# выбираем 0,3 данных от всего trainig set для validation set
def val_make (source, dest):
    files = os.listdir(source)
    for f in files:
        if np.random.rand(1) < 0.3:
            shutil.move(source + '/'+ f, dest + '/'+ f)
    return 

In [None]:
val_make(train_img_dir + "spoof", val_img_dir + "spoof") 

In [None]:
val_make(train_img_dir + "human", val_img_dir + "human")

In [10]:
#создаем генераторы
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   rotation_range = 20,
                                   width_shift_range = 0.2,
                                   height_shift_range = 0.2,
                                   shear_range = 0.15,
                                   zoom_range = 0.15,
                                   horizontal_flip = True,
                                   fill_mode="nearest")

val_datagen = ImageDataGenerator(rescale = 1./255)


In [12]:
#характиристики датасета
train_batch_size = 32
val_batch_size = 32
flow_from_directory = 224
img_height = 432
img_width = 288
#подготавливаем данные
train_gen = train_datagen.flow_from_directory(train_img_dir,
                                              batch_size = train_batch_size,
                                              class_mode = 'binary',
                                              target_size = (img_height, img_width),
                                              seed = seed_number)

val_gen = val_datagen.flow_from_directory(val_img_dir,
                                          batch_size = val_batch_size,
                                          class_mode = 'binary',
                                          target_size = (img_height, img_width),
                                          seed = seed_number)

Found 50000 images belonging to 2 classes.


In [13]:
#узнаем размерность
print(f'Train set batch shape\t: {next(train_gen)[0].shape}')


Train set batch shape	: (32, 432, 288, 3)


In [15]:
#строим модель
def build_model(input_shape):
    model = Sequential()

    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D((3, 3),  padding='same'))
    model.add(BatchNormalization())

    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((3, 3), padding='same'))
    model.add(BatchNormalization())

    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((3, 3), padding='same'))
    model.add(BatchNormalization())

    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((3, 3), padding='same'))
    model.add(BatchNormalization())

    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((3, 3), padding='same'))
    model.add(BatchNormalization())

    # flatten output and feed it into dense layer
    model.add(Flatten())
    model.add(Dense(128, activation='relu',  activity_regularizer=tf.keras.regularizers.l2(0.001)))
    model.add(Dropout(rate=0.2))
    # output layer
    model.add(Dense(1, activation='sigmoid'))
    return model

In [16]:
#константы. кол-во эпох обучения и рэйт обучения
num_epochs = 15
learning_rate = 5e-5  

In [17]:
#создаем оптимизатор и компилируем модель
shape = [img_width, img_height, 3]
optimiser = tensorflow.keras.optimizers.Adam(learning_rate=learning_rate)
    
    
model = build_model(shape)
model.compile(optimizer=optimiser,
                  loss='binary_crossentropy',
                  metrics=['acc'])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 286, 430, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 96, 144, 32)       0         
_________________________________________________________________
batch_normalization (BatchNo (None, 96, 144, 32)       128       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 94, 142, 64)       18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 48, 64)        0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 48, 64)        256       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 30, 46, 64)        3

In [18]:
#веса классов
counter = Counter(train_gen.classes)                          
max_val = float(max(counter.values()))       
class_weights = {class_id : max_val/num_images for class_id, num_images in counter.items()}   

print(class_weights)

{0: 3.84402247626429, 1: 1.0}


In [None]:
#планировщик обучения
plateau_scheduler = ReduceLROnPlateau(factor=0.2, patience=3, verbose=1, 
                                      min_delta= 0.005, min_lr=5e-7)

In [None]:
#это не нужно.  останавливает обучение, когда точность не растет в теч 3 эпох от макисмальной
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='acc', 
    min_delta=0.001,
    patience=3,
    mode='auto',
    verbose=1,
    baseline=None
)


In [14]:
train_length = len(train_gen.classes)
print(train_length)

50000


In [None]:
#запускаем
history = model.fit(train_gen,
                    epochs = num_epochs,
                    steps_per_epoch = train_length // train_batch_size,
                    validation_data = val_gen,
                    validation_steps = 1,
                    callbacks = [plateau_scheduler],
                    class_weight=class_weights)

history_df = pd.DataFrame.from_dict(history.history)
history_df.to_csv(os.path.join(save_dir, "history_06_11.csv"), index=False)

In [15]:
#выводит графики для наглядности
train_accuracy = history.history['acc']
val_accuracy = history.history['val_acc']
train_loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(train_accuracy))
plt.figure(figsize=(12,4))

# точность
plt.subplot(1,2,1)
plt.plot(epochs, train_accuracy, 'bo', label='Training accuracy')
plt.plot(epochs, val_accuracy, 'r', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['train', 'val'], loc='lower right')

# потери
plt.subplot(1,2,2)
plt.plot(epochs, train_loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['train', 'val'], loc='upper right')

plt.show()

NameError: name 'history' is not defined

In [None]:
#переводит тестовый набор данных в спектограммы
classes = ['test']
for g in classes:
    pathlib.Path(f'img_data_test/{g}').mkdir(parents=True, exist_ok=True)
    for filename in os.listdir(input_dir + f'Testing_Data/{g}'):
        wav_file_name = input_dir + f'Testing_Data/{g}/{filename}'
        y, sr = librosa.load(wav_file_name, mono=True, duration=5)
        print(y.shape)
        plt.specgram(y, NFFT=n_fft, Fs=2, Fc=0, noverlap=128, cmap='plasma', sides='default', mode='default', scale='dB');
        plt.axis('off');
        plt.savefig(f'img_data_test/{g}/{filename[:-3].replace(".", "")}.png')
        plt.clf()

In [None]:
new_model = tf.keras.models.load_model('saved_model_sound') #загружаем модель

In [None]:
test_datagen = ImageDataGenerator(rescale=1./255) #загружаем генератор

In [None]:
#подготавилваем
test_gen = test_datagen.flow_from_directory(root,
                                              batch_size = 1,
                                              class_mode = None,
                                              classes=['img_data_test'],
                                              target_size = (img_width, img_height),
                                              seed = seed_number,
                                              shuffle=False)

In [None]:
prediction = new_model.predict(test_gen,verbose=1,steps=len(test_gen)) #делаем предикт

In [None]:
predicted_class = np.argmax(prediction,axis=-1)

In [None]:
filenames=test_gen.filenames

In [None]:
results=pd.DataFrame({"file":filenames,"pred":prediction[:,0]} )

In [None]:
results.to_excel("output.xlsx")#сохраняем в формате excel 