In [1]:
# First trial at loading the combined train and test TFRecords.
# Each TFRecord file holds one class and is loaded into its own dataset.
# These datasets are appended to a list and fed into
# tf.data.experimental.sample_from_datasets.
# The sampled dataset is then used with model.fit / model.fit_generator.

In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os
import librosa
import librosa.display
import pathlib

AUTOTUNE = tf.data.experimental.AUTOTUNE

In [3]:
# --- Task ---
N_CLASSES = 264

# --- Audio clip ---
# Sample rate of the audio, and clip duration in seconds.
SAMPLE_RATE = 30000
MAX_DURATION = 30

# --- Spectrogram ---
# Fourier transform size, and the number of samples between successive FFT windows.
FFT_SIZE = 1024
HOP_SIZE = 512
N_SPECTROGRAM_BINS = FFT_SIZE // 2 + 1
N_MEL_BINS = 128

# Lower / upper frequency cutoffs; the upper one is half the sample rate.
F_MIN = 20
F_MAX = SAMPLE_RATE / 2

# --- Training ---
# Number of examples per training batch.
BATCH_SIZE = 4

In [4]:
# Metadata table with its 'date' column parsed as datetimes. In this notebook
# it is only used for its row count (to derive steps_per_epoch).
train = pd.read_csv("/content/drive/My Drive/train.csv", parse_dates=['date'])

In [5]:
# Directories whose entries are loaded below as GZIP-compressed TFRecord
# files, one directory per split.
train_dir = '/content/drive/My Drive/lala1/Data/Train'
test_dir = '/content/drive/My Drive/lala1/Data/Test'

In [6]:
# Entry names (not full paths) in each split directory. os.listdir returns
# them in arbitrary order; they are joined with the directory before loading.
train_tfr = os.listdir(train_dir)
test_tfr = os.listdir(test_dir)

# Earlier attempt (disabled): load every file of a split into one dataset.
# tfrecord_dataset_train = tf.data.TFRecordDataset(train_tfr, compression_type="GZIP")
# tfrecord_dataset_test = tf.data.TFRecordDataset(test_tfr, compression_type="GZIP")

In [7]:
def read_tfrecord(serialized_example):
    """Decode one serialized tf.train.Example into a (tensor, label) pair.

    The record is expected to carry two scalar features:
    'feature0', a string holding a serialized tensor that is parsed back as
    float32, and 'feature1', an int64 that is returned unchanged.

    Returns
    -------
    tuple
        (decoded float32 tensor, int64 scalar)
    """
    schema = {
        'feature0': tf.io.FixedLenFeature((), tf.string),
        'feature1': tf.io.FixedLenFeature((), tf.int64),
    }
    parsed = tf.io.parse_single_example(serialized_example, schema)
    decoded = tf.io.parse_tensor(parsed['feature0'], out_type=tf.float32)
    return decoded, parsed['feature1']

In [8]:
# Sanity check: show the Python type of the directory path.
print(type(train_dir))

<class 'str'>


In [9]:
# Build one dataset per TFRecord file in the train directory and mix them by
# random sampling.
# The list is grown with append rather than pre-sized to 264: a pre-sized list
# raises IndexError when the directory holds more files, and keeps integer
# placeholders (which sample_from_datasets cannot consume) when it holds fewer.
lis = []
for d in train_tfr:
    d_path = os.path.join(train_dir, d)
    tfrecord_dataset_train = tf.data.TFRecordDataset([d_path], compression_type="GZIP")
    # records = sum(1 for _ in tfrecord_dataset_train)
    # print(records)
    dataset = tfrecord_dataset_train.map(read_tfrecord)
    lis.append(dataset)

# Number of datasets built; kept because the next cell prints it.
i = len(lis)

train_ds = tf.data.experimental.sample_from_datasets(lis)

In [10]:
# Number of train files processed by the loop above.
print(i)

264


In [11]:
# Build one dataset per TFRecord file in the test directory and mix them by
# random sampling.
# The list is grown with append rather than pre-sized to 264: a pre-sized list
# raises IndexError when the directory holds more files, and keeps integer
# placeholders (which sample_from_datasets cannot consume) when it holds fewer.
lis = []
for d in test_tfr:
    d_path = os.path.join(test_dir, d)
    tfrecord_dataset_test = tf.data.TFRecordDataset([d_path], compression_type="GZIP")
    # records = sum(1 for _ in tfrecord_dataset_test)
    # print(records)
    dataset = tfrecord_dataset_test.map(read_tfrecord)
    lis.append(dataset)

# Number of datasets built; kept because the next cell prints it.
i = len(lis)

test_ds = tf.data.experimental.sample_from_datasets(lis)

In [12]:
# Number of test files processed by the loop above.
print(i)

264


In [13]:
def prepare_for_training(ds, shuffle_buffer_size=64, batch_size=4):
    """Turn a dataset of (x, y) elements into an endless stream of batches.

    Steps, in order: shuffle with the given buffer size, batch, repeat
    indefinitely, pin static shapes via ``setshape``, and prefetch 5 batches.

    Because the result repeats forever, callers have to bound each epoch
    themselves (e.g. with a step count).
    """
    pipeline = (
        ds.shuffle(buffer_size=shuffle_buffer_size)
        .batch(batch_size)
        .repeat()
        .map(setshape)
        .prefetch(5)
    )
    return pipeline

def setshape(x, y):
    """Attach static shapes to a batched (x, y) pair.

    ``x`` is declared as (batch, n_samples, 1), where n_samples is taken from
    the notebook configuration (SAMPLE_RATE * MAX_DURATION) rather than being
    repeated as a literal, so it cannot drift from the model input length.
    ``y`` gets a trailing axis and is declared as (batch, 1).

    set_shape only refines static shape information; it does not reshape data.
    """
    n_samples = SAMPLE_RATE * MAX_DURATION
    #x = tf.transpose(x)
    # x = tf.reshape(x, [n_samples, 1])
    x.set_shape([None, n_samples, 1])
    # y = tf.reshape(y, [1])
    y = tf.expand_dims(y, -1)
    y.set_shape([None, 1])
    return x, y

In [14]:
train_final = prepare_for_training(train_ds)
test_final = prepare_for_training(test_ds)

In [15]:
class LogMelSpectrogram(tf.keras.layers.Layer):
    """Compute log-magnitude mel-scaled spectrograms.

    The mel filterbank is computed once in the constructor and used as a
    constant tensor in ``call``; the layer creates no variables.

    Parameters
    ----------
    sample_rate : number
        Sample rate handed to the mel filterbank.
    fft_size : int
        STFT frame length; also fixes the number of spectrogram bins
        (fft_size // 2 + 1).
    hop_size : int
        STFT frame step.
    n_mels : int
        Number of mel bins.
    f_min : float
        Lower edge of the mel filterbank in Hz.
    f_max : float or None
        Upper edge of the mel filterbank in Hz; when falsy, sample_rate / 2
        is used.
    """

    def __init__(self, sample_rate, fft_size, hop_size, n_mels,
                 f_min=0.0, f_max=None, **kwargs):
        super(LogMelSpectrogram, self).__init__(**kwargs)
        self.sample_rate = sample_rate
        self.fft_size = fft_size
        self.hop_size = hop_size
        self.n_mels = n_mels
        self.f_min = f_min
        self.f_max = f_max if f_max else sample_rate / 2
        self.mel_filterbank = tf.signal.linear_to_mel_weight_matrix(
            num_mel_bins=self.n_mels,
            num_spectrogram_bins=fft_size // 2 + 1,
            sample_rate=self.sample_rate,
            lower_edge_hertz=self.f_min,
            upper_edge_hertz=self.f_max)

    # No build() override: the previous one appended the filterbank to the
    # list returned by the `non_trainable_weights` property. In TF 2.x Keras
    # that property builds a new list on each access, so the append registered
    # nothing (the layer reports 0 parameters). The inherited build() suffices.

    def call(self, waveforms):
        """Forward pass.

        Parameters
        ----------
        waveforms : tf.Tensor, shape = (None, n_samples)
            A Batch of mono waveforms.

        Returns
        -------
        log_mel_spectrograms : (tf.Tensor), shape = (None, time, freq, ch)
            The corresponding batch of log-mel-spectrograms
        """
        def _tf_log10(x):
            # log10(x) expressed through the natural logarithm.
            numerator = tf.math.log(x)
            denominator = tf.math.log(tf.constant(10, dtype=numerator.dtype))
            return numerator / denominator

        def power_to_db(magnitude, amin=1e-16, top_db=80.0):
            """
            https://librosa.github.io/librosa/generated/librosa.core.power_to_db.html
            """
            # NOTE(review): both reductions below run over the whole tensor,
            # i.e. over every example in the batch, so one clip's dB values
            # depend on the other clips batched with it. Confirm this is
            # intended before switching to a per-example reference.
            ref_value = tf.reduce_max(magnitude)
            # amin floors the argument of the logarithm to keep it finite.
            log_spec = 10.0 * _tf_log10(tf.maximum(amin, magnitude))
            log_spec -= 10.0 * _tf_log10(tf.maximum(amin, ref_value))
            # Clip everything more than top_db below the peak.
            log_spec = tf.maximum(log_spec, tf.reduce_max(log_spec) - top_db)

            return log_spec

        spectrograms = tf.signal.stft(waveforms,
                                      frame_length=self.fft_size,
                                      frame_step=self.hop_size,
                                      pad_end=False)

        magnitude_spectrograms = tf.abs(spectrograms)

        # Power spectrogram projected onto the mel filterbank.
        mel_spectrograms = tf.matmul(tf.square(magnitude_spectrograms),
                                     self.mel_filterbank)

        log_mel_spectrograms = power_to_db(mel_spectrograms)

        # add channel dimension
        log_mel_spectrograms = tf.expand_dims(log_mel_spectrograms, 3)
        return log_mel_spectrograms

    def get_config(self):
        """Return the constructor arguments merged with the base layer config."""
        config = {
            'fft_size': self.fft_size,
            'hop_size': self.hop_size,
            'n_mels': self.n_mels,
            'sample_rate': self.sample_rate,
            'f_min': self.f_min,
            'f_max': self.f_max,
        }
        config.update(super(LogMelSpectrogram, self).get_config())

        return config

In [16]:
import tensorflow_hub as hub

# Pretrained ResNet-V2-50 feature-vector module from TF Hub, used frozen.
feature_extractor_url = "https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4"

# Declare the input shape the layer actually receives in ConvModel instead of
# a stale literal. With pad_end=False the STFT yields
# 1 + (n_samples - FFT_SIZE) // HOP_SIZE frames (1756 for the current
# configuration, matching model.summary()), each with N_MEL_BINS bins, and the
# preceding Conv2D produces 3 channels.
n_stft_frames = 1 + (SAMPLE_RATE * MAX_DURATION - FFT_SIZE) // HOP_SIZE
feature_extractor_layer = hub.KerasLayer(feature_extractor_url,
                                         input_shape=(n_stft_frames, N_MEL_BINS, 3))

# Keep the pretrained weights fixed during training.
feature_extractor_layer.trainable = False

In [17]:
from tensorflow.keras.layers import (BatchNormalization, Conv2D, Dense, Dropout, Flatten, Input, MaxPool2D)
from tensorflow.keras.models import Model

def ConvModel(n_classes, sample_rate=SAMPLE_RATE, duration=MAX_DURATION,
              fft_size=FFT_SIZE, hop_size=HOP_SIZE, n_mels=N_MEL_BINS, fmin=F_MIN, fmax=F_MAX):
    """Build the waveform classifier.

    Pipeline: raw waveform of ``sample_rate * duration`` samples ->
    LogMelSpectrogram -> BatchNormalization over axis 2 -> 3-filter Conv2D
    (so the result has three channels) -> BatchNormalization -> the
    module-level ``feature_extractor_layer`` called with training=False ->
    two Dense(1024, relu) + Dropout(0.1) stages -> softmax over ``n_classes``.

    Returns
    -------
    tensorflow.keras.models.Model
        Uncompiled functional model.
    """
    waveform = Input(shape=(sample_rate * duration,), name='input', dtype='float32')

    features = LogMelSpectrogram(sample_rate, fft_size, hop_size, n_mels, fmin, fmax)(waveform)
    features = BatchNormalization(axis=2)(features)

    # Map the single spectrogram channel to three channels.
    features = Conv2D(3, (3, 3), padding='same')(features)
    features = BatchNormalization()(features)

    features = feature_extractor_layer(features, training=False)

    # Classification head: two identical dense + dropout stages.
    for _ in range(2):
        features = Dense(1024, activation='relu')(features)
        features = Dropout(0.1)(features)

    probabilities = Dense(n_classes, activation='softmax')(features)

    return Model(inputs=waveform, outputs=probabilities)

In [18]:
from tensorflow.keras.optimizers import SGD, schedules

# Optimizer: SGD with momentum and a smoothly (non-staircase) decaying
# learning rate.
lr_schedule = schedules.ExponentialDecay(
    initial_learning_rate=0.05,
    decay_steps=1000,
    decay_rate=0.96,
    staircase=False,
)
sgd = SGD(learning_rate=lr_schedule, momentum=0.85)

# Model: one softmax output per class, trained on integer labels.
n_classes = N_CLASSES
model = ConvModel(n_classes)
model.compile(
    optimizer=sgd,
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)

model.summary()
# model = tf.keras.models.load_model('/content/drive/My Drive/checkpoint')

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 900000)]          0         
_________________________________________________________________
log_mel_spectrogram (LogMelS (None, 1756, 128, 1)      0         
_________________________________________________________________
batch_normalization (BatchNo (None, 1756, 128, 1)      512       
_________________________________________________________________
conv2d (Conv2D)              (None, 1756, 128, 3)      30        
_________________________________________________________________
batch_normalization_1 (Batch (None, 1756, 128, 3)      12        
_________________________________________________________________
keras_layer (KerasLayer)     (None, 2048)              23564800  
_________________________________________________________________
dense (Dense)                (None, 1024)             

In [19]:
# Batches per epoch, derived from the row count of the metadata table.
# NOTE(review): this assumes every row of train.csv has an example in the
# training TFRecords; a separate test split is also loaded, so confirm the
# count is not an overestimate.
steps_per_epoch = len(train)//BATCH_SIZE
steps_per_epoch

5343

In [None]:
checkpoint_filepath = '/content/drive/My Drive/checkpoint-mel-1'

# The model is compiled with the 'sparse_categorical_accuracy' metric, so the
# validation quantity Keras logs is 'val_sparse_categorical_accuracy'.
# Monitoring 'val_acc' (which is never logged) makes save_best_only skip every
# save, so no checkpoint would ever be written.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_sparse_categorical_accuracy',
    mode='max',
    save_best_only=True)

# Stop when the training loss has not improved for 3 consecutive epochs.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

# Abort as soon as the loss becomes NaN instead of spending further hours
# training on it (the captured log of this run shows `loss: nan`).
nan_guard = tf.keras.callbacks.TerminateOnNaN()

# Both datasets repeat forever, hence the explicit step counts.
model.fit(train_final,
          epochs=50,
          steps_per_epoch=steps_per_epoch,
          validation_data=test_final,
          validation_steps=2,
          callbacks=[model_checkpoint_callback, early_stop, nan_guard])

Epoch 1/50




Epoch 2/50
 309/5343 [>.............................] - ETA: 10:25:26 - loss: nan - sparse_categorical_accuracy: 0.0040

In [None]:
# model = tf.keras.Sequential([
#                              tf.keras.layers.Conv1D(128,60, strides = 30, input_shape=[900000, 1], activation='relu'),
#                              tf.keras.layers.Dropout(0.4),
#                             #  tf.keras.layers.Conv1D(128, 60, strides = 30, activation='relu'),
#                             #  tf.keras.layers.Dropout(0.4),
#                             #  tf.keras.layers.MaxPooling1D(4, 4),
#                             #  tf.keras.layers.Conv1D(64, 30,strides=15, activation='relu'),
#                             #  tf.keras.layers.Dropout(0.4),
#                             #  tf.keras.layers.Conv1D(64, 30,strides=15, activation='relu'),
#                             #  tf.keras.layers.Dropout(0.4),
#                             #  tf.keras.layers.MaxPooling1D(4,4),
#                             #  tf.keras.layers.Conv1D(64, 30,strides=15, activation='relu'),
#                             #  tf.keras.layers.Dropout(0.4),
#                              tf.keras.layers.Conv1D(64, 30,strides=15, activation='relu'),
#                              tf.keras.layers.Dropout(0.4),
#                              tf.keras.layers.MaxPooling1D(4,4),
#                              tf.keras.layers.Flatten(),
#                              tf.keras.layers.Dense(512, activation='relu'),
#                              tf.keras.layers.Dense(64, activation='relu'),
#                              tf.keras.layers.Dense(264, activation='softmax')                             
# ])

# model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['sparse_categorical_accuracy'])
# model.summary() 

# Raw-waveform classifier: a stack of dilated causal Conv1D blocks, a small
# convolutional/dense head, and a 264-way softmax. Layers are collected in a
# list first and handed to Sequential in one go.
stack = [tf.keras.layers.InputLayer(input_shape=[900000, 1])]

# Two passes over dilation rates 1, 2, 4, 8; each block downsamples time by 4.
for rate in (1, 2, 4, 8, 1, 2, 4, 8):
    stack.extend([
        tf.keras.layers.Conv1D(filters=32, kernel_size=5, padding='causal',
                               activation='relu', dilation_rate=rate),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.MaxPooling1D(4),
    ])

stack.extend([
    tf.keras.layers.Conv1D(filters=16, kernel_size=5),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.MaxPooling1D(8),
    # The Dense layers come before Flatten, so they act on the last axis.
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(264, activation='softmax'),
])

model = tf.keras.models.Sequential(stack)

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['sparse_categorical_accuracy'],
)
model.summary()

In [None]:
# Checkpoint callback: writes to the path below only when the monitored
# 'val_loss' improves (save_best_only=True).
m_cp = tf.keras.callbacks.ModelCheckpoint('/content/drive/My Drive/my_model_cp-3', monitor='val_loss', verbose=0, save_best_only=True)

In [None]:
# Both datasets repeat forever (prepare_for_training ends in .repeat()), so
# every epoch and every validation pass must be bounded by an explicit step
# count; without them the first epoch never finishes.
# model.fit accepts tf.data datasets directly; fit_generator is deprecated.
# validation_steps mirrors the value used for the first training run in this
# notebook; raise it for a steadier val_loss.
history = model.fit(train_final,
                    epochs=50,
                    steps_per_epoch=steps_per_epoch,
                    validation_data=test_final,
                    validation_steps=2,
                    callbacks=[m_cp])

In [None]:
# Persist the model currently bound to `model` to Google Drive.
model.save('/content/drive/My Drive/my_model_3')