In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
from keras.layers.embeddings import Embedding
from keras.models import Model
import string
import re
from sklearn.preprocessing import LabelBinarizer
from keras.preprocessing.sequence import pad_sequences
import keras
from sklearn.model_selection import train_test_split
import os
import Scalagram
from Scalagram import Scalagram

In [2]:
file_location = "/Users/lucasgover/Desktop/Wavelet-Transform/Data/SwainsonCut.wav"
sg = Scalagram(file_location)
image = sg.get_data()

In [3]:
image[0].shape

(76, 2)

In [4]:
"""
original code from: https://github.com/FLming/CRNN.tf2/blob/master/crnn/models.py
The original feature extraction structure from CRNN paper.
Related paper: https://ieeexplore.ieee.org/abstract/document/7801919
"""
from keras import layers


def vgg_style(x):

    x = layers.Conv1D(
        64, 3, padding='same', activation='relu', name='conv1')(x)
    x = layers.MaxPool1D(pool_size=2, padding='same', name='pool1')(x)

    x = layers.Conv1D(
        128, 3, padding='same', activation='relu', name='conv2')(x)
    x = layers.MaxPool1D(pool_size=2, padding='same', name='pool2')(x)

    x = layers.Conv1D(256, 3, padding='same', use_bias=False, name='conv3')(x)
    x = layers.BatchNormalization(name='bn3')(x)
    x = layers.Activation('relu', name='relu3')(x)
    x = layers.Conv1D(
        256, 3, padding='same', activation='relu', name='conv4')(x)
    x = layers.MaxPool1D(
        pool_size=2, strides=(1), padding='same', name='pool4')(x)

    x = layers.Conv1D(512, 3, padding='same', use_bias=False, name='conv5')(x)
    x = layers.BatchNormalization(name='bn5')(x)
    x = layers.Activation('relu', name='relu5')(x)
    x = layers.Conv1D(
        512, 3, padding='same', activation='relu', name='conv6')(x)
    x = layers.MaxPool1D(
        pool_size=2, strides=(1), padding='same', name='pool6')(x)

    x = layers.Conv1D(512, 2, use_bias=False, name='conv7')(x)
    x = layers.BatchNormalization(name='bn7')(x)
    x = layers.Activation('relu', name='relu7')(x)

    x = layers.Reshape((-1, 512), name='reshape7')(x)
    
    return x


def build_model(num_classes,
                weight=None,
                preprocess=None,
                postprocess=None,
                img_shape=(32, None, 3),
                model_name='crnn'):
    x = img_input = keras.Input(shape=img_shape)
    if preprocess is not None:
        x = preprocess(x)
    
    x = vgg_style(x)
    x = layers.Bidirectional(
        layers.LSTM(units=256, return_sequences=True), name='bi_lstm1')(x)
    x = layers.Bidirectional(
        layers.LSTM(units=256, return_sequences=True), name='bi_lstm2')(x)
    x = layers.Flatten()(x)
    x = layers.Dense(units=num_classes, name='logits')(x)
    
    if postprocess is not None:
        x = postprocess(x)

    model = keras.Model(inputs=img_input, outputs=x, name=model_name)
    if weight is not None:
        model.load_weights(weight, by_name=True, skip_mismatch=True)
    return model

In [5]:
flattened_image = np.asarray(list(map(lambda a:a.flatten(),image)))
len(flattened_image)
rev_flatten = np.asarray(list(map(lambda a:a.reshape(len(image[0]),2),flattened_image)))
len(rev_flatten)

165468

model = build_model(num_classes=len(flattened_image[0]),)

In [6]:
model = build_model(len(flattened_image[0]),img_shape=image[0].shape)

2023-02-15 18:27:07.479583: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
model.output_shape

(None, 152)

In [8]:
model.compile(optimizer=tf.keras.optimizers.Adam(0.95),
                  loss="mean_squared_error", metrics=["accuracy"])

In [9]:
model.summary()

Model: "crnn"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 76, 2)]           0         
                                                                 
 conv1 (Conv1D)              (None, 76, 64)            448       
                                                                 
 pool1 (MaxPooling1D)        (None, 38, 64)            0         
                                                                 
 conv2 (Conv1D)              (None, 38, 128)           24704     
                                                                 
 pool2 (MaxPooling1D)        (None, 19, 128)           0         
                                                                 
 conv3 (Conv1D)              (None, 19, 256)           98304     
                                                                 
 bn3 (BatchNormalization)    (None, 19, 256)           1024   

In [10]:
model.fit(image[:-1], flattened_image[1:],epochs=20)

Epoch 1/20
 277/5171 [>.............................] - ETA: 28:13 - loss: 38041.1406 - accuracy: 0.0112