In [29]:
%load_ext autoreload
%autoreload 2


import tensorflow as tf
from tensorflow  import keras
import tensorflow_hub as hub
import numpy as np

import librosa 
from util import WavDataset
import matplotlib.pyplot as plt

import h5py
from pathlib import Path
from config import INTERMEDIATE, MODEL_DIR

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
np.random.seed(0)
hdf5_dataset = h5py.File(INTERMEDIATE / 'train.hdf5', 'r')
random_chunks = np.array(hdf5_dataset)
np.random.shuffle(random_chunks)

In [3]:
X_shape = (10, 1024)
Y_shape = (10, 4)

def chunk_generator():
    for chunk in random_chunks:
        chunk_group = hdf5_dataset[chunk]
        yield (
            np.array(chunk_group['X']), 
            np.array(chunk_group['Y']).T) 

raw_dataset = tf.data.Dataset.from_generator(
    chunk_generator,
    output_signature=(
        tf.TensorSpec(shape=X_shape, dtype=tf.float32),
        tf.TensorSpec(shape=Y_shape, dtype=tf.bool)))

# test
for s in raw_dataset.take(32):
    X, Y = s
    print(np.array(X).sum(), np.array(Y).sum(), end="\t\t")

458.73566 0		594.2076 0		746.2246 0		531.56323 

2024-09-09 11:10:02.884485: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


0		657.0123 0		626.8448 0		664.3427 4		531.1446 0		777.21875 0		763.9229 0		493.90265 0		692.4295 0		566.7892 0		554.7632 0		578.3624 0		545.44275 0		787.87085 0		471.79166 0		701.3546 0		568.9809 0		637.3032 3		641.09607 0		561.34283 0		594.9181 0		801.41156 0		740.50226 0		781.42566 0		628.4475 0		667.9726 0		725.094 0		635.2217 0		614.97064 0		

In [4]:
from keras.layers import Input, Dense

model = keras.Sequential(
    [
        Input(shape=(10, 1024)),
        Dense(256, activation='relu'),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(4, activation='sigmoid')
    ]
)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',  
    metrics=['accuracy']
)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 10, 256)           262400    
                                                                 
 dense_1 (Dense)             (None, 10, 128)           32896     
                                                                 
 dense_2 (Dense)             (None, 10, 64)            8256      
                                                                 
 dense_3 (Dense)             (None, 10, 4)             260       
                                                                 
Total params: 303,812
Trainable params: 303,812
Non-trainable params: 0
_________________________________________________________________


In [None]:
# 133921 chunks
# 4185 batches

from tensorflow.keras.callbacks import TensorBoard
import pandas as pd

# split
n_chunks =  133921 
cut = int(n_chunks * 0.8)
train_dataset = raw_dataset.take(cut).batch(32).prefetch(50)
validation_dataset = raw_dataset.skip(cut).batch(32).prefetch(50)

# callbacks
log_dir = Path("logs") / "fit"
tensorboard_callback = TensorBoard(log_dir=log_dir)
checkpoint_path = MODEL_DIR / "training_1" / "crnn.ckpt"         
Path(checkpoint_path).parent.mkdir(exist_ok=True)                
cp_callback = keras.callbacks.ModelCheckpoint(                   
    filepath=checkpoint_path, save_weights_only=True, verbose=1  
)                                                                

# fit
print("/4185 (per epoch)")
history = model.fit(
    train_dataset, 
    validation_data=validation_dataset,
    epochs=20, 
    verbose=2,
    callbacks=[tensorboard_callback, cp_callback])

df = pd.DataFrame(history.history)
df.to_csv("logs/history.csv")

/4185 (per epoch)
Epoch 1/20


In [None]:
history.history