In [2]:
pip install tensorflow_io

Collecting tensorflow_io
  Downloading tensorflow_io-0.36.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (49.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 MB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow_io
Successfully installed tensorflow_io-0.36.0


In [4]:
import os
from matplotlib import pyplot as plt
import tensorflow as tf
import tensorflow_io as tfio

In [5]:
def load_wav_16k_mono(filename):
    """Read a WAV file and return it as a single-channel waveform at 16 kHz.

    Args:
        filename: Path of the WAV file; handed to ``tf.io.read_file``.

    Returns:
        The decoded audio with its channel axis removed, resampled to 16000 Hz.
    """
    raw_bytes = tf.io.read_file(filename)
    # desired_channels=1 asks the decoder for exactly one channel.
    audio, native_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    # Remove the trailing channel axis.
    mono = tf.squeeze(audio, axis=-1)
    # The source rate is passed to the resampler as int64.
    native_rate = tf.cast(native_rate, dtype=tf.int64)
    return tfio.audio.resample(mono, rate_in=native_rate, rate_out=16000)

In [6]:
# Folder of positive-class clips (Mir) and folder of negative-class clips (NMir).
Mir, NMir = (
    os.path.join('data', folder)
    for folder in ('Leptonychotes_weddellii', 'trial')
)

In [7]:
# One file-path dataset per class folder: m from Mir, nm from NMir.
m, nm = (
    tf.data.Dataset.list_files(f'{folder}/*.wav')
    for folder in (Mir, NMir)
)

In [8]:
# Pair every file path with its class label: 1.0 for files from m, 0.0 for
# files from nm, then chain the two labelled datasets into one.
positives = tf.data.Dataset.zip((
    m,
    tf.data.Dataset.from_tensor_slices(tf.ones([len(m)])),
))
negatives = tf.data.Dataset.zip((
    nm,
    tf.data.Dataset.from_tensor_slices(tf.zeros([len(nm)])),
))
data = positives.concatenate(negatives)

In [None]:
# Collect the length (in samples, after resampling to 16 kHz) of every clip in
# the positive-class folder. The files are loaded from the same directory that
# is listed; previously the names were listed from 'Leptonychotes_weddellii'
# but opened under 'trial', which pointed at the wrong (or missing) files.
lengths = []
positive_dir = os.path.join('data', 'Leptonychotes_weddellii')
for file in os.listdir(positive_dir):
    tensor_wave = load_wav_16k_mono(os.path.join(positive_dir, file))
    lengths.append(len(tensor_wave))

In [None]:
def preprocess(file_path, label):
    """Turn one labelled WAV file into a fixed-size magnitude spectrogram.

    The waveform is cut to at most 48000 samples, zero-padded at the front up
    to exactly 48000 samples, and converted to the magnitude of its STFT.

    Args:
        file_path: Path of the WAV file to load.
        label: Class label; returned unchanged.

    Returns:
        Tuple ``(spectrogram, label)`` where the spectrogram has an extra
        trailing axis of size 1.
    """
    clip = load_wav_16k_mono(file_path)[:48000]
    # Number of zeros required to reach 48000 samples.
    pad_len = [48000] - tf.shape(clip)
    padded = tf.concat([tf.zeros(pad_len, dtype=tf.float32), clip], 0)
    magnitudes = tf.abs(tf.signal.stft(padded, frame_length=320, frame_step=32))
    # Append a trailing axis of size 1.
    return tf.expand_dims(magnitudes, axis=2), label

In [None]:
# Pull a single (path, label) example out of the shuffled negative set.
filepath, label = next(negatives.shuffle(buffer_size=10000).as_numpy_iterator())

In [None]:
# Run the sampled example through the preprocessing function.
spectrogram, label = preprocess(file_path=filepath, label=label)

In [None]:
# Display the spectrogram: the transpose moves the size-1 axis to the front,
# and index 0 selects the remaining 2-D image.
fig, ax = plt.subplots(figsize=(30, 20))
ax.imshow(tf.transpose(spectrogram)[0])
plt.show()

In [None]:
# Build the input pipeline: spectrogram conversion -> cache -> shuffle ->
# batches of 16 -> prefetch.
#
# reshuffle_each_iteration=False is essential here: the dataset is later split
# with take()/skip(). With the default (True) the order changes every time the
# dataset is iterated, so the "train" and "test" slices would contain different,
# overlapping samples on every epoch and test data would leak into training.
# Trade-off: the sample order is now fixed across epochs; shuffle the training
# split again after splitting if per-epoch reshuffling is wanted.
data = (
    data
    .map(preprocess)
    .cache()
    .shuffle(buffer_size=1000, reshuffle_each_iteration=False)
    .batch(16)
    .prefetch(8)
)



In [None]:
# 70% of the number of batches in the dataset.
0.7 * len(data)

21.0

In [None]:
# Split the batches 70% / 30% into train and test. The sizes are derived from
# the dataset instead of being hard-coded, so the split stays correct when the
# number of clips changes (for 30 batches this is still 21 / 9).
# Integer arithmetic avoids float rounding in the 70% computation.
# NOTE(review): take()/skip() only yield disjoint splits if `data` produces its
# batches in the same order on every iteration - confirm the upstream shuffle
# does not reshuffle per iteration.
n_batches = len(data)
n_train = n_batches * 7 // 10
train = data.take(n_train)
test = data.skip(n_train).take(n_batches - n_train)

In [None]:
# Fetch the first batch of the training split as NumPy arrays.
samples, labels = next(train.as_numpy_iterator())

In [None]:
# Show the shape of the batch that was just drawn from the training split.
samples.shape

(16, 1491, 257, 1)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten

In [None]:
# Two 3x3 convolutions with 16 filters each, followed by a flattened dense
# head with a single sigmoid output.
model = Sequential([
    Conv2D(16, (3,3), activation='relu', input_shape=(1491, 257,1)),
    Conv2D(16, (3,3), activation='relu'),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Adam optimizer, binary cross-entropy loss; recall and precision are tracked.
model.compile(
    optimizer='Adam',
    loss='BinaryCrossentropy',
    metrics=[tf.keras.metrics.Recall(), tf.keras.metrics.Precision()],
)

In [None]:
# Print the layer-by-layer output shapes and parameter counts of the model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 1489, 255, 16)     160       
                                                                 
 conv2d_3 (Conv2D)           (None, 1487, 253, 16)     2320      
                                                                 
 flatten_1 (Flatten)         (None, 6019376)           0         
                                                                 
 dense_2 (Dense)             (None, 64)                385240128 
                                                                 
 dense_3 (Dense)             (None, 1)                 65        
                                                                 
Total params: 385242673 (1.44 GB)
Trainable params: 385242673 (1.44 GB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Train for two epochs on the train split, validating against the test split.
hist = model.fit(x=train, validation_data=test, epochs=2)

Epoch 1/2


In [None]:
# Take the first batch of the test split as NumPy arrays.
X_test, y_test = next(test.as_numpy_iterator())

In [None]:
# Model outputs for the test batch.
yhat = model.predict(x=X_test)

In [None]:
# Turn each model output into a hard 0/1 decision at a 0.5 threshold.
yhat = list(map(lambda prediction: 1 if prediction > 0.5 else 0, yhat))

In [None]:
def load_mp3_16k_mono(filename):
    """Load an audio file via tensorflow_io and return a mono 16 kHz waveform.

    The channels are combined by averaging, which works for any channel
    count. The previous ``reduce_sum(...) / 2`` was only a correct downmix for
    exactly two channels (a single-channel file came out at half amplitude).
    For two-channel input the result is unchanged.

    Args:
        filename: Path of the audio file, passed to ``tfio.audio.AudioIOTensor``.

    Returns:
        1-D float32 tensor: the channel-averaged audio resampled to 16000 Hz.
    """
    res = tfio.audio.AudioIOTensor(filename)

    # Cast first so the mean is a floating-point average for every input dtype.
    tensor = tf.cast(res.to_tensor(), tf.float32)
    # Average over axis 1 to collapse the channels into one.
    tensor = tf.math.reduce_mean(tensor, axis=1)

    # The source rate is passed to the resampler as int64.
    sample_rate = tf.cast(res.rate, dtype=tf.int64)

    wav = tfio.audio.resample(tensor, rate_in=sample_rate, rate_out=16000)
    return wav

In [None]:
# Load the example recording in this cell: `wav` is otherwise first assigned in
# a later cell, so on a fresh kernel (Restart & Run All) this line failed with
# a NameError.
wav = load_mp3_16k_mono(os.path.join('data', 'Recordings', '#22B'))
# Cut the recording into consecutive, non-overlapping 48000-sample windows.
audio_slices = tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=48000, sequence_stride=48000, batch_size=1)

In [None]:
# Look at the first window produced by the slicing dataset.
samples, index = next(audio_slices.as_numpy_iterator())

In [None]:
# Path of the single recording used for the walkthrough below.
# NOTE(review): the file name has no extension - confirm it matches the file on disk.
mp3 = os.path.join('data', 'Recordings', '#22B')

In [None]:
# Decode the recording to a mono 16 kHz waveform.
wav = load_mp3_16k_mono(filename=mp3)

In [None]:
# Cut the waveform into consecutive, non-overlapping 48000-sample windows,
# one window per batch.
audio_slices = tf.keras.utils.timeseries_dataset_from_array(
    data=wav,
    targets=wav,
    sequence_length=48000,
    sequence_stride=48000,
    batch_size=1,
)

In [None]:
# Look at the first window produced by the slicing dataset.
samples, index = next(audio_slices.as_numpy_iterator())

In [None]:
def preprocess_mp3(sample, index):
    """Convert one sliced audio window into a fixed-size magnitude spectrogram.

    Mirrors ``preprocess``: the window is cut to at most 48000 samples,
    zero-padded at the front up to exactly 48000 samples, and converted to the
    magnitude of its STFT. The truncation was missing before, so a window
    longer than 48000 samples produced a negative padding length and failed.

    Args:
        sample: One element of the slicing dataset; its first axis is indexed
            away with ``sample[0]`` (the notebook builds these datasets with
            ``batch_size=1``).
        index: Second element of the dataset pair; unused.

    Returns:
        The magnitude spectrogram with an extra trailing axis of size 1.
    """
    # Drop the leading axis, then cap the length so the padding below can
    # never be negative.
    sample = sample[0][:48000]
    zero_padding = tf.zeros([48000] - tf.shape(sample), dtype=tf.float32)
    wav = tf.concat([zero_padding, sample], 0)
    spectrogram = tf.signal.stft(wav, frame_length=320, frame_step=32)
    spectrogram = tf.abs(spectrogram)
    # Append a trailing axis of size 1.
    spectrogram = tf.expand_dims(spectrogram, axis=2)
    return spectrogram

In [None]:
# Slice the recording into non-overlapping 48000-sample windows - the same
# window length the training clips are cut/padded to and the same length the
# per-recording loop below uses. The previous 16000-sample windows were padded
# with 32000 leading zeros, so the model saw inputs unlike its training data.
audio_slices = tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=48000, sequence_stride=48000, batch_size=1)
# Convert every window to a spectrogram and group them into batches of 64.
audio_slices = audio_slices.map(preprocess_mp3)
audio_slices = audio_slices.batch(64)

In [None]:
# Score every window, then turn each score into a 0/1 decision at 0.5.
yhat = model.predict(x=audio_slices)
yhat = list(map(lambda prediction: 1 if prediction > 0.5 else 0, yhat))

In [None]:
from itertools import groupby

In [None]:
# Collapse each run of identical consecutive decisions into a single entry,
# then sum: the total is the number of separate runs of 1s.
yhat = [decision for decision, _run in groupby(yhat)]
calls = tf.math.reduce_sum(yhat).numpy()

In [None]:
# Display the count computed in the previous cell.
calls

In [None]:
# Score every file in data/Recordings: load it, slice it into 48000-sample
# windows, convert the windows to spectrograms and store the raw model outputs
# under the file name.
results = {}
for file in os.listdir(os.path.join('data', 'Recordings')):
    FILEPATH = os.path.join('data','Recordings', file)
    wav = load_mp3_16k_mono(FILEPATH)

    audio_slices = (
        tf.keras.utils.timeseries_dataset_from_array(
            data=wav,
            targets=wav,
            sequence_length=48000,
            sequence_stride=48000,
            batch_size=1,
        )
        .map(preprocess_mp3)
        .batch(64)
    )

    results[file] = yhat = model.predict(audio_slices)

In [None]:
# Display the raw model outputs collected per recording.
results

In [None]:
# Per recording, turn each model output into a 0/1 decision using a strict
# 0.99 threshold.
class_preds = {
    file: [1 if prediction > 0.99 else 0 for prediction in outputs]
    for file, outputs in results.items()
}
class_preds

In [None]:
# Per recording, collapse runs of identical consecutive decisions and sum the
# result, giving the number of separate runs of 1s.
postprocessed = {
    file: tf.math.reduce_sum([key for key, _run in groupby(scores)]).numpy()
    for file, scores in class_preds.items()
}
postprocessed

In [None]:
import csv

In [None]:
# Write one row per recording with its detected call count.
# The count column was previously labelled 'capuchin_calls', which does not
# describe this dataset: the positive class here is the
# 'Leptonychotes_weddellii' (Weddell seal) folder.
with open('test1.csv', 'w', newline='') as f:
    writer = csv.writer(f, delimiter=',')
    writer.writerow(['recording', 'weddell_seal_calls'])
    for key, value in postprocessed.items():
        writer.writerow([key, value])