# Imports and Setup

In [None]:
# Shell escape: print the NVIDIA driver's view of the GPU(s) attached to this runtime.
!nvidia-smi

In [None]:
# Single seed shared by the TensorFlow and NumPy random generators in this notebook.
SEED = 666

import tensorflow as tf
# The star-imports supply the bare layer/model names used when the network is
# built further down (Input, Dense, Dropout, GlobalAveragePooling2D, Model, ...).
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
# Seed TensorFlow's global random generator.
tf.random.set_seed(SEED)

from tensorflow import keras
# NOTE(review): cifar10 is not referenced in the visible part of this notebook.
from tensorflow.keras.datasets import cifar10

In [None]:
import os
import csv
# Ask TensorFlow for deterministic op implementations.
# NOTE(review): this is set after `import tensorflow` already ran in the
# previous cell -- confirm the variable is still picked up at that point.
os.environ["TF_DETERMINISTIC_OPS"] = "1"

import numpy as np
# Seed NumPy's legacy global generator with the same seed as TensorFlow.
np.random.seed(SEED)

import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

# Render matplotlib figures inside the notebook output.
%matplotlib inline

from mpl_toolkits.axes_grid1 import ImageGrid
from PIL import Image

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# librosa is available under the alias `lb`; `import librosa.display` below
# additionally binds the plain name `librosa`.
import librosa as lb 
import librosa.display
# NOTE(review): duplicate of the matplotlib import above (harmless).
import matplotlib.pyplot as plt
import IPython.display as ipd

# `resize` (skimage) and `stats` (scipy) are used when building test spectrograms.
from skimage.transform import resize
from scipy import stats

In [None]:
# Weights & Biases: experiment tracking plus dataset/model artifacts.
import wandb
from wandb.keras import WandbCallback

# Authenticate this session with W&B before any run is created.
wandb.login()

# Download Dataset from W&B Artifacts and Prepare

In [None]:
# Fetch the spectrogram dataset through a dedicated W&B run so that the
# artifact usage is recorded against the project.
run = wandb.init(
    job_type='download_dataset',
    project='rainforest',
)

# Resolve the versioned dataset artifact and download it locally;
# `artifact_dir` is the local directory returned by the download.
artifact = run.use_artifact(
    'wandb/rainforest/spectrogram-dataset_nfft_2024_hop_512:v0',
    type='dataset',
)
artifact_dir = artifact.download()

# Close the download run.
run.join()

In [None]:
# Directory holding the downloaded spectrogram images.
IMG_DIR = Path(artifact_dir)

# String paths of every .bmp image in the artifact.
# Path.glob yields entries in an arbitrary, filesystem-dependent order, so the
# list is sorted: the seeded train/validation split below is only reproducible
# if its input order is stable.
IMG_PATH = sorted(str(bmp_path) for bmp_path in IMG_DIR.glob('*.bmp'))

### Train-test Split

In [None]:
# Hold out 20% of the image paths for validation; the split is shuffled with a
# fixed random_state.
train_path, valid_path = train_test_split(
    IMG_PATH,
    test_size=0.20,
    random_state=42,
    shuffle=True,
)

# Cell output: (total, train, validation) counts.
tuple(map(len, (IMG_PATH, train_path, valid_path)))

### Dataloader

In [None]:
# Let tf.data pick parallelism / prefetch buffer sizes automatically.
AUTO = tf.data.experimental.AUTOTUNE
# Number of images per batch in both loaders.
BATCH_SIZE = 32
# Width and height of the model's input layer (and of the spectrograms
# generated for the test files).
IMG_WIDTH = 400
IMG_HEIGHT = 224
# Number of image channels.
CHANNELS = 3
# Depth of the one-hot label vector and size of the model's output layer.
NUM_CLASSES = 24

In [None]:
@tf.function
def parse_data(image_path):
    """Turn an image path into a (standardized image, one-hot label) pair.

    Args:
        image_path: scalar string tensor with the path of an image file. The
            second-to-last '_'-separated token of the path must be the integer
            class id.

    Returns:
        A tuple (image, label): the decoded image converted to float32 and
        standardized per image, and a one-hot vector of depth NUM_CLASSES.
    """
    # Image: read the file, decode it, convert to float32, standardize per image.
    raw_bytes = tf.io.read_file(image_path)
    pixels = tf.image.convert_image_dtype(tf.image.decode_image(raw_bytes), tf.float32)
    standardized = tf.image.per_image_standardization(pixels)

    # Label: the class id is encoded in the file name itself.
    class_token = tf.strings.split(image_path, sep='_')[-2]
    class_id = tf.strings.to_number(class_token, out_type=tf.int32)
    one_hot_label = tf.one_hot(class_id, NUM_CLASSES)

    return standardized, one_hot_label

# Path datasets for the two splits (created in this order on purpose: keep the
# dataset-op creation order stable).
trainloader = tf.data.Dataset.list_files(train_path)
testloader = tf.data.Dataset.list_files(valid_path)

# Training pipeline: shuffle paths, parse in parallel, batch, prefetch.
trainloader = trainloader.shuffle(1024)
trainloader = trainloader.map(parse_data, num_parallel_calls=AUTO)
trainloader = trainloader.batch(BATCH_SIZE).prefetch(AUTO)

# Validation pipeline: same as training minus the explicit shuffle step.
testloader = testloader.map(parse_data, num_parallel_calls=AUTO)
testloader = testloader.batch(BATCH_SIZE).prefetch(AUTO)

### Visualize

In [None]:
# Ref: https://www.tensorflow.org/tutorials/load_data/images
def show_batch(image_batch, label_batch):
    """Plot up to 25 images of a batch in a 5x5 grid, titled with their class id.

    Args:
        image_batch: indexable batch of images accepted by `plt.imshow`.
        label_batch: indexable batch of label vectors; the title of each image
            is the argmax of its label vector.

    Batches with fewer than 25 items are plotted in full instead of raising an
    index error; at most the first 25 items are shown.
    """
    n_shown = min(25, len(image_batch))

    plt.figure(figsize=(10, 10))
    for n in range(n_shown):
        plt.subplot(5, 5, n + 1)
        plt.imshow(image_batch[n])
        plt.title(np.argmax(label_batch[n]))
        plt.axis('off')
        
# Pull the first batch from the training loader and plot it as a sanity check.
image_batch, label_batch = next(iter(trainloader))
show_batch(image_batch, label_batch)

# Model

In [None]:
def get_resnet_model():
  """Build the classifier: ImageNet-initialised ResNet50 plus a sigmoid head.

  The model takes (IMG_HEIGHT, IMG_WIDTH, CHANNELS) images, resizes them to the
  224x224 input the backbone is built with, and outputs NUM_CLASSES
  independent sigmoid scores.

  Returns:
      An uncompiled Keras `Model`.
  """
  base_model = tf.keras.applications.ResNet50(include_top=False, weights="imagenet", input_shape=(224, 224, 3))
  # Fine-tune the whole backbone. (The attribute was previously misspelled
  # `trainabe`, which only created an unused attribute on the model.)
  base_model.trainable = True

  inputs = Input((IMG_HEIGHT, IMG_WIDTH, CHANNELS))
  # Named `resized` so the skimage `resize` function imported at the top of the
  # notebook is not shadowed inside this function.
  resized = experimental.preprocessing.Resizing(224, 224)(inputs)
  # NOTE(review): `training=True` pins the backbone to training mode for every
  # call of the outer model, including evaluation and prediction -- confirm
  # this is intended.
  x = base_model(resized, training=True)
  x = GlobalAveragePooling2D()(x)
  x = Dropout(0.5)(x)
  outputs = Dense(NUM_CLASSES, activation='sigmoid')(x)

  return Model(inputs, outputs)

In [None]:
# Reset Keras' global state, then build a throwaway model just to inspect its
# layer/parameter summary (the model is rebuilt again before training).
keras.backend.clear_session()
model = get_resnet_model()
model.summary()

# Callbacks

In [None]:
# Early stopping on validation loss: give up after 5 epochs without
# improvement and restore the weights of the best epoch.
earlystoper = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='auto',
    patience=5,
    restore_best_weights=True,
    verbose=0,
)

# Train

In [None]:
# Upper bound on epochs; early stopping usually ends training sooner.
EPOCHS = 70
# Single source of truth for the learning rate: used by the optimizer and
# recorded in the model artifact's metadata, so the two cannot drift apart.
LEARNING_RATE = 0.001

keras.backend.clear_session()

# initialize model
model = get_resnet_model()

# compile model
opt = keras.optimizers.Adam(learning_rate=LEARNING_RATE)
model.compile(opt, 'binary_crossentropy', metrics=['acc'])

# initialize W&B run
run = wandb.init(project='rainforest', job_type='train')

# declare the artifact we are using
data_artifact = run.use_artifact('wandb/rainforest/spectrogram-dataset_nfft_2024_hop_512:v0')

# train model (metrics are streamed to W&B by WandbCallback)
_ = model.fit(trainloader,
              epochs=EPOCHS,
              validation_data=testloader,
              callbacks=[WandbCallback(),
                         earlystoper])

# save model
model.save('model.h5')

# initialize a new artifact to save the model
model_artifact = wandb.Artifact("trained-model",
                                type="model",
                                description="Simple model trained with spectrogram dataset formed with nfft 2024 and hop length of 512",
                                metadata={'optimizer': 'Adam',
                                          'Loss': 'Binary Cross Entropy',
                                          'Learning Rate': LEARNING_RATE})

# attach the saved weights file and log the artifact to the run
model_artifact.add_file('model.h5')
run.log_artifact(model_artifact)

run.join()

# Submit predictions

In [None]:
# STFT parameters are parsed back out of the artifact directory name.
# NOTE(review): assumes artifact_dir ends in '..._nfft_<N_FFT>_hop_<HOP>:<version>'
# -- confirm against the directory actually returned by artifact.download().
N_FFT = int(artifact_dir.split('_')[-3])
HOP_LENGTH = int(artifact_dir.split('_')[-1].split(':')[0])
SR = 48000 # high sr for less rounding errors this way
LENGTH = 10 * SR #length of slice

# Same values as the dataloader constants defined earlier in the notebook;
# used here as the target size of the generated test spectrograms.
IMG_WIDTH = 400
IMG_HEIGHT = 224

# NOTE(review): not referenced in the visible part of the notebook, and it is a
# relative path (no leading '/') -- confirm whether '/kaggle/working/' was meant.
SAVE_DIR = 'kaggle/working/'

In [None]:
def load_test_file(f, test_dir='/kaggle/input/rfcx-species-audio-detection/test/'):
    """Load one test recording and convert it into model-ready spectrogram images.

    The recording is cut into consecutive slices of LENGTH samples. A final
    slice that would run past the end is instead taken as the last LENGTH
    samples of the recording, so it overlaps the previous one rather than being
    shorter. Every slice becomes a 3-channel image (magnitude spectrogram,
    log spectrogram, log mel spectrogram) of size (IMG_HEIGHT, IMG_WIDTH).

    Args:
        f: file name of the recording inside `test_dir`.
        test_dir: directory containing the test recordings.

    Returns:
        np.ndarray of shape (n_slices, IMG_HEIGHT, IMG_WIDTH, 3); an empty
        array if the recording contains no samples.
    """
    # sr=None keeps the file's native sampling rate.
    wav, sr = lb.load(os.path.join(test_dir, f), sr=None)

    # Split for enough segments to not miss anything
    segments = int(np.ceil(len(wav) / LENGTH))

    spect_array = []

    for i in range(segments):
        if (i + 1) * LENGTH > len(wav):
            # Last segment going from the end (the whole recording if it is
            # shorter than LENGTH).
            wav_slice = wav[max(0, len(wav) - LENGTH):]
        else:
            wav_slice = wav[i * LENGTH:(i + 1) * LENGTH]

        # spectrogram
        stft = lb.stft(wav_slice, hop_length=HOP_LENGTH, n_fft=N_FFT)
        spectrogram = np.abs(stft)
        spectrogram = resize(spectrogram, (IMG_HEIGHT, IMG_WIDTH))

        # log_spectrogram (computed from the already resized magnitude image)
        log_spectrogram = lb.amplitude_to_db(spectrogram)
        log_spectrogram = resize(log_spectrogram, (IMG_HEIGHT, IMG_WIDTH))

        # mel_spectrogram
        # The audio is passed as `y=`: recent librosa releases made the
        # arguments of melspectrogram keyword-only, so a positional call fails.
        mel_spectrogram = lb.feature.melspectrogram(y=wav_slice, n_fft=N_FFT, hop_length=HOP_LENGTH, sr=sr)
        # NOTE(review): melspectrogram returns a power spectrogram by default,
        # yet amplitude_to_db is applied; left unchanged because the training
        # images were presumably produced the same way -- confirm.
        log_mel_spectrogram = lb.amplitude_to_db(mel_spectrogram)
        log_mel_spectrogram = resize(log_mel_spectrogram, (IMG_HEIGHT, IMG_WIDTH))

        # generate image by stacking three transforms
        img = np.stack((spectrogram, log_spectrogram, log_mel_spectrogram), axis=-1)

        # normalize image
        # NOTE(review): zscore normalizes along axis 0 by default, whereas the
        # training loader standardizes over the whole image -- confirm this
        # mismatch is intended.
        norm_img = stats.zscore(img)

        spect_array.append(norm_img)

    return np.array(spect_array)

In [None]:
# Prediction loop: writes one row per test recording to submission.csv.
print('Starting prediction loop')
with open('submission.csv', 'w', newline='') as csvfile:
    submission_writer = csv.writer(csvfile, delimiter=',')
    # Header: recording id followed by one score column per class (s0, s1, ...).
    submission_writer.writerow(['recording_id'] + ['s' + str(k) for k in range(NUM_CLASSES)])

    # os.listdir returns entries in arbitrary order; sort for a stable row order.
    test_files = sorted(os.listdir('/kaggle/input/rfcx-species-audio-detection/test/'))
    print(len(test_files))

    # Every test file is split on several chunks and prediction is made for each chunk
    for i, test_file in enumerate(test_files):
        data = load_test_file(test_file)

        output = model.predict(data)

        # Taking max prediction from all slices per bird species.
        # The model's output layer is already a sigmoid, so these are per-class
        # scores in [0, 1] and are written out directly.
        maxed_output = np.max(output, axis=0)

        # Recording id is the file name up to its first '.'.
        file_id = test_file.split('.')[0]
        submission_writer.writerow([file_id] + list(maxed_output))

        # Progress report every 100 completed files, against the true total
        # (the old message reported len(test_files) + 1).
        done = i + 1
        if done % 100 == 0:
            print('Predicted for ' + str(done) + ' of ' + str(len(test_files)) + ' files')

print('Submission generated')