In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
import warnings
warnings.filterwarnings('ignore')
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import librosa
import librosa.display
import matplotlib.pyplot as plt
from datetime import datetime

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D, Input, BatchNormalization
from tensorflow.keras import Model

import tensorflow.keras.backend as K
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import IPython.display as ipd
import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
    #for filename in filenames:
        #print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#define some helper functions to convert different types of data into TFRecord features

#credit - stackoverflow, TF docs etc

def _bytes_feature(value):
    """Wrap a single string / bytes value in a tf.train.Feature (bytes_list).

    If `value` is an eager tensor it is unwrapped with `.numpy()` first.
    """
    eager_tensor_type = type(tf.constant(0))
    # BytesList won't unpack a string from an EagerTensor, so unwrap it here.
    raw = value.numpy() if isinstance(value, eager_tensor_type) else value
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[raw]))

def _float_feature(value):
    """Wrap a single float / double value in a tf.train.Feature (float_list)."""
    float_list = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=float_list)

def _int64_feature(value):
    """Wrap a single bool / enum / int / uint value in a tf.train.Feature (int64_list)."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)


def _float2d_feature(value):
    """Returns a float_list holding every element of an array, flattened row by row.

    `value` may be a numpy array or anything `np.asarray` accepts (e.g. nested
    lists). Only the flattened values are stored - the original shape is not -
    so whoever reads the record back has to reshape it.
    """
    flat_values = np.asarray(value).ravel()
    return tf.train.Feature(float_list=tf.train.FloatList(value=flat_values))

# `_bytes_feature` is defined once, earlier in this cell; it is intentionally not
# redefined here (a second identical definition would just shadow the first).

def serialize_array(array):
    """Serialize `array` with tf.io.serialize_tensor and return the result."""
    return tf.io.serialize_tensor(array)



In [None]:
train = pd.read_csv('../input/birdsong-recognition/train.csv')
print(train.shape)
#sampling_rate is read as text; keep only the integer before the first space.
#uint32 is used because uint16 tops out at 65535 and could not hold a rate such as 96000.
train['sampling_rate'] = train['sampling_rate'].apply(lambda sr: int(sr.split(' ')[0])).astype(np.uint32)
train['sampling_rate'].value_counts()

In [None]:
#filter to only the longer duration clips so we can be sure we can take a standard size clip from the spectrogram
#.copy() makes the result an independent DataFrame, so columns added to it later
#are not assignments on a slice of the original frame
train = train[train['duration'] > 6].copy()

In [None]:
#set up an index covering the 2 bird species at the top of the recording counts
#(these are the most frequent species, not a random pick)
species_counts = train['ebird_code'].value_counts().sort_values(ascending=False)
top_two_species = species_counts.index[0:2]
selected = train.index[train['ebird_code'].isin(top_two_species)]
print(train.loc[selected, 'ebird_code'].value_counts())
selected.shape

In [None]:
#map the 2 selected bird codes to integer labels (0/1), numbered in value_counts order
#rows for any other bird keep their original code in 'new_label'
selected_species = train.loc[selected, 'ebird_code'].value_counts().index
labels = {species: position for position, species in enumerate(selected_species)}
print(labels)
train['new_label'] = train['ebird_code'].replace(labels)
train.loc[selected, 'new_label'].value_counts()

In [None]:
#using these 2 birds as an example, extract clips from the spectrograms and append to a list
#of clips and labels (so total 200 clips - one sample from each spectrogram)

CLIP_FRAMES = 200 #number of spectrogram time frames (columns) kept per clip

clip_list = []
label_list = []

for idx in selected:

    #load the audio and take the spectrogram
    c = train.loc[idx, 'ebird_code']
    f = train.loc[idx, 'filename']
    sr = train.loc[idx, 'sampling_rate']

    audio_path = '/kaggle/input/birdsong-recognition/train_audio/' + c + '/' + f

    x, sr = librosa.load(audio_path, sr=sr)

    #peak-normalise the waveform; skip the division for an all-zero (silent) file,
    #which would otherwise divide by zero and fill the clip with NaN
    peak = np.abs(x).max()
    if peak > 0:
        x = x / peak
    x = x.astype('float32')

    X = librosa.stft(x) #create fourier transform spectrogram
    Xdb = librosa.amplitude_to_db(abs(X))

    #rescale the dB values to 0-1 (guarding against a constant spectrogram)
    Xdb = Xdb - Xdb.min()
    db_range = Xdb.max()
    if db_range > 0:
        Xdb = Xdb / db_range

    #as an example just taking one clip from the start of the spectrogram
    clip = Xdb[:, 0:CLIP_FRAMES].copy()

    #a recording with fewer than CLIP_FRAMES frames would give a narrower clip and
    #break the fixed-size reshape done when the records are read back, so pad it
    #on the right with zeros (the quietest value after rescaling)
    missing_frames = CLIP_FRAMES - clip.shape[1]
    if missing_frames > 0:
        clip = np.pad(clip, ((0, 0), (0, missing_frames)), mode='constant')

    clip = (clip * 255).astype(np.uint8) #saving as 0-255, np.uint8, other formats possible

    label = train.loc[idx, 'new_label']

    clip_list.append(clip) #add clip to the list
    label_list.append(label) #add bird label to the list

In [None]:
#shape of the extracted samples is 1025 x 200 - I would have taken a wider clip, but some audio files are very short and I
#don't want to work around audio clips which are too short

In [None]:
#show a few of the spectrogram clips that were created, each titled with its label

examples = [0, 5, 10, 20, 110, 120, 130, 140]
fig, axes = plt.subplots(nrows=1, ncols=8, figsize=(20, 7))

for ax, e in zip(axes, examples):
    ax.imshow(clip_list[e]) #plot the clip
    ax.set_title(label_list[e]) #title is the LABEL (type of bird)

In [None]:
#split manually into train and validation data sets - for demo purposes
#TFRecords don't seem to have a convenient train/test/validation split function, so the split is done up front
#NOTE: the hard-coded positions assume 200 clips, 100 per bird type, stored grouped by bird type - TODO confirm

N_CLIPS = 200
VALIDATION_RANGES = [(70, 100), (170, 200)] #take 30 from each bird type for validation

validation_index = [i for start, stop in VALIDATION_RANGES for i in range(start, stop)]
validation_lookup = set(validation_index) #set membership keeps the filter below linear
train_index = [i for i in range(N_CLIPS) if i not in validation_lookup] #everything else

print(len(train_index), len(validation_index))

In [None]:
#crude train/validation split: pick the clips and labels at the positions chosen above
train_data, train_labels = [], []
for i in train_index:
    train_data.append(clip_list[i])
    train_labels.append(label_list[i])

validation_data, validation_labels = [], []
for i in validation_index:
    validation_data.append(clip_list[i])
    validation_labels.append(label_list[i])

print(len(train_data), len(validation_data), len(train_labels), len(validation_labels))

In [None]:
#write the TRAIN data to a TFRecords file
#each record holds one (feature, label) pair and records are written one at a time
#more than 2 items can be stored per record

file_path = 'train_clips_example1.tfrecords'

with tf.io.TFRecordWriter(file_path) as writer:
    for array, label in zip(train_data, train_labels): #one clip and its label per pass
        record_fields = {
            'b_feature': _bytes_feature(serialize_array(array)), #clip serialized and stored as bytes
            'b_l': _int64_feature(label), #label stored as int64
        }
        record_features = tf.train.Features(feature=record_fields)
        example_message = tf.train.Example(features=record_features) #the single record to be written
        writer.write(example_message.SerializeToString()) #record is added to the file

In [None]:
#write the VALIDATION data to a TFRecords file
#each record holds one (feature, label) pair and records are written one at a time
#more than 2 items can be stored per record

file_path = 'validation_clips_example1.tfrecords' #change the file name

with tf.io.TFRecordWriter(file_path) as writer:
    for array, label in zip(validation_data, validation_labels): #only the data source differs from the train file
        record_fields = {
            'b_feature': _bytes_feature(serialize_array(array)), #clip serialized and stored as bytes
            'b_l': _int64_feature(label), #label stored as int64
        }
        record_features = tf.train.Features(feature=record_fields)
        example_message = tf.train.Example(features=record_features)
        writer.write(example_message.SerializeToString())

In [None]:
#the train and validation clips have now been saved to TFRecords

#to load them back, start by listing the file names
#as I understand it, several file names can be put into each list (as many as needed) - in this example each list has just one

train_filenames = ['train_clips_example1.tfrecords']
validation_filenames = ['validation_clips_example1.tfrecords']

In [None]:
#number of examples per batch; passed to the .batch() calls on the validation and training datasets
BATCH_SIZE=5

In [None]:
#first going to 'read' the validation data back - no data augmentation applied as this is validation

# Read TFRecord file

#instructions for array
def decode_image(image_data):
    """Decode one serialized clip into a float32 tensor of shape [1025, 200, 1].

    `image_data` is parsed as a uint8 tensor (the clips are stored as 0-255),
    scaled to 0-1 by dividing by 255, and reshaped so that a trailing channel
    dimension of size 1 is present.
    """
    pixels = tf.io.parse_tensor(image_data, out_type=tf.uint8)
    scaled = tf.cast(pixels, tf.float32) / 255.0
    return tf.reshape(scaled, [1025, 200, 1])

#general instructions
def _parse_valid_element(element):
    """Parse one serialized validation record into a (feature, label) pair.

    The keys and dtypes in the schema MUST match the ones used when the
    record was written: 'b_feature' holds the serialized clip as a string,
    'b_l' holds the label as int64. No augmentation is applied.
    """
    schema = {
        'b_feature': tf.io.FixedLenFeature([], tf.string),
        'b_l': tf.io.FixedLenFeature([], tf.int64),
    }
    # Works on one example at a time; the dataset's map() applies it per record.
    parsed = tf.io.parse_single_example(element, schema)
    return decode_image(parsed['b_feature']), parsed['b_l']


#load up the VALIDATION DATA
#read the raw records from the listed files, then parse each record and group them into batches
v_dataset = tf.data.TFRecordDataset(validation_filenames)
valid_dataset = v_dataset.map(_parse_valid_element).batch(BATCH_SIZE)

In [None]:
#now to read the TRAIN data back


#instructions for array
def decode_augment_image(image_data):
    """Decode one serialized TRAINING clip and apply some crude random augmentation.

    Decoding matches the validation path: parse as uint8 (clips are stored as
    0-255), scale to 0-1 float32, reshape to [1025, 200, 1]. The result is then
    randomly flipped left/right and given a random contrast factor between
    0.8 and 1.2. Demo only - there are much better augmentation examples
    (e.g. Chris Deotte's notebook in the petals starter competition).

    Returns the augmented tensor.
    """
    raw = tf.io.parse_tensor(image_data, out_type=tf.uint8)
    image = tf.reshape(tf.cast(raw, tf.float32) / 255.0, [1025, 200, 1])

    image = tf.image.random_flip_left_right(image)
    return tf.image.random_contrast(image, 0.8, 1.2)

#general instructions
def _parse_train_element(element):
    """Parse one serialized training record into an (augmented feature, label) pair.

    The keys and dtypes in the schema MUST match the ones used when the
    record was written: 'b_feature' is a tf.string (not tf.float32) holding
    the serialized clip, 'b_l' is the int64 label. The clip goes through
    decode_augment_image, so training data is augmented.
    """
    schema = {
        'b_feature': tf.io.FixedLenFeature([], tf.string),
        'b_l': tf.io.FixedLenFeature([], tf.int64),
    }
    # Works on one example at a time; the dataset's map() applies it per record.
    parsed = tf.io.parse_single_example(element, schema)
    return decode_augment_image(parsed['b_feature']), parsed['b_l']


#load up the TRAINING data
t_dataset = tf.data.TFRecordDataset(train_filenames)  #define the dataset scope in filenames
train_dataset = t_dataset.map(_parse_train_element) #apply TRAIN mapping to the elements needed

#shuffle() keeps a buffer of buffer_size elements and draws each output at random from that buffer,
#so the order is only fully random when the buffer is at least as large as the whole dataset.
#the records appear to be stored grouped by bird type (see the manual split), so a smaller
#buffer would leave the early batches dominated by one class.
n_train_records = sum(1 for _ in t_dataset) #one pass over the raw records just to count them
train_dataset = train_dataset.shuffle(max(n_train_records, 1), reshuffle_each_iteration=True)

#batch the examples so they can be fed into the model
train_dataset = train_dataset.batch(BATCH_SIZE)

#think a call to prefetch() should also be added here when there is more data to speed things up

In [None]:
#inspect one batch (the dataset isn't preloaded into memory, so it has to be iterated)
for raw_record in train_dataset.take(1):
    batch_features, batch_labels = raw_record[0], raw_record[1]

    print(repr(batch_labels[0].numpy())) #label of the first example in the batch

    first_clip = batch_features[0].numpy()
    plt.imshow(first_clip.reshape(1025, 200)) #show the first example's clip with imshow

In [None]:
K.clear_session()

#small CNN: three conv / max-pool / dropout stages, then a dense head with 2 softmax outputs
model = Sequential()

model.add(Conv2D(16, 2, padding='same', activation='relu', input_shape=(1025, 200, 1)))
model.add(MaxPooling2D())
model.add(Dropout(0.5))

model.add(Conv2D(16, 4, padding='same', activation='relu'))
model.add(MaxPooling2D())
model.add(Dropout(0.5))

model.add(Conv2D(16, 4, padding='same', activation='relu'))
model.add(MaxPooling2D())
model.add(Dropout(0.5))

model.add(Flatten())
model.add(Dense(30, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))

#labels are integer class ids and the last layer already applies softmax, hence from_logits=False
sparse_ce_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
model.compile(optimizer='adam', loss=sparse_ce_loss, metrics=['accuracy'])

model.summary()

In [None]:
print('model run starting')
st = datetime.now()
print(st) #print the captured start time, so it is exactly the value used for 'time taken'

#my understanding is that Keras accepts a dataset that yields either
#(features, labels) or (features, labels, weights)

#batch size is already set on the datasets above
#a validation fraction cannot be used with datasets - validation must be a separate dataset
history = model.fit(train_dataset, verbose=1, epochs=30, validation_data=valid_dataset)

print('model run ending')
ed = datetime.now()
print(ed) #print the captured end time, so it is exactly the value used for 'time taken'
print('time taken', ed - st)