In [73]:
import numpy as np
import pandas as pd
from pathlib import Path

In [74]:
path_naive = Path("../input/birdclef-2021-naive-npy")
path_naive.exists()

True

In [75]:
df_shortaudio_train = pd.read_csv(path_naive/"shortaudio_train.csv")
df_shortaudio_val = pd.read_csv(path_naive/"shortaudio_val.csv")

df_soundscape_train = pd.read_csv(path_naive/"soundscape_train.csv")
df_soundscape_val = pd.read_csv(path_naive/"soundscape_val.csv")
df_soundscape_test = pd.read_csv(path_naive/"soundscape_test.csv")

In [76]:
df_soundscape_val.head()

Unnamed: 0,row_id,site,audio_id,seconds,birds,n_birds,year,month,day,longitude,latitude,month_x,month_y,day_coarse_x,day_coarse_y,longitude_x,longitude_y,latitude_normalized
0,54955_SSW_395,SSW,54955,395,grycat,1,2017,6,17,-76.45,42.47,-1.0,1.224647e-16,-0.954139,-0.299363,0.234294,-0.972166,0.471889
1,14473_SSW_430,SSW,14473,430,nocall,0,2017,7,1,-76.45,42.47,-0.8660254,-0.5,0.97953,0.201299,0.234294,-0.972166,0.471889
2,26746_COR_330,COR,26746,330,nocall,0,2019,10,4,-84.51,10.12,0.5,-0.8660254,0.688967,0.724793,0.095672,-0.995413,0.112444
3,18003_COR_455,COR,18003,455,rucwar,1,2019,9,4,-84.51,10.12,-1.83697e-16,-1.0,0.688967,0.724793,0.095672,-0.995413,0.112444
4,14473_SSW_365,SSW,14473,365,nocall,0,2017,7,1,-76.45,42.47,-0.8660254,-0.5,0.97953,0.201299,0.234294,-0.972166,0.471889


In [77]:
df_shortaudio_train.head()

Unnamed: 0,primary_label,latitude,longitude,date,filename,year,month,day,npy_filename
0,cubthr,32.5839,-109.9696,2020-08-03,XC617273.ogg,2020,8,3,XC617273_100.npy
1,chbchi,35.253,-120.876,2020-05-21,XC561445.ogg,2020,5,21,XC561445_50.npy
2,mouela1,13.7346,-89.2796,2016-05-27,XC320698.ogg,2016,5,27,XC320698_10.npy
3,cacwre,32.738,-112.2297,2019-09-14,XC497309.ogg,2019,9,14,XC497309_10.npy
4,burwar1,0.0287,-78.8628,2009-06-02,XC35146.ogg,2009,6,2,XC35146_80.npy


I have forgotten to add `month_x`, etc. to `shortaudio_{train,val,test}.csv`. Let's make that up.

This is not necessarily a bad thing -- By forgetting this, our `.csv` files are more ligth-weighted.

In [78]:
def cyclicize_number(number, max_, min_):
    """
    args
        number, int
            \in {min_, min_ + 1, ..., max_}
            e.g. hour => min_ = 0, max_ = 24
                 longitude => min_ = -180, max_ = 180
        max_, int
        min_, int
    return
        (x, y), tuple of float
    """
    period = max_ - min_
    theta = 2 * np.pi * (number / period)
    #theta = 2 * np.pi * ((number - min_) / period)
    x = np.cos(theta)
    y = np.sin(theta)
    return x, y

# N.B. Using the next function to deal with df_train_soundscape is
#      not efficient, since there are only 4 distinct longitudes.
def cyclicize_series(series, max_, min_):
    return list(map(lambda number: cyclicize_number(number, max_, min_), series))

In [79]:
df_shortaudio_train[["month_x", "month_y"]] = cyclicize_series(df_shortaudio_train["month"], 12, 0)
df_shortaudio_train[["day_coarse_x", "day_coarse_y"]] = cyclicize_series(df_shortaudio_train["day"], 31, 0)
df_shortaudio_train[["longitude_x", "longitude_y"]] = cyclicize_series(df_shortaudio_train["longitude"], 180, -180)
df_shortaudio_train["latitude_normalized"] = df_shortaudio_train["latitude"] / 90
df_shortaudio_train.head()

Unnamed: 0,primary_label,latitude,longitude,date,filename,year,month,day,npy_filename,month_x,month_y,day_coarse_x,day_coarse_y,longitude_x,longitude_y,latitude_normalized
0,cubthr,32.5839,-109.9696,2020-08-03,XC617273.ogg,2020,8,3,XC617273_100.npy,-0.5,-0.8660254,0.820763,0.571268,-0.341522,-0.939874,0.362043
1,chbchi,35.253,-120.876,2020-05-21,XC561445.ogg,2020,5,21,XC561445_50.npy,-0.8660254,0.5,-0.440394,-0.897805,-0.513182,-0.85828,0.3917
2,mouela1,13.7346,-89.2796,2016-05-27,XC320698.ogg,2016,5,27,XC320698_10.npy,-0.8660254,0.5,0.688967,-0.724793,0.012573,-0.999921,0.152607
3,cacwre,32.738,-112.2297,2019-09-14,XC497309.ogg,2019,9,14,XC497309_10.npy,-1.83697e-16,-1.0,-0.954139,0.299363,-0.378321,-0.925675,0.363756
4,burwar1,0.0287,-78.8628,2009-06-02,XC35146.ogg,2009,6,2,XC35146_80.npy,-1.0,1.224647e-16,0.918958,0.394356,0.193159,-0.981167,0.000319


In [80]:
df_shortaudio_val[["month_x", "month_y"]] = cyclicize_series(df_shortaudio_val["month"], 12, 0)
df_shortaudio_val[["day_coarse_x", "day_coarse_y"]] = cyclicize_series(df_shortaudio_val["day"], 31, 0)
df_shortaudio_val[["longitude_x", "longitude_y"]] = cyclicize_series(df_shortaudio_val["longitude"], 180, -180)
df_shortaudio_val["latitude_normalized"] = df_shortaudio_val["latitude"] / 90

- Produce common columns for the two diff types of dataframes
- Build `df_train`, `df_val`, `df_test`
- Build `XX_train`, `XX_val`, `XX_test`

In [81]:
L_feature_columns = [
    "month_x",
    "month_y",
    "day_coarse_x",
    "day_coarse_y",
    "longitude_x",
    "longitude_y",
    "latitude_normalized",
]

L_common_columns = L_feature_columns + [
    "npy_filename",
    "primary_label",
]

In [82]:
df_soundscape_train["npy_filename"] = df_soundscape_train["row_id"] + ".npy"
df_soundscape_val["npy_filename"] = df_soundscape_val["row_id"] + ".npy"
df_soundscape_test["npy_filename"] = df_soundscape_test["row_id"] + ".npy"

In [83]:
df_soundscape_train.rename(
    {"birds": "primary_label"},
    axis="columns",
    inplace=True,
)
df_soundscape_val.rename(
    {"birds": "primary_label"},
    axis="columns",
    inplace=True,
)
df_soundscape_test.rename(
    {"birds": "primary_label"},
    axis="columns",
    inplace=True,
)
"primary_label" in df_soundscape_train.columns

True

In [84]:
df_train = pd.concat([
    df_shortaudio_train[L_common_columns],
    df_soundscape_train[L_common_columns],
])
df_train.shape, df_shortaudio_train.shape, df_soundscape_train.shape, len(L_common_columns)

((475238, 9), (473638, 16), (1600, 19), 9)

In [85]:
df_train.head()

Unnamed: 0,month_x,month_y,day_coarse_x,day_coarse_y,longitude_x,longitude_y,latitude_normalized,npy_filename,primary_label
0,-0.5,-0.8660254,0.820763,0.571268,-0.341522,-0.939874,0.362043,XC617273_100.npy,cubthr
1,-0.8660254,0.5,-0.440394,-0.897805,-0.513182,-0.85828,0.3917,XC561445_50.npy,chbchi
2,-0.8660254,0.5,0.688967,-0.724793,0.012573,-0.999921,0.152607,XC320698_10.npy,mouela1
3,-1.83697e-16,-1.0,-0.954139,0.299363,-0.378321,-0.925675,0.363756,XC497309_10.npy,cacwre
4,-1.0,1.224647e-16,0.918958,0.394356,0.193159,-0.981167,0.000319,XC35146_80.npy,burwar1


In [86]:
df_val = pd.concat([
    df_shortaudio_val[L_common_columns],
    df_soundscape_val[L_common_columns],
])
df_val.shape, df_shortaudio_val.shape, df_soundscape_val.shape, len(L_common_columns)

((203389, 9), (202989, 16), (400, 19), 9)

In [87]:
df_test = df_soundscape_test[L_common_columns]
df_test.shape

(400, 9)

Maybe we need to shuffle before assigning `df_train[L_feature_columns].value` to `XX_train`.

In [88]:
df_train = df_train.sample(frac=1)
df_val = df_val.sample(frac=1)

In [89]:
XX_train = df_train[L_feature_columns].values
XX_val = df_val[L_feature_columns].values

In [90]:
df_train

Unnamed: 0,month_x,month_y,day_coarse_x,day_coarse_y,longitude_x,longitude_y,latitude_normalized,npy_filename,primary_label
4480,5.000000e-01,8.660254e-01,-0.758758,0.651372,0.034919,-0.999390,0.172949,XC213067_5.npy,grycat
362679,-8.660254e-01,-5.000000e-01,0.151428,0.988468,0.035712,-0.999362,0.165592,XC486460_15.npy,socfly1
334738,-1.000000e+00,1.224647e-16,-0.250653,-0.968077,-0.231514,-0.972832,0.485168,XC254452_75.npy,wesmea
429055,8.660254e-01,5.000000e-01,0.151428,-0.988468,0.329528,-0.944146,0.220371,XC214482_15.npy,smbani
137846,-5.000000e-01,-8.660254e-01,-0.994869,0.101168,0.998645,-0.052045,0.599221,XC524894_30.npy,cangoo
...,...,...,...,...,...,...,...,...,...
314454,5.000000e-01,8.660254e-01,-0.758758,-0.651372,-0.342230,-0.939616,0.259549,XC466254_60.npy,cowscj1
108605,-8.660254e-01,-5.000000e-01,-0.954139,-0.299363,-0.559015,-0.829158,0.454132,XC327612_15.npy,grycat
268941,-1.836970e-16,-1.000000e+00,0.528964,-0.848644,0.192231,-0.981350,-0.051667,XC251417_20.npy,ruboro1
51398,-1.836970e-16,-1.000000e+00,0.151428,0.988468,0.257748,-0.966212,0.445807,XC433420_5.npy,pilwoo


## Dataset Generator
Why switch to using generator? How large is our data this time? Could you make an estimate?



In [91]:
import tensorflow as tf
import tensorflow.keras as keras

In [92]:
for i in range(5):
    npy_filename_i = df_train["npy_filename"].iloc[i]
    print(npy_filename_i)

XC213067_5.npy
XC486460_15.npy
XC254452_75.npy
XC214482_15.npy
XC524894_30.npy


In [93]:
df_train["npy_filename"].head()

4480       XC213067_5.npy
362679    XC486460_15.npy
334738    XC254452_75.npy
429055    XC214482_15.npy
137846    XC524894_30.npy
Name: npy_filename, dtype: object

In [94]:
XX_train.shape, df_train.shape

((475238, 7), (475238, 9))

In [95]:
XX_train[100]

array([-0.8660254 ,  0.5       , -0.99486932, -0.10116832, -0.50736692,
       -0.86173013,  0.44025   ])

In [96]:
XX_train[100].shape

(7,)

In [97]:
PATH_DATASET = Path("../input/birdclef-2021")

L_birds = [path.name for path
           in (PATH_DATASET / "train_short_audio").iterdir()]
L_birds = sorted(L_birds)
D_label_index = {label: i for i, label in enumerate(L_birds)}
D_index_label = {v: k for k, v in D_label_index.items()}

def label(series):
    #I = np.eye(len(D_label_index))
    y = np.zeros((len(series), len(D_label_index)), dtype=np.float32)
    for i, string in enumerate(series.values):
    #for i, string in enumerate(series):
        if string == "nocall":
            continue
        else:
            L_indices = [D_label_index[label] for label in string.split(" ")]
            #row_i = np.sum(I[L_indices], axis=0)
            #y[i] = row_i
            y[i, L_indices] = 1
    return y

y_train = label(df_train["primary_label"])
np.unique(np.sum(y_train, axis=-1))

array([0., 1., 2., 3., 4., 5.], dtype=float32)

In [98]:
y_train.shape

(475238, 397)

In [99]:
y_val = label(df_val["primary_label"])
#y_test = label(df_test["primary_label"])

In [100]:
import random
random.choice(list((path_naive / "train_npy").iterdir()))

PosixPath('../input/birdclef-2021-naive-npy/train_npy/XC481127_15.npy')

In [101]:
random_npy_path = _

In [102]:
random_npy = np.load(random_npy_path)
random_npy.dtype, random_npy.shape

(dtype('uint8'), (128, 201))

In [103]:
h, w = random_npy.shape

In [117]:
from tensorflow.keras.utils import Sequence
from joblib import Parallel, delayed

class DatasetGenerator(Sequence):
    def __init__(self, df, XX, y, h, w, batch_size=32):
        self.df = df
        self.XX = XX
        self.y = y
        self.h = h
        self.w = w
        self.batch_size = batch_size

    def __len__(self):
        #return self.df.shape[0]
        return self.df.shape[0] // self.batch_size

    #def image_processing(i, batch_npy_filename, batch_image):
    #    npy_filename_i = batch_npy_filename.iloc[i]
    #    image_i = np.load(path_naive / f"train_npy/{npy_filename_i}").astype(np.float32, copy=False)
    #    image_i /= 255.0
    #    image_i = np.repeat(image_i[..., np.newaxis], 3, axis=-1)  # shape: (h, w, 3)
    #    batch_image[i] = image_i
    def image_processing(self, i):
        npy_filename_i = self.batch_npy_filename.iloc[i]
        image_i = np.load(path_naive / f"train_npy/{npy_filename_i}").astype(np.float32, copy=False)
        image_i /= 255.0
        image_i = np.repeat(image_i[..., np.newaxis], 3, axis=-1)  # shape: (h, w, 3)
        self.batch_image[i] = image_i

    def __getitem__(self, idx):
        self.batch_npy_filename = self.df["npy_filename"].iloc[idx*self.batch_size: (idx + 1)*self.batch_size]
        #batch_image = np.empty((self.batch_size, self.h, self.w, 3), dtype=np.float32)
        self.batch_image = np.zeros((self.batch_size, self.h, self.w, 3), dtype=np.float32)
        #tasks = [image_processing(i, batch_npy_filename, batch_image) for i in range(self.batch_size)]
        tasks = [delayed(self.image_processing)(i) for i in range(self.batch_size)]
        pool = Parallel(n_jobs=8)
        pool(tasks)
 

In [118]:
k = 0
for ((i, f), y) in DatasetGenerator(df_train, XX_train, y_train, h, w):
    if k > 10:
        break
    print(i.shape)
    print(f.shape)
    print(y.shape)
    k += 1


ValueError: assignment destination is read-only

In [None]:
from tensorflow.keras.applications import EfficientNetB0
import tensorflow.keras as keras

In [None]:
input_mels = keras.layers.Input(shape=(*random_npy.shape, 3), name="input_mels")
input_spacetime = keras.layers.Input(shape=(XX_train.shape[1],),
                                     name="input_spacetime")

output_efficient = EfficientNetB0(include_top=False, weights="imagenet")(input_mels)
pooled = keras.layers.GlobalAveragePooling2D()(output_efficient)
concatenated = keras.layers.Concatenate()([pooled, input_spacetime])
#concatenated = keras.layers.concatenate([pooled, input_spacetime])
#dropped = keras.layers.Dropout(.2)(pooled)
dropped = keras.layers.Dropout(.2)(concatenated)
output_CNN = keras.layers.Dense(len(L_birds), activation="sigmoid")(dropped)
model = keras.Model(
    #inputs=[input_mels],
    inputs=[input_mels, input_spacetime],
    outputs=[output_CNN],
)
#model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["acc"])
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=[keras.metrics.Precision(), keras.metrics.Recall()],
)

In [None]:
checkpoint_cb = keras.callbacks.ModelCheckpoint("model1.h5",
                                                save_best_only=True)
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10,
                                                  restore_best_weights=True)

class PrintF1Score(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        #print(f"logs.keys() = {logs.keys()}")  # This can check what keys logs has.
        f1_score = 2 * logs["precision"] * logs["recall"] / (logs["precision"] + logs["recall"] + EPSILON)
        val_f1_score = 2 * logs["val_precision"] * logs["val_recall"] / (logs["val_precision"] + logs["val_recall"] + EPSILON)
        print(f"f1_score: {f1_score}")
        print(f"val_f1_score: {val_f1_score}")

In [None]:
model.summary()

### Debug
The problem might be that the dataset generator should not have been rewritten this simply, i.e. using simple generator with `yield`.

In [None]:
keras.layers.Concatenate()([np.zeros((32, 1280)), np.zeros((32, 7))])

In [None]:
k = 0
for ((i, f), y) in trainset_generator():
    if k > 10:
        break
    print(i.shape)
    print(f.shape)
    print(y.shape)
    k += 1

In [None]:
128 * 201 * 3