In [2]:
import numpy as np
import pandas as pd
from pathlib import Path

In [3]:
#path_naive = Path("../input/birdclef-2021-naive-npy")
path_naive = Path.cwd()

path_naive.exists()

True

In [4]:
df_shortaudio_train = pd.read_csv(path_naive/"shortaudio_train.csv")
df_shortaudio_val = pd.read_csv(path_naive/"shortaudio_val.csv")

df_soundscape_train = pd.read_csv(path_naive/"soundscape_train.csv")
df_soundscape_val = pd.read_csv(path_naive/"soundscape_val.csv")
df_soundscape_test = pd.read_csv(path_naive/"soundscape_test.csv")

In [5]:
df_soundscape_val.head()

Unnamed: 0,row_id,site,audio_id,seconds,birds,n_birds,year,month,day,longitude,latitude,month_x,month_y,day_coarse_x,day_coarse_y,longitude_x,longitude_y,latitude_normalized
0,54955_SSW_395,SSW,54955,395,grycat,1,2017,6,17,-76.45,42.47,-1.0,1.224647e-16,-0.954139,-0.299363,0.234294,-0.972166,0.471889
1,14473_SSW_430,SSW,14473,430,nocall,0,2017,7,1,-76.45,42.47,-0.8660254,-0.5,0.97953,0.201299,0.234294,-0.972166,0.471889
2,26746_COR_330,COR,26746,330,nocall,0,2019,10,4,-84.51,10.12,0.5,-0.8660254,0.688967,0.724793,0.095672,-0.995413,0.112444
3,18003_COR_455,COR,18003,455,rucwar,1,2019,9,4,-84.51,10.12,-1.83697e-16,-1.0,0.688967,0.724793,0.095672,-0.995413,0.112444
4,14473_SSW_365,SSW,14473,365,nocall,0,2017,7,1,-76.45,42.47,-0.8660254,-0.5,0.97953,0.201299,0.234294,-0.972166,0.471889


In [6]:
df_shortaudio_train.head()

Unnamed: 0,primary_label,latitude,longitude,date,filename,year,month,day,npy_filename
0,cubthr,32.5839,-109.9696,2020-08-03,XC617273.ogg,2020,8,3,XC617273_120.npy
1,chbchi,35.253,-120.876,2020-05-21,XC561445.ogg,2020,5,21,XC561445_40.npy
2,mouela1,13.7346,-89.2796,2016-05-27,XC320698.ogg,2016,5,27,XC320698_30.npy
3,cacwre,32.738,-112.2297,2019-09-14,XC497309.ogg,2019,9,14,XC497309_10.npy
4,burwar1,0.0287,-78.8628,2009-06-02,XC35146.ogg,2009,6,2,XC35146_60.npy


I have forgotten to add `month_x`, etc. to `shortaudio_{train,val,test}.csv`. Let's make that up.

This is not necessarily a bad thing -- By forgetting this, our `.csv` files are more ligth-weighted.

In [7]:
def cyclicize_number(number, max_, min_):
    """
    args
        number, int
            \in {min_, min_ + 1, ..., max_}
            e.g. hour => min_ = 0, max_ = 24
                 longitude => min_ = -180, max_ = 180
        max_, int
        min_, int
    return
        (x, y), tuple of float
    """
    period = max_ - min_
    theta = 2 * np.pi * (number / period)
    #theta = 2 * np.pi * ((number - min_) / period)
    x = np.cos(theta)
    y = np.sin(theta)
    return x, y

# N.B. Using the next function to deal with df_train_soundscape is
#      not efficient, since there are only 4 distinct longitudes.
def cyclicize_series(series, max_, min_):
    return list(map(lambda number: cyclicize_number(number, max_, min_), series))

In [8]:
df_shortaudio_train[["month_x", "month_y"]] = cyclicize_series(df_shortaudio_train["month"], 12, 0)
df_shortaudio_train[["day_coarse_x", "day_coarse_y"]] = cyclicize_series(df_shortaudio_train["day"], 31, 0)
df_shortaudio_train[["longitude_x", "longitude_y"]] = cyclicize_series(df_shortaudio_train["longitude"], 180, -180)
df_shortaudio_train["latitude_normalized"] = df_shortaudio_train["latitude"] / 90
df_shortaudio_train.head()

Unnamed: 0,primary_label,latitude,longitude,date,filename,year,month,day,npy_filename,month_x,month_y,day_coarse_x,day_coarse_y,longitude_x,longitude_y,latitude_normalized
0,cubthr,32.5839,-109.9696,2020-08-03,XC617273.ogg,2020,8,3,XC617273_120.npy,-0.5,-0.8660254,0.820763,0.571268,-0.341522,-0.939874,0.362043
1,chbchi,35.253,-120.876,2020-05-21,XC561445.ogg,2020,5,21,XC561445_40.npy,-0.8660254,0.5,-0.440394,-0.897805,-0.513182,-0.85828,0.3917
2,mouela1,13.7346,-89.2796,2016-05-27,XC320698.ogg,2016,5,27,XC320698_30.npy,-0.8660254,0.5,0.688967,-0.724793,0.012573,-0.999921,0.152607
3,cacwre,32.738,-112.2297,2019-09-14,XC497309.ogg,2019,9,14,XC497309_10.npy,-1.83697e-16,-1.0,-0.954139,0.299363,-0.378321,-0.925675,0.363756
4,burwar1,0.0287,-78.8628,2009-06-02,XC35146.ogg,2009,6,2,XC35146_60.npy,-1.0,1.224647e-16,0.918958,0.394356,0.193159,-0.981167,0.000319


In [9]:
df_shortaudio_val[["month_x", "month_y"]] = cyclicize_series(df_shortaudio_val["month"], 12, 0)
df_shortaudio_val[["day_coarse_x", "day_coarse_y"]] = cyclicize_series(df_shortaudio_val["day"], 31, 0)
df_shortaudio_val[["longitude_x", "longitude_y"]] = cyclicize_series(df_shortaudio_val["longitude"], 180, -180)
df_shortaudio_val["latitude_normalized"] = df_shortaudio_val["latitude"] / 90

- Produce common columns for the two diff types of dataframes
- Build `df_train`, `df_val`, `df_test`
- Build `XX_train`, `XX_val`, `XX_test`

In [10]:
L_feature_columns = [
    "month_x",
    "month_y",
    "day_coarse_x",
    "day_coarse_y",
    "longitude_x",
    "longitude_y",
    "latitude_normalized",
]

L_common_columns = L_feature_columns + [
    "npy_filename",
    "primary_label",
]

In [11]:
df_soundscape_train["npy_filename"] = df_soundscape_train["row_id"] + ".npy"
df_soundscape_val["npy_filename"] = df_soundscape_val["row_id"] + ".npy"
df_soundscape_test["npy_filename"] = df_soundscape_test["row_id"] + ".npy"

In [12]:
df_soundscape_train.rename(
    {"birds": "primary_label"},
    axis="columns",
    inplace=True,
)
df_soundscape_val.rename(
    {"birds": "primary_label"},
    axis="columns",
    inplace=True,
)
df_soundscape_test.rename(
    {"birds": "primary_label"},
    axis="columns",
    inplace=True,
)
"primary_label" in df_soundscape_train.columns

True

In [13]:
df_train = pd.concat([
    df_shortaudio_train[L_common_columns],
    df_soundscape_train[L_common_columns],
])
df_train.shape, df_shortaudio_train.shape, df_soundscape_train.shape, len(L_common_columns)

((475238, 9), (473638, 16), (1600, 19), 9)

In [14]:
df_train.head()

Unnamed: 0,month_x,month_y,day_coarse_x,day_coarse_y,longitude_x,longitude_y,latitude_normalized,npy_filename,primary_label
0,-0.5,-0.8660254,0.820763,0.571268,-0.341522,-0.939874,0.362043,XC617273_120.npy,cubthr
1,-0.8660254,0.5,-0.440394,-0.897805,-0.513182,-0.85828,0.3917,XC561445_40.npy,chbchi
2,-0.8660254,0.5,0.688967,-0.724793,0.012573,-0.999921,0.152607,XC320698_30.npy,mouela1
3,-1.83697e-16,-1.0,-0.954139,0.299363,-0.378321,-0.925675,0.363756,XC497309_10.npy,cacwre
4,-1.0,1.224647e-16,0.918958,0.394356,0.193159,-0.981167,0.000319,XC35146_60.npy,burwar1


In [15]:
df_val = pd.concat([
    df_shortaudio_val[L_common_columns],
    df_soundscape_val[L_common_columns],
])
df_val.shape, df_shortaudio_val.shape, df_soundscape_val.shape, len(L_common_columns)

((203389, 9), (202989, 16), (400, 19), 9)

In [16]:
df_test = df_soundscape_test[L_common_columns]
df_test.shape

(400, 9)

Maybe we need to shuffle before assigning `df_train[L_feature_columns].value` to `XX_train`.

In [17]:
df_train = df_train.sample(frac=1)
df_val = df_val.sample(frac=1)

In [18]:
XX_train = df_train[L_feature_columns].values
XX_val = df_val[L_feature_columns].values

In [19]:
df_train

Unnamed: 0,month_x,month_y,day_coarse_x,day_coarse_y,longitude_x,longitude_y,latitude_normalized,npy_filename,primary_label
89,-1.000000e+00,1.224647e-16,-0.440394,0.897805,0.264681,-0.964336,0.048344,XC567310_60.npy,grasal1
161748,-1.000000e+00,1.224647e-16,-0.050649,0.998717,-0.505299,-0.862944,0.441781,XC451703_15.npy,marwre
33699,-5.000000e-01,8.660254e-01,0.820763,0.571268,-0.451530,-0.892256,0.362226,XC541632_20.npy,orcwar
366257,-5.000000e-01,8.660254e-01,-0.050649,-0.998717,-0.461756,-0.887007,0.377347,XC469873_10.npy,yebcha
288921,6.123234e-17,1.000000e+00,0.528964,0.848644,0.193166,0.981166,0.328727,XC73032_35.npy,bcnher
...,...,...,...,...,...,...,...,...,...
266191,-8.660254e-01,5.000000e-01,0.151428,-0.988468,-0.526954,-0.849894,0.408200,XC481190_40.npy,barswa
109984,6.123234e-17,1.000000e+00,0.820763,-0.571268,0.008391,-0.999965,0.381850,XC309320_65.npy,norpar
32861,-1.000000e+00,1.224647e-16,0.820763,-0.571268,-0.500543,-0.865712,0.576663,XC196035_140.npy,wilfly
358914,8.660254e-01,-5.000000e-01,-0.050649,-0.998717,0.262878,-0.964829,0.047591,XC380822_5.npy,barant1


## Dataset Generator
Why switch to using generator? How large is our data this time? Could you make an estimate?



In [20]:
import tensorflow as tf
import tensorflow.keras as keras

In [21]:
for i in range(5):
    npy_filename_i = df_train["npy_filename"].iloc[i]
    print(npy_filename_i)

XC567310_60.npy
XC451703_15.npy
XC541632_20.npy
XC469873_10.npy
XC73032_35.npy


In [22]:
df_train["npy_filename"].head()

89        XC567310_60.npy
161748    XC451703_15.npy
33699     XC541632_20.npy
366257    XC469873_10.npy
288921     XC73032_35.npy
Name: npy_filename, dtype: object

In [23]:
XX_train.shape, df_train.shape

((475238, 7), (475238, 9))

In [24]:
XX_train[100]

array([-1.00000000e+00,  1.22464680e-16,  1.51427778e-01, -9.88468324e-01,
        6.93830547e-01,  7.20138301e-01,  6.24151111e-01])

In [25]:
XX_train[100].shape

(7,)

In [26]:
#PATH_DATASET = Path("../input/birdclef-2021")
PATH_DATASET = Path.home() / "datasets/kaggle/birdclef-2021/"

L_birds = [path.name for path
           in (PATH_DATASET / "train_short_audio").iterdir()]
L_birds = sorted(L_birds)
D_label_index = {label: i for i, label in enumerate(L_birds)}
D_index_label = {v: k for k, v in D_label_index.items()}

def label(series):
    #I = np.eye(len(D_label_index))
    y = np.zeros((len(series), len(D_label_index)), dtype=np.float32)
    for i, string in enumerate(series.values):
    #for i, string in enumerate(series):
        if string == "nocall":
            continue
        else:
            L_indices = [D_label_index[label] for label in string.split(" ")]
            #row_i = np.sum(I[L_indices], axis=0)
            #y[i] = row_i
            y[i, L_indices] = 1
    return y

y_train = label(df_train["primary_label"])
np.unique(np.sum(y_train, axis=-1))

array([0., 1., 2., 3., 4., 5.], dtype=float32)

In [27]:
y_train.shape

(475238, 397)

In [28]:
y_val = label(df_val["primary_label"])
#y_test = label(df_test["primary_label"])

In [29]:
import random
random.choice(list((path_naive / "train_npy").iterdir()))

PosixPath('/home/phunc20/git-repos/phunc20/competitions/kaggle/birdclef_2021/my_code/local/train_npy/XC445517_155.npy')

In [30]:
random_npy_path = _

In [31]:
random_npy = np.load(random_npy_path)
random_npy.dtype, random_npy.shape

(dtype('uint8'), (128, 201))

In [32]:
h, w = random_npy.shape

In [33]:
from tensorflow.keras.utils import Sequence
from joblib import Parallel, delayed

class DatasetGenerator(Sequence):
    def __init__(self, df, XX, y, h, w, is_train=True, batch_size=32):
        self.df = df
        self.XX = XX
        self.y = y
        self.h = h
        self.w = w
        self.is_train = is_train
        self.batch_size = batch_size

    def __len__(self):
        #return self.df.shape[0]
        return self.df.shape[0] // self.batch_size

    #def image_processing(i, batch_npy_filename, batch_image):
    #    npy_filename_i = batch_npy_filename.iloc[i]
    #    image_i = np.load(path_naive / f"train_npy/{npy_filename_i}").astype(np.float32, copy=False)
    #    image_i /= 255.0
    #    image_i = np.repeat(image_i[..., np.newaxis], 3, axis=-1)  # shape: (h, w, 3)
    #    batch_image[i] = image_i
    def image_processing(self, i):
        npy_filename_i = self.batch_npy_filename.iloc[i]
        if self.is_train:
            image_i = np.load(path_naive / f"train_npy/{npy_filename_i}").astype(np.float32, copy=False)
        else:
            image_i = np.load(path_naive / f"val_npy/{npy_filename_i}").astype(np.float32, copy=False)
        image_i /= 255.0
        image_i = np.repeat(image_i[..., np.newaxis], 3, axis=-1)  # shape: (h, w, 3)
        self.batch_image[i] = image_i

    def __getitem__(self, idx):
        self.batch_npy_filename = self.df["npy_filename"].iloc[idx*self.batch_size: (idx + 1)*self.batch_size]
        #batch_image = np.empty((self.batch_size, self.h, self.w, 3), dtype=np.float32)
        self.batch_image = np.zeros((self.batch_size, self.h, self.w, 3), dtype=np.float32)

        ## joblib, multiprocessing
        #tasks = [delayed(self.image_processing)(i) for i in range(self.batch_size)]
        #pool = Parallel(n_jobs=8)
        #pool(tasks)
        
        ## single-core implementation
        for i in range(self.batch_size):
            self.image_processing(i)
        batch_X = [self.batch_image, self.XX[idx*self.batch_size: (idx + 1)*self.batch_size]]
        batch_y = self.y[idx*self.batch_size: (idx + 1)*self.batch_size]
        return batch_X, batch_y
 

In [34]:
k = 0
for ((i, f), y) in DatasetGenerator(df_train, XX_train, y_train, h, w):
    if k > 10:
        break
    print(i.shape)
    print(f.shape)
    print(y.shape)
    k += 1


(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)


In [35]:
k = 0
for ((i, f), y) in DatasetGenerator(df_val, XX_val, y_val, h, w, is_train=False):
    if k > 10:
        break
    print(i.shape)
    print(f.shape)
    print(y.shape)
    k += 1

(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)
(32, 128, 201, 3)
(32, 7)
(32, 397)


In [36]:
from tensorflow.keras.applications import EfficientNetB0
import tensorflow.keras as keras

In [37]:
input_mels = keras.layers.Input(shape=(*random_npy.shape, 3), name="input_mels")
input_spacetime = keras.layers.Input(shape=(XX_train.shape[1],),
                                     name="input_spacetime")

output_efficient = EfficientNetB0(include_top=False, weights="imagenet")(input_mels)
pooled = keras.layers.GlobalAveragePooling2D()(output_efficient)
concatenated = keras.layers.Concatenate()([pooled, input_spacetime])
#concatenated = keras.layers.concatenate([pooled, input_spacetime])
#dropped = keras.layers.Dropout(.2)(pooled)
dropped = keras.layers.Dropout(.2)(concatenated)
output_CNN = keras.layers.Dense(len(L_birds), activation="sigmoid")(dropped)
model = keras.Model(
    #inputs=[input_mels],
    inputs=[input_mels, input_spacetime],
    outputs=[output_CNN],
)
#model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["acc"])
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=[keras.metrics.Precision(), keras.metrics.Recall()],
)

In [38]:
checkpoint_cb = keras.callbacks.ModelCheckpoint("model1.h5",
                                                save_best_only=True)
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10,
                                                  restore_best_weights=True)

EPSILON = 1e-6
class PrintF1Score(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        #print(f"logs.keys() = {logs.keys()}")  # This can check what keys logs has.
        f1_score = 2 * logs["precision"] * logs["recall"] / (logs["precision"] + logs["recall"] + EPSILON)
        val_f1_score = 2 * logs["val_precision"] * logs["val_recall"] / (logs["val_precision"] + logs["val_recall"] + EPSILON)
        print(f"f1_score: {f1_score}")
        print(f"val_f1_score: {val_f1_score}")

In [39]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_mels (InputLayer)         [(None, 128, 201, 3) 0                                            
__________________________________________________________________________________________________
efficientnetb0 (Functional)     (None, None, None, 1 4049571     input_mels[0][0]                 
__________________________________________________________________________________________________
global_average_pooling2d (Globa (None, 1280)         0           efficientnetb0[0][0]             
__________________________________________________________________________________________________
input_spacetime (InputLayer)    [(None, 7)]          0                                            
______________________________________________________________________________________________

In [40]:
if Path("model1.h5").exists():
    print("model1.h5 exists! Loading...")
    model = keras.models.load_model("model1.h5")

model1.h5 exists! Loading...


In [41]:
history = model.fit(
    DatasetGenerator(df_train, XX_train, y_train, h, w),
    batch_size=32,
    epochs=100,
    validation_data=DatasetGenerator(df_val, XX_val, y_val, h, w, is_train=False),
    callbacks=[checkpoint_cb, early_stopping_cb, PrintF1Score()],
)

Epoch 1/100
f1_score: 0.7142189225838013
val_f1_score: 0.0017687566223842605
Epoch 2/100
f1_score: 0.7546506500615243
val_f1_score: 0.0
Epoch 3/100
f1_score: 0.7842325019687166
val_f1_score: 0.0048986782181365904
Epoch 4/100
f1_score: 0.8060543637788198
val_f1_score: 0.009110902929684448
Epoch 5/100
f1_score: 0.823530670490825
val_f1_score: 0.0
Epoch 6/100
f1_score: 0.8382908307004968
val_f1_score: 0.007514720921436951
Epoch 7/100
f1_score: 0.8501270661851555
val_f1_score: 0.00726604373574674
Epoch 8/100
f1_score: 0.8604605613410358
val_f1_score: 0.0
Epoch 9/100
f1_score: 0.8696338895953297
val_f1_score: 0.0017632717143532593
Epoch 10/100
f1_score: 0.8769024151542283
val_f1_score: 0.0
Epoch 11/100

KeyboardInterrupt: 

In [1]:
2 * (0.85 * 0.51) / (0.85 + 0.51)

0.6375000000000001

### Debug
The problem might be that the dataset generator should not have been rewritten this simply, i.e. using simple generator with `yield`.

In [None]:
keras.layers.Concatenate()([np.zeros((32, 1280)), np.zeros((32, 7))])

In [None]:
k = 0
for ((i, f), y) in trainset_generator():
    if k > 10:
        break
    print(i.shape)
    print(f.shape)
    print(y.shape)
    k += 1

In [None]:
128 * 201 * 3