In [None]:
import os  #provides functions for interacting with the operating system
import json  #used to work with JSON data
import librosa #librosa is a python package for music and audio analysis
import numpy as np #NumPy is a Python library used for working with arrays
import pandas as pd #data analysis toolkit
from sklearn.model_selection import train_test_split #Split arrays or matrices into random train and test subsets
import tensorflow as tf #TensorFlow is a Python library for fast numerical computing
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import optimizers
from tensorflow.keras.utils import plot_model, Sequence
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Input, Concatenate

In [None]:
# Load the training metadata: the official BirdCLEF 2022 table and the
# extended Xeno-Canto table.
train_meta = pd.read_csv("/kaggle/input/birdclef-2022/train_metadata.csv")
train_meta2 = pd.read_csv("../input/xenocantoazcsv/train_extended_A-Z_ogg.csv")
# Only the last expression of a cell is rendered, so a bare `train_meta` placed
# before this line displayed nothing and has been dropped.
train_meta2

In [None]:
# Load the BirdCLEF 2022 training metadata and display it.
# NOTE(review): this repeats the read_csv call made in the previous cell;
# presumably kept so the notebook still works when the extended Xeno-Canto CSV
# is not attached — confirm before removing.
train_meta = pd.read_csv("/kaggle/input/birdclef-2022/train_metadata.csv")
train_meta

In [None]:
# Distinct species codes; a label's position in this list is used as its class id.
labels = train_meta['primary_label'].unique().tolist()

## Split audio into 5-second chunks

In [None]:
#Function for cutting one recording into consecutive fixed-length chunks (5 seconds by default).
import soundfile as sf
import os
def cutAudio(file_path, is_save, chunk_seconds=5, out_dir="/kaggle/working/each5s"):
    """Split one training recording into consecutive fixed-length chunks.

    Parameters
    ----------
    file_path : str
        Path of the recording relative to the competition's ``train_audio``
        folder.
    is_save : bool
        When true, every chunk is also written to ``out_dir``.
    chunk_seconds : int or float, optional
        Length of each chunk in seconds (default 5).
    out_dir : str, optional
        Folder that receives the chunk files; it is created if missing.

    Returns
    -------
    audio_split : list
        The chunks in order. The last one may be shorter than
        ``chunk_seconds`` when the recording length is not an exact multiple.
    sr : number
        Sampling rate reported by ``librosa.load``.
    audio_filenames : list of str
        Paths of the written chunk files, aligned with ``audio_split``;
        empty when ``is_save`` is false.

    Raises
    ------
    ValueError
        If ``chunk_seconds`` does not correspond to at least one sample.
    """
    # Flatten "species/file.ogg" into "species_file.ogg" so all chunks can
    # live in a single output folder.
    filename = file_path.replace("/", "_")
    file_path = "/kaggle/input/birdclef-2022/train_audio/" + file_path
    audio, sr = librosa.load(file_path)

    # Number of samples per chunk.
    buffer = int(chunk_seconds * sr)
    if buffer <= 0:
        # A zero-length step would never advance through the recording.
        raise ValueError("chunk_seconds must be positive")

    if is_save:
        # Do not rely on another cell having created the folder beforehand.
        os.makedirs(out_dir, exist_ok=True)

    audio_split = []
    audio_filenames = []
    # Slicing past the end of the array simply yields the shorter tail chunk.
    for counter, start in enumerate(range(0, len(audio), buffer), start=1):
        block = audio[start : start + buffer]
        audio_split.append(block)

        if is_save:
            out_filename = os.path.join(out_dir, "split_" + str(counter) + "_" + filename)
            audio_filenames.append(out_filename)
            sf.write(out_filename, block, sr)
    return audio_split, sr, audio_filenames

In [None]:
#Function for splitting the training audio into chunks and indexing the chunk files
def splitTrainAudio(_df):
    """Cut every recording listed in ``_df`` into chunks and write a chunk index.

    Parameters
    ----------
    _df : pandas.DataFrame
        Must provide the columns ``filename`` (passed to ``cutAudio``) and
        ``primary_label``.

    Side effects
    ------------
    ``cutAudio`` is called with saving enabled, so the chunk files are written
    to disk, and an index with the columns ``primary_label``,
    ``original_filename`` and ``filename`` (the chunk path) is saved to
    ``/kaggle/working/data_df.csv``. Nothing is returned.
    """
    data = []
    for _, row in _df.iterrows():
        # Call cutAudio exactly once per recording: each call decodes the audio
        # and rewrites every chunk file, so a second call only doubles the work.
        _, _, filenames = cutAudio(row["filename"], True)
        for chunk_path in filenames:
            data.append([row["primary_label"], row["filename"], chunk_path])

    data_df = pd.DataFrame(data, columns=['primary_label', 'original_filename', 'filename'])
    data_df.to_csv("/kaggle/working/data_df.csv", index=False)

In [None]:
# Draw one random recording per species and stack the picks into a single frame.
# NOTE(review): the draw is unseeded, so a re-run selects different recordings.
data_frames = [
    train_meta[train_meta["primary_label"] == label]
    .sample(n=1, replace=True)
    .reset_index(drop=True)
    for label in labels
]
sample_df = pd.concat(data_frames).reset_index(drop=True)
sample_df

In [None]:
# Make sure the output folder for the chunk files exists (pure Python instead of
# a shell escape), then cut the sampled recordings.
os.makedirs("/kaggle/working/each5s", exist_ok=True)
splitTrainAudio(sample_df)

In [None]:
# Reload the chunk index that splitTrainAudio wrote to disk: one row per saved
# chunk with its primary_label, original_filename and chunk filename.
data_df = pd.read_csv("/kaggle/working/data_df.csv")
data_df

#### The full data can be found [here](https://www.kaggle.com/duythanhng/birdclef-2022-audio-per-5-second)

## Training

In [None]:
#This cell defines the feature shape and the function that extracts MFCC features from an audio chunk
num_rows = 216      # number of MFCC coefficients (rows of the feature matrix)
num_columns = 216   # number of time frames every feature matrix is forced to
num_channels = 1    # the features form a single-channel "image"
n_mels = 512        # number of mel bands requested for the MFCC computation

def extractFeatures(y, sr):
    """Return an MFCC matrix of shape ``(num_rows, num_columns)`` for one chunk.

    Parameters
    ----------
    y : array-like
        Audio samples of the chunk.
    sr : number
        Sampling rate of ``y``.

    Returns
    -------
    numpy.ndarray
        MFCCs with ``num_rows`` coefficients. Chunks that yield fewer than
        ``num_columns`` frames are zero-padded on the right; chunks that yield
        more are truncated, so the result always fits the fixed-size reshape
        and batch buffers used by the callers.
    """
    feat = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=num_rows, n_mels=n_mels)
    n_frames = feat.shape[1]
    if n_frames < num_columns:
        # Short chunk: pad the time axis with zeros up to the target width.
        feat = np.pad(feat, pad_width=((0, 0), (0, num_columns - n_frames)), mode='constant')
    elif n_frames > num_columns:
        # Long chunk: keep only the leading frames instead of returning an
        # over-wide matrix that breaks the downstream reshape.
        feat = feat[:, :num_columns]
    return feat

In [None]:
#Batch generator: controls how many audio chunks are fed to the network at a time and builds each batch
class DataGenerator(Sequence):
    """Keras ``Sequence`` that loads audio chunks and yields feature batches.

    Each batch is a pair ``(X, y)`` where ``X`` has shape
    ``(n, n_rows, n_columns, n_channels)`` and ``y`` is the one-hot encoding
    of the ``target`` column over ``len(labels)`` classes.

    Parameters
    ----------
    _X : pandas.DataFrame
        Needs the columns ``filename`` (audio file to load) and ``target``
        (integer class id).
    batch_size : int
        Maximum number of samples per batch.
    n_channels, n_columns, n_rows : int
        Shape of one feature matrix; must agree with what ``extractFeatures``
        returns.
    shuffle : bool
        Reshuffle the sample order at construction and after every epoch.
    """
    def __init__(self,
                _X,
                batch_size=32,
                n_channels=1,
                n_columns=470,
                n_rows=120,
                shuffle=True):
        # Let the Keras base class initialise its own state.
        super().__init__()
        self.batch_size = batch_size
        self.X = _X
        self.n_channels = n_channels
        self.n_columns = n_columns
        self.n_rows = n_rows
        self.shuffle = shuffle
        self.img_indexes = np.arange(len(self.X))
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        # Round up so the trailing partial batch is served too; rounding down
        # dropped those samples and produced zero batches for sets smaller
        # than batch_size.
        return int(np.ceil(len(self.img_indexes) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Positions (in the possibly shuffled order) that belong to this batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        # Translate them into row positions of the dataframe
        list_IDs_temps = [self.img_indexes[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(list_IDs_temps)
        return X, y

    #Function for Updates indexes after each epoch
    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.X))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temps):
        'Load the listed rows and return (features, one-hot labels)'
        # Size the buffers by the actual number of samples so a short final
        # batch carries no uninitialised rows.
        n_samples = len(list_IDs_temps)
        X = np.empty((n_samples, self.n_rows, self.n_columns))
        y = np.empty((n_samples), dtype=int)
        for i, ID in enumerate(list_IDs_temps):
            row = self.X.iloc[ID]
            audio, sr = librosa.load(row["filename"])
            # Assign the feature matrix directly; no list round trip needed.
            X[i] = extractFeatures(audio, sr)
            y[i] = row["target"]
        # Append the channel axis expected by the network input.
        X = X.reshape(n_samples, self.n_rows, self.n_columns, self.n_channels)

        return X, to_categorical(y, num_classes=len(labels))

In [None]:
# Settings shared by the training and validation generators.
params = {
    "batch_size": 128,
    "n_rows": num_rows,
    "n_columns": num_columns,
    "n_channels": num_channels,
}
# The two variants differ only in the shuffle flag: on for training,
# off for validation.
params_train = {"shuffle": True, **params}
params_valid = {"shuffle": False, **params}

In [None]:
#Function for plotting the training history (accuracy and loss curves)
import matplotlib.pyplot as plt

def plot_his(history):
    """Plot training vs. validation accuracy and loss side by side.

    ``history`` is the object returned by ``model.fit``; its ``history`` dict
    must contain ``accuracy``, ``val_accuracy``, ``loss`` and ``val_loss``.
    """
    plt.figure(1, figsize = (15,8))
    # (subplot position, metric) for the two panels in the top row of a 2x2 grid
    panels = ((221, 'accuracy'), (222, 'loss'))
    for position, metric in panels:
        plt.subplot(position)
        # Training curve first, validation curve second, matching the legend order.
        for series_key in (metric, 'val_' + metric):
            plt.plot(history.history[series_key])
        plt.title('model ' + metric)
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.legend(['train', 'valid'])
    plt.show()

In [None]:
#Function for building the MobileNetV2-based classifier
def create_cnn():
    """Build a MobileNetV2 classifier over single-channel feature "images".

    The input has shape ``(num_rows, num_columns, num_channels)`` and is
    concatenated with itself three times along the last axis before it is fed
    to the backbone. Backbone weights are loaded from a local weights file;
    the backbone is not frozen here. The head is global average pooling
    followed by a softmax layer with ``len(labels)`` units.

    Returns
    -------
    tensorflow.keras.Model
        The uncompiled model.
    """
    mono_input = Input(shape=(num_rows, num_columns, num_channels))
    # Repeat the same tensor three times along the channel axis.
    stacked_input = Concatenate()([mono_input] * 3)
    backbone = MobileNetV2(
        input_tensor=stacked_input,
        include_top=False,
        weights='/kaggle/input/keras-pretrain-model-weights/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5'
    )
    pooled = GlobalAveragePooling2D()(backbone.output)
    class_probs = Dense(len(labels), activation='softmax')(pooled)

    return Model(inputs=backbone.input, outputs=class_probs)

In [None]:
# Encode each species code as its position in `labels`. A dict lookup replaces
# the linear `labels.index` scan that ran once per row; the resulting ids are
# identical as long as `labels` holds unique values (it is built with
# `.unique()`), and an unknown species code still raises instead of being
# silently encoded.
label_to_index = {species: position for position, species in enumerate(labels)}
data_df['target'] = data_df['primary_label'].apply(lambda x: label_to_index[x])
data_df

In [None]:
#Function for compiling and training the model
def train_model(model, train_gen, val_gen):
    """Compile ``model``, fit it on ``train_gen`` and plot the learning curves.

    Training runs for at most 50 epochs with Adam (learning rate 0.001) and
    categorical cross-entropy. All three callbacks watch ``val_accuracy``:
    the best model is saved to ``/kaggle/working/mobilnetv2.h5``, training
    stops early after 5 epochs of patience, and the learning rate is
    multiplied by 0.3 (down to 1e-5) after 1 epoch of patience.
    Nothing is returned; the history is only passed to ``plot_his``.
    """
    watched_metric = "val_accuracy"

    model.compile(
        loss='categorical_crossentropy',
        metrics=["accuracy"],
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    )

    training_callbacks = [
        ModelCheckpoint(
            filepath="/kaggle/working/mobilnetv2.h5",
            monitor=watched_metric,
            verbose=1,
            save_best_only=True,
        ),
        EarlyStopping(monitor=watched_metric, patience=5, verbose=1),
        ReduceLROnPlateau(
            monitor=watched_metric,
            factor=0.3,
            patience=1,
            verbose=1,
            min_delta=0.0001,
            cooldown=1,
            min_lr=0.00001,
        ),
    ]

    fit_history = model.fit(
        train_gen,
        validation_data=val_gen,
        epochs=50,
        callbacks=training_callbacks,
        verbose=1,
    )

    plot_his(fit_history)

In [None]:
# Hold out 10% of the chunks for validation, then build the generators and train.
# Only the dataframe is split: the generators read the `target` column from it,
# so passing the targets separately just to discard them into `_` (which also
# shadows IPython's last-output variable) is unnecessary. The split depends only
# on the number of rows and the seed, so the partition is unchanged.
X_train, X_valid = train_test_split(data_df, test_size=0.1, random_state=42)
train_generator = DataGenerator(X_train, **params_train)
valid_generator = DataGenerator(X_valid, **params_valid)
cnn_model = create_cnn()
train_model(cnn_model, train_generator, valid_generator)

## Submission

In [None]:
# Rebuild the network architecture and load previously trained weights for inference.
# NOTE(review): create_cnn() sizes the output layer from the global `labels`,
# which is only re-read from the shipped data_df.csv in the next cell — confirm
# that the label count at this point matches the checkpoint's output layer.
cnn_model = create_cnn()
cnn_model.load_weights("/kaggle/input/birdclef-2022-keras-model/mobilenetv20_0.8545.h5")

In [None]:
# Reload the chunk index shipped with the trained model and rebuild the label
# list from its unique primary_label values; predicted class indices are mapped
# back to species codes through this list.
# NOTE(review): assumes this ordering matches the one used when the checkpoint
# was trained — confirm.
data_df = pd.read_csv("/kaggle/input/birdclef-2022-keras-model/data_df.csv")
labels = list(data_df['primary_label'].unique())

In [None]:
# Predict one species per 5-second window of every test soundscape and build
# the submission table: one row per (soundscape, scored bird, window end time).
test_path = "/kaggle/input/birdclef-2022/test_soundscapes/"
# splitext keeps the full stem even when a file name contains extra dots;
# entries that are not .ogg files are skipped because the loop below can only
# rebuild "<stem>.ogg" paths.
files = [os.path.splitext(f)[0] for f in sorted(os.listdir(test_path)) if f.endswith('.ogg')]

birds_path = "/kaggle/input/birdclef-2022/scored_birds.json"
with open(birds_path) as bf:
    birds = json.load(bf)

data = []
for f in files:
    file_path = test_path + f + '.ogg'
    audio, sr = librosa.load(file_path)
    # Number of samples in one 5-second window
    buffer = 5 * sr
    samples_total = len(audio)

    # Step through the recording window by window; slicing past the end simply
    # yields the shorter tail window, so `buffer` never has to be modified.
    for counter, samples_wrote in enumerate(range(0, samples_total, buffer), start=1):
        block = audio[samples_wrote : (samples_wrote + buffer)]
        feat = extractFeatures(block, sr)
        x = feat.reshape(1, num_rows, num_columns, num_channels)
        # verbose=0 avoids printing one progress bar per window.
        pred = cnn_model.predict(x, verbose=0)
        predicted_bird = labels[np.argmax(pred, axis=1)[0]]

        # The end time of the window (in seconds) does not depend on the bird,
        # so compute it once per window.
        segment_end = counter * 5
        for b in birds:
            row_id = f + '_' + b + '_' + str(segment_end)
            # A scored bird is marked present only when it is the top prediction.
            data.append([row_id, predicted_bird == b])

submission_df = pd.DataFrame(data, columns=['row_id', 'target'])
submission_df

In [None]:
# Write the predictions to the working directory, without the DataFrame index column.
submission_df.to_csv("/kaggle/working/submission.csv", index=False)

In [None]:
# Stray scratch cell removed: the bare name `hello` is undefined and raised
# NameError whenever the notebook was run top to bottom.