# Importing Libraries

In [None]:
import os
import math
import librosa
import statistics
import librosa.display
import pandas as pd
import numpy as np
import IPython.display as ipd
import matplotlib.pyplot as plt
from IPython.display import Image
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Analysis parameters shared by the feature extractors in this notebook.
FRAME_SIZE = 1024  # samples per analysis frame
HOP_LENGTH = 512  # samples between the starts of consecutive frames

# Example recording used by the walkthrough cells.
audio_path = '../input/birdclef-2021/train_short_audio/acafly/XC109605.ogg'

# Last expression of the cell: IPython shows it as an audio widget.
ipd.Audio(audio_path)

# Feature Extraction

# Time Domain Features

# Time Domain Feature Extraction Pipeline

In [None]:
# Show the static diagram for the time-domain pipeline section
# (a PNG shipped in the `pictures` input folder).
Image("../input/pictures/time_domain_pipeline.PNG")

In [None]:
def amp_env(signal, frame_size, hop_length):
    """Return the amplitude envelope (AE) of ``signal``.

    Frames start every ``hop_length`` samples and span up to ``frame_size``
    samples; the AE of a frame is its largest sample value.  No padding is
    applied, so the last frames are shorter than ``frame_size``.

    Args:
        signal: 1-D sequence of samples (list or NumPy array).
        frame_size: Maximum number of samples per frame.
        hop_length: Step, in samples, between consecutive frame starts.

    Returns:
        1-D ``np.ndarray`` with one value per frame (empty for an empty
        signal).
    """
    samples = np.asarray(signal)
    # np.max reduces each frame in compiled code; the builtin max() walks the
    # samples one Python object at a time, which is slow for audio-length
    # arrays.
    return np.array([
        np.max(samples[start:start + frame_size])
        for start in range(0, len(samples), hop_length)
    ])

# Amplitude Envelope

In [None]:
# Show the static illustration for the Amplitude Envelope section.
Image("../input/pictures/amp_env.PNG")

In [None]:
def amp_env(signal, frame_size, hop_length):
    """Return the amplitude envelope (AE) of ``signal``.

    Each frame starts ``hop_length`` samples after the previous one and
    covers at most ``frame_size`` samples; its AE is the maximum sample value
    inside it.  The signal is not padded, so trailing frames are shorter.

    Args:
        signal: 1-D sequence of samples (list or NumPy array).
        frame_size: Maximum number of samples per frame.
        hop_length: Step, in samples, between consecutive frame starts.

    Returns:
        1-D ``np.ndarray`` holding one AE value per frame (empty when the
        signal is empty).
    """
    samples = np.asarray(signal)
    frame_starts = range(0, len(samples), hop_length)
    # Use NumPy's reduction rather than the builtin max(), which iterates the
    # array element by element in Python.
    return np.array(
        [np.max(samples[start:start + frame_size]) for start in frame_starts]
    )

In [None]:
# Load the example recording and overlay its amplitude envelope (red) on the
# waveform.
audio_path = '../input/birdclef-2021/train_short_audio/acafly/XC109605.ogg'
signal, sr = librosa.load(audio_path)
ae_signal = amp_env(signal, FRAME_SIZE, HOP_LENGTH)
frames = range(len(ae_signal))
# Pass the loaded sampling rate explicitly; otherwise librosa falls back to
# its 22050 Hz default and the axis is only right while the two happen to match.
t = librosa.frames_to_time(frames, sr=sr, hop_length=HOP_LENGTH)
# NOTE(review): waveplot was removed in librosa 0.10 (successor: waveshow).
librosa.display.waveplot(signal, sr=sr)
plt.plot(t, ae_signal, color="r")
plt.show()

# Root Mean Square Energy

In [None]:
# Show the static illustration for the Root Mean Square Energy section.
Image("../input/pictures/rmse.PNG")

In [None]:
def rmse(signal, frame_size, hop_length):
    """Return the root-mean-square energy of every frame of ``signal``.

    Args:
        signal: Audio samples, forwarded to ``librosa.feature.rms`` as ``y``.
        frame_size: Frame length in samples (``frame_length`` in librosa).
        hop_length: Step, in samples, between consecutive frames.

    Returns:
        ``np.ndarray`` copy of the first row of librosa's result.
    """
    # ``y`` has to be named: librosa 0.10 made the arguments of feature.rms
    # keyword-only, so the positional form raises TypeError there.  The
    # keyword form also works on older releases.
    frame_rms = librosa.feature.rms(
        y=signal, frame_length=frame_size, hop_length=hop_length
    )
    return np.array(frame_rms[0])

In [None]:
# Load the example recording and overlay its per-frame RMS energy (green) on
# the waveform.
audio_path = '../input/birdclef-2021/train_short_audio/acafly/XC109605.ogg'
signal, sr = librosa.load(audio_path)
rmse_signal = rmse(signal, FRAME_SIZE, HOP_LENGTH)
frames = range(len(rmse_signal))
# Use the loaded sampling rate instead of librosa's implicit 22050 Hz default.
t = librosa.frames_to_time(frames, sr=sr, hop_length=HOP_LENGTH)
# NOTE(review): waveplot was removed in librosa 0.10 (successor: waveshow).
librosa.display.waveplot(signal, sr=sr)
plt.plot(t, rmse_signal, color="g")
plt.show()

# Zero Crossing Rate

In [None]:
# Show the static illustration for the Zero Crossing Rate section.
Image("../input/pictures/zcr.PNG")

In [None]:
def zcr(signal, frame_size, hop_length):
    """Return the zero-crossing rate of every frame of ``signal``.

    Args:
        signal: Audio samples handed to librosa.
        frame_size: Frame length in samples (``frame_length`` in librosa).
        hop_length: Step, in samples, between consecutive frames.

    Returns:
        ``np.ndarray`` copy of the first row of librosa's result.
    """
    crossing_rates = librosa.feature.zero_crossing_rate(
        signal,
        hop_length=hop_length,
        frame_length=frame_size,
    )
    first_row = crossing_rates[0]
    return np.array(first_row)

In [None]:
# Load the example recording and overlay its per-frame zero-crossing rate
# (blue) on the waveform.
audio_path = '../input/birdclef-2021/train_short_audio/acafly/XC109605.ogg'
signal, sr = librosa.load(audio_path)
zcr_signal = zcr(signal, FRAME_SIZE, HOP_LENGTH)
frames = range(len(zcr_signal))
# Use the loaded sampling rate instead of librosa's implicit 22050 Hz default.
t = librosa.frames_to_time(frames, sr=sr, hop_length=HOP_LENGTH)
# NOTE(review): waveplot was removed in librosa 0.10 (successor: waveshow).
librosa.display.waveplot(signal, sr=sr)
plt.plot(t, zcr_signal, color="b")
plt.show()

# Visualization of Time Domain Features

In [None]:
# Four stacked panels: the raw signal, then AE, RMSE and ZCR, each drawn on
# top of the waveform.
audio_path = '../input/birdclef-2021/train_short_audio/acafly/XC109605.ogg'
signal, sr = librosa.load(audio_path)

# Per-frame features.  They stay module-level names because later cells
# read `ae_signal` and `zcr_signal`.
ae_signal = amp_env(signal, FRAME_SIZE, HOP_LENGTH)
rmse_signal = rmse(signal, FRAME_SIZE, HOP_LENGTH)
zcr_signal = zcr(signal, FRAME_SIZE, HOP_LENGTH)

plt.figure(figsize=(15, 30))

# original signal
ori_signal = signal
# One value per *sample*, so time is sample index / sr.  Running sample
# indices through frames_to_time stretched this axis by a factor of HOP_LENGTH.
t = np.arange(len(ori_signal)) / sr
plt.subplot(4, 1, 1)
# NOTE(review): waveplot was removed in librosa 0.10 (successor: waveshow).
librosa.display.waveplot(signal, sr=sr)
plt.plot(t, ori_signal, color="black")
plt.title("Original Audio Signal")

# AE, RMSE and ZCR share the same per-frame plotting recipe.
panels = [
    (ae_signal, "r", "AE Audio Signal"),
    (rmse_signal, "g", "RMSE Audio Signal"),
    (zcr_signal, "b", "ZCR Audio Signal"),
]
for position, (feature, color, title) in enumerate(panels, start=2):
    plt.subplot(4, 1, position)
    frames = range(len(feature))
    # Explicit sr: frames_to_time otherwise assumes 22050 Hz.
    t = librosa.frames_to_time(frames, sr=sr, hop_length=HOP_LENGTH)
    librosa.display.waveplot(signal, sr=sr)
    plt.plot(t, feature, color=color)
    plt.title(title)
plt.show()

# Frequency Domain Features

# Frequency Domain Feature Extraction Pipeline

In [None]:
# Show the static diagram for the frequency-domain pipeline section.
Image("../input/pictures/frequency_domain_pipeline.PNG")

# Extract spectrograms

In [None]:
# Reload the example recording so this section can run on its own.
audio_path = '../input/birdclef-2021/train_short_audio/acafly/XC109605.ogg'
signal, sr = librosa.load(audio_path)

# Short-Time Fourier Transform of the recording; the frequency-domain cells
# below read `signal_stft`.
signal_stft = librosa.stft(
    signal,
    n_fft=FRAME_SIZE,
    hop_length=HOP_LENGTH,
)

# Band Energy Ratio

In [None]:
# Show the static illustration for the Band Energy Ratio section.
Image("../input/pictures/ber.PNG")

In [None]:
def calculate_split_frequency_bin(split_frequency, sample_rate, num_frequency_bins):
    """Return the index of the frequency bin that contains ``split_frequency``.

    The bins are taken to be evenly spaced from 0 Hz up to and including the
    Nyquist frequency (``sample_rate / 2``), which is the layout of a
    one-sided STFT.  ``num_frequency_bins`` points therefore have
    ``num_frequency_bins - 1`` gaps between them.

    Args:
        split_frequency: Frequency in Hz to locate.
        sample_rate: Sampling rate in Hz.
        num_frequency_bins: Number of rows (frequency bins) of the spectrogram.

    Returns:
        Bin index as an ``int``.

    Raises:
        ValueError: If ``num_frequency_bins`` is smaller than 2, in which case
            no bin spacing can be derived.
    """
    if num_frequency_bins < 2:
        raise ValueError("num_frequency_bins must be at least 2")
    frequency_range = sample_rate / 2
    # Divide by the number of gaps, not the number of bins; dividing by the
    # bin count mapped the Nyquist frequency to an index one past the last bin.
    frequency_delta_per_bin = frequency_range / (num_frequency_bins - 1)
    return int(math.floor(split_frequency / frequency_delta_per_bin))

In [None]:
# Worked example: bin index of a 2000 Hz split at a 22050 Hz sampling rate
# when the spectrogram has 1025 frequency bins.
split_frequency_bin = calculate_split_frequency_bin(
    split_frequency=2000,
    sample_rate=22050,
    num_frequency_bins=1025,
)
# Last expression of the cell, so the notebook echoes the value.
split_frequency_bin

In [None]:
def ber(spectrogram, split_frequency, sample_rate):
    """Return the band energy ratio (BER) of every frame of ``spectrogram``.

    For each frame the power below the split bin is divided by the power at
    and above it.

    Args:
        spectrogram: 2-D STFT array indexed as ``[frequency_bin, frame]``.
        split_frequency: Frequency in Hz separating the low and high bands.
        sample_rate: Sampling rate in Hz of the analysed signal.

    Returns:
        1-D ``np.ndarray`` with one ratio per frame.  A frame with no
        high-band power yields ``inf`` or ``nan``, as a plain NumPy division
        does.
    """
    # calculate power spectrogram
    power_spectrogram = np.abs(spectrogram) ** 2
    # The bin count is the number of ROWS.  ``len(spectrogram[0])`` is the
    # number of frames, which made the split bin depend on clip length.
    num_frequency_bins = power_spectrogram.shape[0]
    split_frequency_bin = calculate_split_frequency_bin(
        split_frequency, sample_rate, num_frequency_bins
    )
    # Sum each band over the frequency axis for all frames at once.
    sum_power_low_frequencies = power_spectrogram[:split_frequency_bin].sum(axis=0)
    sum_power_high_frequencies = power_spectrogram[split_frequency_bin:].sum(axis=0)
    return np.array(sum_power_low_frequencies / sum_power_high_frequencies)

In [None]:
# Plot the band energy ratio (2000 Hz split) of the STFT computed above.
ber_signal = ber(signal_stft, 2000, sr)
frames = range(len(ber_signal))
# Use the loaded sampling rate instead of librosa's implicit 22050 Hz default.
t = librosa.frames_to_time(frames, sr=sr, hop_length=HOP_LENGTH)
plt.plot(t, ber_signal, color="r")
plt.show()

# Spectral Centroid

In [None]:
# Show the static illustration for the Spectral Centroid section.
Image("../input/pictures/sc.PNG")

In [None]:
def sc(signal, framesize, hoplength, sample_rate=None):
    """Return the spectral centroid of every frame of ``signal``.

    Args:
        signal: Audio samples, forwarded to librosa as ``y``.
        framesize: FFT window size in samples (``n_fft`` in librosa).
        hoplength: Step, in samples, between consecutive frames.
        sample_rate: Sampling rate of ``signal`` in Hz.  When omitted, the
            module-level ``sr`` is used, which is what this function always
            read before the parameter existed.

    Returns:
        ``np.ndarray`` copy of the first row of librosa's result.
    """
    if sample_rate is None:
        # Backward-compatible fallback to the notebook-global sampling rate.
        sample_rate = sr
    centroid = librosa.feature.spectral_centroid(
        y=signal, sr=sample_rate, n_fft=framesize, hop_length=hoplength
    )
    return np.array(centroid[0])

In [None]:
# Plot the spectral centroid of the currently loaded recording.
sc_signal = sc(signal, FRAME_SIZE, HOP_LENGTH)
frames = range(len(sc_signal))
# Use the loaded sampling rate instead of librosa's implicit 22050 Hz default.
t = librosa.frames_to_time(frames, sr=sr, hop_length=HOP_LENGTH)
plt.plot(t, sc_signal, color='g')
plt.show()

# Spectral Bandwidth

In [None]:
# Show the static illustration for the Spectral Bandwidth section.
Image("../input/pictures/sb.PNG")

In [None]:
def sb(signal, framesize, hoplength, sample_rate=None):
    """Return the spectral bandwidth of every frame of ``signal``.

    Args:
        signal: Audio samples, forwarded to librosa as ``y``.
        framesize: FFT window size in samples (``n_fft`` in librosa).
        hoplength: Step, in samples, between consecutive frames.
        sample_rate: Sampling rate of ``signal`` in Hz.  When omitted, the
            module-level ``sr`` is used, which is what this function always
            read before the parameter existed.

    Returns:
        ``np.ndarray`` copy of the first row of librosa's result.
    """
    if sample_rate is None:
        # Backward-compatible fallback to the notebook-global sampling rate.
        sample_rate = sr
    bandwidth = librosa.feature.spectral_bandwidth(
        y=signal, sr=sample_rate, n_fft=framesize, hop_length=hoplength
    )
    return np.array(bandwidth[0])

In [None]:
# Plot the spectral bandwidth of the currently loaded recording.
sb_signal = sb(signal, FRAME_SIZE, HOP_LENGTH)
frames = range(len(sb_signal))
# Use the loaded sampling rate instead of librosa's implicit 22050 Hz default.
t = librosa.frames_to_time(frames, sr=sr, hop_length=HOP_LENGTH)
plt.plot(t, sb_signal, color='b')
plt.show()

# Mel Frequency Cepstral Coefficients

In [None]:
# Show the static illustration for the MFCC section.
Image("../input/pictures/mfcc.PNG")

In [None]:
def mfcc(signal, sample_rate, hoplength, n_mfcc=20):
    """Return the MFCC matrix of ``signal``.

    Args:
        signal: Audio samples, forwarded to librosa as ``y``.
        sample_rate: Sampling rate of ``signal`` in Hz.
        hoplength: Step, in samples, between consecutive frames.
        n_mfcc: Number of coefficients to compute per frame.  Defaults to 20,
            librosa's own default.

    Returns:
        2-D ``np.ndarray`` as produced by ``librosa.feature.mfcc``.
    """
    # The hop length used to be passed as ``n_mfcc``, so callers handing in
    # HOP_LENGTH requested 512 coefficients and the hop length itself was
    # never applied.  Forward each value to the parameter it belongs to.
    coefficients = librosa.feature.mfcc(
        y=signal, sr=sample_rate, n_mfcc=n_mfcc, hop_length=hoplength
    )
    return np.array(coefficients)

In [None]:
# Draw the MFCC matrix of the loaded recording with a time axis and a colour
# scale.
librosa.display.specshow(
    mfcc(signal, sr, HOP_LENGTH),
    sr=sr,
    x_axis="time",
)
plt.colorbar(format="%+2.f")
plt.show()

# Visualization of Frequency Domain Features

In [None]:
# Five stacked panels: the raw signal, then BER, SC and SB drawn on top of
# the waveform, and finally the MFCC matrix.
audio_path = '../input/birdclef-2021/train_short_audio/acafly/XC109605.ogg'
signal, sr = librosa.load(audio_path)
# Recompute the STFT from the signal just loaded instead of relying on the
# `signal_stft` left behind by an earlier cell.
signal_stft = librosa.stft(signal, n_fft=FRAME_SIZE, hop_length=HOP_LENGTH)

# Per-frame features.
ber_signal = ber(signal_stft, 2000, sr)
sc_signal = sc(signal, FRAME_SIZE, HOP_LENGTH)
sb_signal = sb(signal, FRAME_SIZE, HOP_LENGTH)

plt.figure(figsize=(15, 30))

# original signal
ori_signal = signal
# One value per *sample*, so time is sample index / sr.  Running sample
# indices through frames_to_time stretched this axis by a factor of HOP_LENGTH.
t = np.arange(len(ori_signal)) / sr
plt.subplot(5, 1, 1)
# NOTE(review): waveplot was removed in librosa 0.10 (successor: waveshow).
librosa.display.waveplot(signal, sr=sr)
plt.plot(t, ori_signal, color="black")
plt.title("Original Audio Signal")

# BER, SC and SB share the same per-frame plotting recipe.  Each time axis is
# built from the length of the feature actually plotted; the BER and SB panels
# used to borrow the lengths of `ae_signal` and `zcr_signal`.
panels = [
    (ber_signal, "r", "BER Audio Signal"),
    (sc_signal, "g", "SC Audio Signal"),
    (sb_signal, "b", "SB Audio Signal"),
]
for position, (feature, color, title) in enumerate(panels, start=2):
    plt.subplot(5, 1, position)
    frames = range(len(feature))
    # Explicit sr: frames_to_time otherwise assumes 22050 Hz.
    t = librosa.frames_to_time(frames, sr=sr, hop_length=HOP_LENGTH)
    librosa.display.waveplot(signal, sr=sr)
    plt.plot(t, feature, color=color)
    plt.title(title)

# plot MFCC for the signal
plt.subplot(5, 1, 5)
librosa.display.specshow(mfcc(signal, sr, HOP_LENGTH), x_axis="time", sr=sr)
plt.colorbar(format="%+2.f")
plt.title("MFCC Audio Signal")
plt.show()

# Final Feature Extraction from all Audio files

In [None]:
# Empty results table with one column for the file name and one per
# aggregated feature; it starts with zero rows.
df = pd.DataFrame()
for column in ('filename', 'AE', 'RMSE', 'ZCR', 'BER', 'SC', 'SB', 'MFCC'):
    df[column] = ''

In [None]:
# NOTE(review): the extraction loop below is wrapped in a string literal, so
# it is never executed and `df` keeps zero rows.
# As written, the loop walks the audio directory, takes at most 5 files per
# subdirectory, computes each feature, averages it over frames and appends
# one row per file to `df`.
# Things to fix before re-enabling it:
#   * the row dict reads `filename`, which is not assigned anywhere in the
#     visible code (the loop variable holding the file name is `child`);
#   * `DataFrame.append` was removed in pandas 2.0 -- collect the row dicts
#     in a list and build the frame once instead;
#   * only row [1] of the MFCC matrix is aggregated -- TODO confirm that
#     dropping the other coefficients is intended.
'''directory= r'../input/birdclef-2021/train_short_audio'
for root, subdirectories, files in os.walk(directory):
    for subdirectory in subdirectories:
        folder=os.path.join(root, subdirectory)
        print(folder)
        parent_list = os.listdir(folder)
        count =0
        for child in parent_list:
            if count < 5:
                print(child+" "+str(count))
                audio_path=folder+"/"+child
                signal , sr = librosa.load(audio_path)
                signal_stft = librosa.stft(signal, n_fft=FRAME_SIZE, hop_length=HOP_LENGTH)
                # AE for the signal
                ae_signal=amp_env(signal,FRAME_SIZE,HOP_LENGTH)
                # Feature Aggregation of AE
                agg_ae=statistics.mean(ae_signal)
                # RMSE for the signal
                rmse_signal=rmse(signal,FRAME_SIZE,HOP_LENGTH)
                # Feature Aggregation of RMSE
                agg_rmse=statistics.mean(rmse_signal)
                # ZCR for the signal
                zcr_signal=zcr(signal,FRAME_SIZE,HOP_LENGTH)
                # Feature Aggregation of ZCR
                agg_zcr=statistics.mean(zcr_signal)
                # BER for the signal
                ber_signal=ber(signal_stft, 2000, sr)
                # Feature Aggregation of BER
                agg_ber=statistics.mean(ber_signal)
                # SC for the signal
                sc_signal=sc(signal,FRAME_SIZE,HOP_LENGTH)
                # Feature Aggregation of SC
                agg_sc=statistics.mean(sc_signal)
                # SB for the signal
                sb_signal=sb(signal,FRAME_SIZE,HOP_LENGTH)
                # Feature Aggregation of SB
                agg_sb=statistics.mean(sb_signal)
                # MFCC for the signal
                mfcc_signal=mfcc(signal,sr,HOP_LENGTH)[1]
                # Feature Aggregation of MFCC
                agg_mfcc=statistics.mean(mfcc_signal)
                df2 = {'filename':filename, 'AE': agg_ae, 'RMSE': agg_rmse, 'ZCR': agg_zcr  , 'BER': agg_ber, 'SC': agg_sc, 'SB': agg_sb , 'MFCC': agg_mfcc}
                df = df.append(df2, ignore_index = True)
            else:
                break
            count = count+1'''

In [None]:
# Echo the feature table as the cell output.  It has no rows while the
# extraction loop above stays disabled.
df

In [None]:
# Load the competition's training metadata CSV into a DataFrame.
metadata=pd.read_csv("../input/birdclef-2021/train_metadata.csv")

In [None]:
# Preview the first rows of the metadata table.
metadata.head()