In [None]:
import os

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.preprocessing import LabelEncoder
import IPython.display as ipd
import matplotlib.pyplot as plt
!pip install librosa 
import librosa
import librosa.display

In [None]:
# Read the training metadata CSV into a DataFrame, report how many rows it
# has, and preview the first rows as the cell output.
train_csv_path = '../input/birdsong-recognition/train.csv'
df = pd.read_csv(train_csv_path)
print(f'Dataframe has {len(df)} rows')
df.head()

In [None]:
# Print a summary of the training table: columns, dtypes and non-null counts.
df.info()

In [None]:
# Plot every row's latitude/longitude on a world map, coloured by species.
le = LabelEncoder()

# Plotly needs numeric colour values, so encode each species name as an integer.
species_codes = le.fit_transform(df['species'])

recording_locations = go.Scattergeo(
    lat=df['latitude'],
    lon=df['longitude'],
    hovertext=df['species'],
    marker_color=species_codes,
)
fig = go.Figure(data=recording_locations)
fig.show()

In [None]:
# Count the rows for each value of the `date` column, ordered by date.
dates = df['date'].value_counts().sort_index()

# NOTE(review): the first four entries after sorting are left out of the plot,
# presumably placeholder or malformed dates -- confirm against the data.
plotted_dates = dates.iloc[4:]

recordings_trace = go.Scatter(x=plotted_dates.index, y=plotted_dates.values)
fig = go.Figure([recordings_trace])
fig.update_layout(title='Quantity of birds recorded over time')
fig.show()

In [None]:
# Count the rows per species (most frequent first) and draw them as bars.
species = df['species'].value_counts()

species_bars = go.Bar(x=species.index, y=species.values)
fig = go.Figure(data=[species_bars])
fig.update_layout(title='Distribution of Bird Species')
fig.show()

In [None]:
# Count how many rows share each distinct `duration` value and draw the counts
# as bars.
durations = df['duration'].value_counts()

duration_bars = go.Bar(
    x=durations.index,
    y=durations.values,
    marker_color='deeppink',
)
fig = go.Figure(data=[duration_bars])
fig.update_layout(title='Distribution of durations in seconds')

# Restrict the visible x-range to 0-500; values beyond it are not shown
# unless the reader zooms out.
fig.update_xaxes(range=[0, 500])
fig.show()

In [None]:
# Build the full path of every recording: <base_dir>/<ebird_code>/<filename>.
base_dir = '../input/birdsong-recognition/train_audio/'
df['full_path'] = base_dir + df['ebird_code'] + '/' + df['filename']

# The species (eBird codes) used as examples in the audio cells.
bird_sample_list = ["amered", "cangoo", "haiwoo", "pingro", "vesspa"]


def _sample_recording_path(ebird_code, seed=33):
    """Return the full path of one sampled recording of a species.

    Parameters
    ----------
    ebird_code : str
        Value to match against the `ebird_code` column of `df`.
    seed : int, default 33
        `random_state` passed to `DataFrame.sample`, so the same file is
        picked on every run.

    Returns
    -------
    str
        The `full_path` entry of the sampled row.

    Raises
    ------
    ValueError
        Raised by `DataFrame.sample` when no row matches `ebird_code`.
    """
    recordings = df[df['ebird_code'] == ebird_code]
    return recordings.sample(1, random_state=seed)['full_path'].values[0]


# Now let's sample a few audio files: one per species, unpacked in the same
# order as bird_sample_list.
amered, cangoo, haiwoo, pingro, vesspa = (
    _sample_recording_path(code) for code in bird_sample_list
)

In [None]:
# Embed an audio player for the sampled "amered" recording.
ipd.Audio(amered)

In [None]:
# Embed an audio player for the sampled "cangoo" recording.
ipd.Audio(cangoo)

In [None]:
# Embed an audio player for the sampled "haiwoo" recording.
ipd.Audio(haiwoo)

In [None]:
# Embed an audio player for the sampled "pingro" recording.
ipd.Audio(pingro)

In [None]:
def _load_trimmed(path):
    """Decode one audio file and trim the silence at both ends.

    Parameters
    ----------
    path : str
        Audio file location, passed to `librosa.load` with its default
        settings.

    Returns
    -------
    tuple
        `(signal, sample_rate, trimmed_signal)`: the decoded samples, the
        sample rate reported by `librosa.load`, and the samples after
        `librosa.effects.trim`.
    """
    signal, sample_rate = librosa.load(path)
    trimmed, _ = librosa.effects.trim(signal)
    return signal, sample_rate, trimmed


y_amered, sr_amered, audio_amered = _load_trimmed(amered)
y_cangoo, sr_cangoo, audio_cangoo = _load_trimmed(cangoo)
y_haiwoo, sr_haiwoo, audio_haiwoo = _load_trimmed(haiwoo)
y_pingro, sr_pingro, audio_pingro = _load_trimmed(pingro)
y_vesspa, sr_vesspa, audio_vesspa = _load_trimmed(vesspa)

# The generic names refer to the vesspa recording. Reuse the arrays decoded
# above instead of loading and trimming the same file a second time.
y, sr, audio_file = y_vesspa, sr_vesspa, audio_vesspa

In [None]:
# One stacked panel per sampled species.
fig, ax = plt.subplots(5, figsize = (16, 9))
fig.suptitle('Sound Waves', fontsize=16)

# `waveplot` was removed in librosa 0.10 in favour of `waveshow`. The notebook
# installs an unpinned librosa, so prefer the new function and only fall back
# to the old one on installs that do not have it yet.
draw_wave = getattr(librosa.display, "waveshow", None)
if draw_wave is None:
    draw_wave = librosa.display.waveplot

# (trimmed signal, sample rate, line colour), in the same order as
# bird_sample_list so each panel gets the matching label.
wave_panels = [
    (audio_amered, sr_amered, "#A300F9"),
    (audio_cangoo, sr_cangoo, "#4300FF"),
    (audio_haiwoo, sr_haiwoo, "#009DFF"),
    (audio_pingro, sr_pingro, "#00FFB0"),
    (audio_vesspa, sr_vesspa, "#D9FF00"),
]

for axis, name, (signal, rate, colour) in zip(ax, bird_sample_list, wave_panels):
    draw_wave(y = signal, sr = rate, color = colour, ax=axis)
    axis.set_ylabel(name, fontsize=13)