In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.patches import Rectangle
import librosa
from librosa import display as ld

# Global plotting configuration: seaborn "darkgrid" theme, and no warning
# when many figures are open at once.
sns.set_style("darkgrid")
mpl.rcParams['figure.max_open_warning'] = 0

In [None]:
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session


Datasets
* train_tp.csv
* train_fp.csv

In [None]:
# Load the two annotation tables listed above into DataFrames.
df_tp, df_fp = map(
    pd.read_csv,
    (
        '/kaggle/input/rfcx-species-audio-detection/train_tp.csv',
        '/kaggle/input/rfcx-species-audio-detection/train_fp.csv',
    ),
)

In [None]:
# Preview the first rows of the table loaded from train_tp.csv.
df_tp.head()

In [None]:
# Summary statistics (DataFrame.describe) for the table loaded from train_tp.csv.
# NOTE(review): the only comment originally here was "0099c367b" — presumably
# a recording_id of interest; confirm its purpose or remove this note.
df_tp.describe()

In [None]:
# Preview the first rows of the table loaded from train_fp.csv.
df_fp.head()

In [None]:
# Summary statistics (DataFrame.describe) for the table loaded from train_fp.csv.
df_fp.describe()

In [None]:
# Number of distinct species_id values in the train_tp.csv table
# (dropna=False so a missing value, if any, is counted like unique() would).
df_tp['species_id'].nunique(dropna=False)

In [None]:
# Chart 1: number of samples per species, annotated with percentages.
def grafico_dispersao(df, titulo):
    """Draw a bar chart counting the rows of ``df`` for each ``species_id``.

    Every bar is annotated with its share of all rows in ``df``, formatted
    as a percentage with two decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that contains a ``species_id`` column.
    titulo : str
        Title placed above the chart.

    Returns
    -------
    None
        The figure is rendered via ``plt.show()``.
    """
    total = float(len(df))
    _, ax = plt.subplots(1, 1, figsize=(16, 4))
    # Draw on the axes created above instead of relying on pyplot's notion of
    # the "current" axes.
    sns.countplot(x="species_id", data=df, palette="Set3", ax=ax)
    ax.tick_params(axis="x", labelrotation=45, labelsize=8)
    for bar in ax.patches:
        height = bar.get_height()
        # Label sits slightly above the bar top, centred horizontally.
        ax.text(
            bar.get_x() + bar.get_width() / 2.,
            height + 3,
            '{:1.2f}%'.format(100 * height / total),
            ha="center",
            rotation=45,
        )
    # Add 15% head-room so the rotated labels are not clipped by the top edge.
    y_low, y_high = ax.get_ylim()
    ax.set_ylim(y_low, y_high * 1.15)
    ax.set_title(titulo)
    # The original ended with plt.plot(), which draws nothing; rendering the
    # finished figure is what was intended.
    plt.show()
    
# One chart per annotation table (titles fixed: "Dispesão" -> "Dispersão").
grafico_dispersao(df_tp, "Dataset TP - Dispersão das amostras por espécie")
grafico_dispersao(df_fp, "Dataset FP - Dispersão das amostras por espécie")

In [None]:
# Per-species descriptive statistics of the f_min and f_max columns.
estatistica = (
    df_tp.loc[:, ['species_id', 'f_min', 'f_max']]
    .groupby(by='species_id')
    .agg(['describe'])
)
estatistica[:]

In [None]:
# Reshape train_tp into a long Series named 'value': the remaining columns are
# stacked into an 'indicator' index level, then only the (species_id,
# indicator) levels are kept and the index is sorted.
sr = (
    df_tp
    .set_index(['recording_id', 'species_id', 'songtype_id'])
    .stack()
    .rename_axis(index={None: 'indicator'})
    .rename('value')
    .droplevel(['recording_id', 'songtype_id'])
    .sort_index()
)

In [None]:
def violinplot_cutomize(sr, ax, species_id, indicators):
    """Draw violin plots of the selected indicators for one species on ``ax``.

    Parameters
    ----------
    sr : pandas.Series
        Long-format values; selected with ``sr.loc[(species_id, indicators)]``
        and expected to expose an ``indicator`` level and ``value`` values
        after ``reset_index()``.
    ax : matplotlib.axes.Axes
        Axes to draw on.
    species_id
        First-level index key selecting the species.
    indicators
        Second-level index key(s) selecting the indicators to plot.
    """
    selection = sr.loc[(species_id, indicators)].reset_index()
    # Descending order of the indicator name fixes the left-to-right order.
    selection = selection.sort_values(by='indicator', ascending=False)
    violin_style = dict(
        split=True,
        scale="count",
        inner="quartile",
        linewidth=1,
        palette="Set3",
    )
    sns.violinplot(data=selection, x="indicator", y="value", ax=ax, **violin_style)

    
def kdeplot_customize(sr, ax, species_id, indicators):
    """Draw a kernel-density estimate of the selected values for one species.

    Parameters
    ----------
    sr : pandas.Series
        Long-format values; selected with ``sr.loc[(species_id, indicators)]``.
    ax : matplotlib.axes.Axes
        Axes to draw on.
    species_id
        First-level index key selecting the species.
    indicators
        Second-level index key(s) selecting the indicators to plot.
    """
    selection = sr.loc[(species_id, indicators)].reset_index()
    selection = selection.sort_values(by='indicator', ascending=False)
    # Density is drawn along the x axis.
    sns.kdeplot(data=selection, x="value", palette="Set3", ax=ax)

def countplot_customize(sr, ax, species_id, indicators):
    """Draw a count plot of the selected values for one species on ``ax``.

    Parameters
    ----------
    sr : pandas.Series
        Long-format values; selected with ``sr.loc[(species_id, indicators)]``.
    ax : matplotlib.axes.Axes
        Axes to draw on; it also receives the x-axis label.
    species_id
        First-level index key selecting the species.
    indicators
        Second-level index key selecting what to count. It is also used
        verbatim as the x-axis label; ``species_chart`` passes a single
        indicator name here.
    """
    selection = (
        sr.loc[(species_id, indicators)]
        .reset_index()
        .sort_values(by='indicator', ascending=False)
    )
    sns.countplot(ax=ax, data=selection, x="value", palette="Set3")
    # Label the axes we actually drew on. plt.xlabel() targets pyplot's
    # *current* axes, which in a multi-panel figure is not necessarily `ax`,
    # so the label could end up on a different panel.
    ax.set_xlabel(indicators)

def species_chart(sr, indicators):
    """Create one figure per species: a violin panel plus one count panel per indicator.

    For every value in the first index level of ``sr`` a single-row figure is
    created. The first panel shows violin plots of all ``indicators``
    together; each following panel shows the count plot of one indicator.

    Parameters
    ----------
    sr : pandas.Series
        Long-format values whose first index level enumerates the species.
    indicators : list
        Indicator names to plot. Any number of indicators is supported; with
        two of them the layout is the original three-panel row.
    """
    # One violin panel plus one count panel per indicator.
    n_panels = 1 + len(indicators)
    for species_id in sr.index.levels[0].values:
        # squeeze=False keeps `axs` two-dimensional regardless of n_panels.
        fig, axs = plt.subplots(1, n_panels, figsize=(16, 4), squeeze=False)
        panels = axs[0]
        violinplot_cutomize(sr, panels[0], species_id, indicators)
        for panel, indicator in zip(panels[1:], indicators):
            countplot_customize(sr, panel, species_id, indicator)
        fig.suptitle('Specie #' + str(species_id), fontsize=16)
        
# Per-species distribution charts for the f_min and f_max indicators.
species_chart(sr, indicators=['f_min', 'f_max'])

In [None]:
import soundfile as sf

# Pick one annotated row of train_tp (position 3) as the worked example.
example = df_tp.iloc[3]

# Read the example's recording; the result is unpacked into the samples and
# the sample rate. The path is absolute, like the CSV paths used when loading
# the annotation tables, so it does not depend on the working directory.
data, sf_rate = sf.read(
    '/kaggle/input/rfcx-species-audio-detection/train/' + example['recording_id'] + '.flac'
)

In [None]:
# Display the audio samples returned by sf.read for the example recording.
data

In [None]:
# Short-time Fourier transform of the example recording, with magnitudes
# converted by librosa.amplitude_to_db.
sftf = librosa.stft(data)
sftf_xdb = librosa.amplitude_to_db(np.abs(sftf))

fig, ax = plt.subplots(figsize=(30, 10))
spec_img = ld.specshow(sftf_xdb, sr=sf_rate, x_axis='time', y_axis='hz', ax=ax)

# Outline the annotated region: t_min..t_max horizontally, f_min..f_max
# vertically.
audio_position = Rectangle(
    xy=(example['t_min'], example['f_min']),
    width=example['t_max'] - example['t_min'],
    height=example['f_max'] - example['f_min'],
    linewidth=10,
    edgecolor='white',
    facecolor='none',
)
ax.add_patch(audio_position)

fig.colorbar(spec_img, ax=ax)
plt.show()

In [None]:
# Close earlier figures, then redraw the spectrogram cropped to the annotated
# time/frequency box of the example.
plt.close('all')
fig, ax = plt.subplots(figsize=(12, 10))
ld.specshow(sftf_xdb, sr=sf_rate, x_axis='time', y_axis='hz', ax=ax)

ax.set(
    xlim=(example['t_min'], example['t_max']),
    ylim=(example['f_min'], example['f_max']),
)
plt.show()

In [None]:
import IPython.display as ipd

# Audio player for the example recording. The path is passed explicitly as
# `filename` (the first positional argument of Audio is `data`), and is
# absolute like the CSV paths used when loading the annotation tables.
ipd.Audio(
    filename='/kaggle/input/rfcx-species-audio-detection/train/' + example['recording_id'] + '.flac'
)