In [73]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import os
import shutil
%matplotlib inline
import matplotlib.pyplot as plt
import IPython.display as ipd
import librosa
import librosa.display
import pathlib
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from numpy.random import uniform

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from sklearn import metrics 
import os

SR=22050
NUM_ROWS=40
NUM_COLS=int(SR * 5 / 512) + 1

#MODEL_ID='102'
MODEL = f'model_102_.hdf5'
LABLES = f'lables_102.csv'

PRED_TH = 0.4
MIN_SC = 0.2
MIN_AMP = 1.5
MIN_SAMPLES = 300
NUM_SAMPLES = 600

In [2]:
def splitDataFrameList(df, target_column):
    '''
    df = dataframe to split,
    target_column = the column containing a list of values

    returns: a dataframe with each entry for the target column separated, with each element moved into a new row.
    The values in the other columns are duplicated across the newly divided rows.
    '''

    def splitListToRows(row):
        split_row = row[target_column]
        for s in split_row:
            new_row = row.to_dict()
            new_row[target_column] = s
            new_rows.append(new_row)

    new_rows = []
    df.reset_index().apply(splitListToRows, axis=1)
    new_index = df.index.names if df.index.names[
                                      0] is not None else df.index.name if df.index.name is not None else 'index'
    new_df = pd.DataFrame(new_rows).set_index(new_index) if len(new_rows) > 0 else pd.DataFrame(new_rows)
    return new_df

In [3]:
def split_df(df, sec=5):
    df['seconds'] = df.duration.apply(lambda x: [(i + 1) * sec for i in range(int(x / sec))])
    res = splitDataFrameList(df, 'seconds')
    return res.drop(columns=['duration'])

def add_duration(df_in, path):
    df = df_in.copy().reset_index()
    df['duration'] = None
    for ind, row in df.iterrows():
        audio_path = os.path.join(path, f"{row['audio_id']}.mp3")
        x , sr = librosa.load(audio_path,sr=SR)
        df.loc[ind,'duration'] = max(5,int(len(x) / (sr*5)))
    return df

def add_cols(df, path):
    test=df.copy().sort_values(by='audio_id').reset_index()
    test.loc[:,'amp_r']=None
    test.loc[:,'sc_r']=None
    fle=''
    for ind, row in test.iterrows():
        if row['audio_id']!=fle:
            fle=row['audio_id']
            audio_path = os.path.join(path, fle+'.mp3')
            x , sr = librosa.load(audio_path,sr=SR)
            x_abs = np.abs(x)
            amp_mean = np.mean(x_abs)
        sec = int(row['seconds'])
        xx_abs = x_abs[max(0,sr*(sec-5)):min(sr*sec,len(x))]
        xx = x[max(0, sr * (sec - 5)):min(sr * sec, len(x))]
        spec = librosa.feature.mfcc(y=xx, sr=SR, n_mfcc=40)
        sc = librosa.feature.spectral_centroid(xx, sr=SR)[0,:]
        test.loc[ind,'amp_r'] = np.mean(xx_abs) / amp_mean
        test.loc[ind,'sc_r'] = np.std(sc) / np.mean(sc)
        for i in range(NUM_COLS - spec.shape[1]):
            spec = np.concatenate([spec, spec[:, -1].reshape(spec.shape[0], 1)], axis=1)
        test.loc[[ind],'x'] = pd.Series([spec.reshape(spec.shape[0], spec.shape[1], 1)],index=[ind])

    return test

In [4]:
filt = lambda row: row['amp_r']>MIN_AMP or row['sc_r']>MIN_SC

def agg_site_3(df):
    df = df[df['prob']<1.0]
    if not len(df):
        return 'nocall'
    df = df.sort_values(by='prob',ascending=False)
    if df.iloc[0]['pred']=='nocall':
        return 'nocall'
    max_prob = df.iloc[0]['prob']
    df=df[df['prob']>0.9*max_prob]
    res = set(df['pred'])
    res = [x for x in res if x!='nocall']
    return " ".join(res) if len(res) else "nocall"


    
def gen_predictions(df):
    model = tf.keras.models.load_model(MODEL)
    lables = pd.read_csv(LABLES).lables.values
    X = np.concatenate([x[np.newaxis,...] for x in df.x.values],axis=0)
    P = model.predict(X)
    df['prob'] = np.max(P,axis=-1)
    df['pred'] = [lables[i] for i in np.argmax(P, axis=-1)]
    df['pred'] = np.where(df['prob']>PRED_TH,df['pred'],'nocall')
    return df

def add_birds(test_df, test_path):
    df = test_df.copy()
    site12 = test_df[test_df.site!='site_3']
    site3 = test_df[test_df.site=='site_3']
    if len(site3):
        site3 = add_duration(site3,test_path)
        site3 = split_df(site3)
    df = pd.concat([site12,site3])
    df = add_cols(df,test_path)
    df['filt'] = df.apply(filt,axis=1)
    nobird = df[~df.filt]
    nobird['pred'] = 'nocall'
    nobird['prob'] = 1.1
    bird = df[df.filt]
    if len(bird):
        bird = gen_predictions(bird)
    df = pd.concat([bird,nobird])
    test_site12 = df[df.site!='site_3']
    test_site12['birds'] = test_site12['pred']
    test_site12 = test_site12[['row_id','birds']]
    test_site3 = df[df.site=='site_3']
    if len(test_site3):
        test_site3 = test_site3.groupby('row_id').apply(agg_site_3).to_frame().rename(columns={0:'birds'}).reset_index()
    test_sites = pd.concat([test_site12,test_site3])[['row_id','birds']]
    test_df = test_df.merge(test_sites,on=['row_id'])
    return test_df

In [9]:
test_dir = '/home/mor/Downloads/test'

In [64]:
shutil.copy('/home/mor/Downloads/train_audio/amecro/XC109768.mp3', '/home/mor/Downloads/test/XC109768.mp3')
shutil.copy('/home/mor/Downloads/train_audio/amecro/XC112598.mp3', '/home/mor/Downloads/test/XC112598.mp3')
shutil.copy('/home/mor/Downloads/train_audio/wlswar/XC125263.mp3', '/home/mor/Downloads/test/XC125263.mp3')
shutil.copy('/home/mor/Downloads/train_audio/wlswar/XC109600.mp3', '/home/mor/Downloads/test/XC109600.mp3')
shutil.copy('/home/mor/Downloads/train_audio/bkcchi/XC114073.mp3', '/home/mor/Downloads/test/XC114073.mp3')
shutil.copy('/home/mor/Downloads/train_audio/bkcchi/XC114086.mp3', '/home/mor/Downloads/test/XC114086.mp3')

'/home/mor/Downloads/test/XC114086.mp3'

In [75]:
test_df = pd.DataFrame({'audio_id':['XC109768','XC112598','XC125263','XC109600','XC114073','XC114086'],
                    'lable':['amecro','amecro','wlswar','wlswar','bkcchi','bkcchi'],
                    'site':['site_1','site_3','site_1','site_3','site_1','site_3']})
ss = add_duration(test_df[test_df.site!='site_3'],test_dir)
ss = split_df(ss)
test_df=pd.concat([test_df[test_df.site=='site_3'],ss]).reset_index(drop=True).drop(columns=['level_0'])
test_df['row_id'] = test_df.apply(lambda x:f"{x['site']}_{x['audio_id']}{'' if np.isnan(x['seconds']) else '_'+str(x['seconds'])}", axis=1)
test_df

Unnamed: 0,audio_id,lable,seconds,site,row_id
0,XC112598,amecro,,site_3,site_3_XC112598
1,XC109600,wlswar,,site_3,site_3_XC109600
2,XC114086,bkcchi,,site_3,site_3_XC114086
3,XC109768,amecro,5.0,site_1,site_1_XC109768_5.0
4,XC125263,wlswar,5.0,site_1,site_1_XC125263_5.0
5,XC114073,bkcchi,5.0,site_1,site_1_XC114073_5.0


In [76]:
add_birds(test_df,test_dir)



Unnamed: 0,audio_id,lable,seconds,site,row_id,birds
0,XC112598,amecro,,site_3,site_3_XC112598,amecro
1,XC109600,wlswar,,site_3,site_3_XC109600,nocall
2,XC114086,bkcchi,,site_3,site_3_XC114086,nocall
3,XC109768,amecro,5.0,site_1,site_1_XC109768_5.0,rewbla
4,XC125263,wlswar,5.0,site_1,site_1_XC125263_5.0,chispa
5,XC114073,bkcchi,5.0,site_1,site_1_XC114073_5.0,bkcchi


In [74]:
gen_predictions(add_cols(test_df,test_dir))



Unnamed: 0,index,audio_id,lable,seconds,site,row_id,amp_r,sc_r,x,prob,pred
0,7,XC109600,wlswar,5.0,site_3,site_3_XC109600_5.0,1.01738,0.117101,"[[[-371.7934], [-282.36197], [-255.12624], [-2...",0.065429,nocall
1,0,XC109768,amecro,5.0,site_1,site_1_XC109768_5.0,1.12107,0.20385,"[[[-474.00522], [-375.3841], [-341.86386], [-3...",0.492218,rewbla
2,1,XC112598,amecro,5.0,site_3,site_3_XC112598_5.0,1.00163,0.17887,"[[[-483.19217], [-378.23996], [-337.0496], [-3...",0.436692,cangoo
3,2,XC112598,amecro,10.0,site_3,site_3_XC112598_10.0,2.54309,0.195928,"[[[-322.55362], [-315.47314], [-321.23657], [-...",0.982275,amecro
4,3,XC112598,amecro,15.0,site_3,site_3_XC112598_15.0,1.3027,0.174734,"[[[-339.23322], [-337.7178], [-340.32486], [-3...",0.541522,amecro
5,4,XC112598,amecro,20.0,site_3,site_3_XC112598_20.0,0.764312,0.10312,"[[[-324.58972], [-322.45328], [-329.33902], [-...",0.201104,nocall
6,5,XC112598,amecro,25.0,site_3,site_3_XC112598_25.0,1.63639,0.218989,"[[[-155.0106], [-153.97916], [-168.87578], [-1...",0.915462,amecro
7,8,XC114073,bkcchi,5.0,site_1,site_1_XC114073_5.0,0.784996,0.568802,"[[[-500.54977], [-418.42728], [-385.7854], [-3...",0.41931,bkcchi
8,9,XC114086,bkcchi,5.0,site_3,site_3_XC114086_5.0,1.46653,0.229611,"[[[-634.3367], [-630.9502], [-595.7566], [-552...",0.154524,nocall
9,10,XC114086,bkcchi,10.0,site_3,site_3_XC114086_10.0,0.624255,0.163872,"[[[-262.65613], [-265.21155], [-284.2208], [-2...",0.034821,nocall


In [63]:
pd.read_csv('lables_60.csv')

Unnamed: 0.1,Unnamed: 0,lables
0,0,amecro
1,1,bewwre
2,2,bkcchi
3,3,bkhgro
4,4,bkpwar
5,5,bktspa
6,6,blugrb1
7,7,brdowl
8,8,brespa
9,9,brnthr


In [66]:
add_cols(test_df,test_dir)

Unnamed: 0,index,audio_id,lable,seconds,site,row_id,amp_r,sc_r,x
0,7,XC109600,wlswar,5.0,site_3,site_3_XC109600_5.0,1.01738,0.117101,"[[[-371.7934], [-282.36197], [-255.12624], [-2..."
1,0,XC109768,amecro,5.0,site_1,site_1_XC109768_5.0,1.12107,0.20385,"[[[-474.00522], [-375.3841], [-341.86386], [-3..."
2,1,XC112598,amecro,5.0,site_3,site_3_XC112598_5.0,1.00163,0.17887,"[[[-483.19217], [-378.23996], [-337.0496], [-3..."
3,2,XC112598,amecro,10.0,site_3,site_3_XC112598_10.0,2.54309,0.195928,"[[[-322.55362], [-315.47314], [-321.23657], [-..."
4,3,XC112598,amecro,15.0,site_3,site_3_XC112598_15.0,1.3027,0.174734,"[[[-339.23322], [-337.7178], [-340.32486], [-3..."
5,4,XC112598,amecro,20.0,site_3,site_3_XC112598_20.0,0.764312,0.10312,"[[[-324.58972], [-322.45328], [-329.33902], [-..."
6,5,XC112598,amecro,25.0,site_3,site_3_XC112598_25.0,1.63639,0.218989,"[[[-155.0106], [-153.97916], [-168.87578], [-1..."
7,8,XC114073,bkcchi,5.0,site_1,site_1_XC114073_5.0,0.784996,0.568802,"[[[-500.54977], [-418.42728], [-385.7854], [-3..."
8,9,XC114086,bkcchi,5.0,site_3,site_3_XC114086_5.0,1.46653,0.229611,"[[[-634.3367], [-630.9502], [-595.7566], [-552..."
9,10,XC114086,bkcchi,10.0,site_3,site_3_XC114086_10.0,0.624255,0.163872,"[[[-262.65613], [-265.21155], [-284.2208], [-2..."


In [53]:
add_cols(test_df,test_dir)

Unnamed: 0,index,audio_id,lable,seconds,site,row_id,amp_r,sc_r,x
0,6,XC109607,eawpew,5.0,site_1,site_1_XC109607_5.0,1.25421,0.0627571,"[[[-522.42914], [-284.3274], [-186.61722], [-1..."
1,0,XC109768,amecro,5.0,site_1,site_1_XC109768_5.0,1.12107,0.20385,"[[[-474.00522], [-375.3841], [-341.86386], [-3..."
2,7,XC110057,eawpew,5.0,site_3,site_3_XC110057_5.0,0.953781,0.112561,"[[[-506.40152], [-258.4647], [-160.97072], [-1..."
3,1,XC112598,amecro,5.0,site_3,site_3_XC112598_5.0,1.00163,0.17887,"[[[-483.19217], [-378.23996], [-337.0496], [-3..."
4,2,XC112598,amecro,10.0,site_3,site_3_XC112598_10.0,2.54309,0.195928,"[[[-322.55362], [-315.47314], [-321.23657], [-..."
5,3,XC112598,amecro,15.0,site_3,site_3_XC112598_15.0,1.3027,0.174734,"[[[-339.23322], [-337.7178], [-340.32486], [-3..."
6,4,XC112598,amecro,20.0,site_3,site_3_XC112598_20.0,0.764312,0.10312,"[[[-324.58972], [-322.45328], [-329.33902], [-..."
7,5,XC112598,amecro,25.0,site_3,site_3_XC112598_25.0,1.63639,0.218989,"[[[-155.0106], [-153.97916], [-168.87578], [-1..."
8,8,XC114073,bkcchi,5.0,site_1,site_1_XC114073_5.0,0.784996,0.568802,"[[[-500.54977], [-418.42728], [-385.7854], [-3..."
9,9,XC114086,bkcchi,5.0,site_3,site_3_XC114086_5.0,1.46653,0.229611,"[[[-634.3367], [-630.9502], [-595.7566], [-552..."


In [40]:
add_cols(test_df,test_dir)

Unnamed: 0,index,audio_id,lable,seconds,site,row_id,amp_r,sc_r,x
0,6,XC109668,bewwre,5.0,site_1,site_1_XC109668_5.0,1.19843,0.126389,"[[[-524.223], [-430.52963], [-391.9648], [-382..."
1,0,XC109768,amecro,5.0,site_1,site_1_XC109768_5.0,1.12107,0.20385,"[[[-474.00522], [-375.3841], [-341.86386], [-3..."
2,1,XC112598,amecro,5.0,site_3,site_3_XC112598_5.0,1.00163,0.17887,"[[[-483.19217], [-378.23996], [-337.0496], [-3..."
3,2,XC112598,amecro,10.0,site_3,site_3_XC112598_10.0,2.54309,0.195928,"[[[-322.55362], [-315.47314], [-321.23657], [-..."
4,3,XC112598,amecro,15.0,site_3,site_3_XC112598_15.0,1.3027,0.174734,"[[[-339.23322], [-337.7178], [-340.32486], [-3..."
5,4,XC112598,amecro,20.0,site_3,site_3_XC112598_20.0,0.764312,0.10312,"[[[-324.58972], [-322.45328], [-329.33902], [-..."
6,5,XC112598,amecro,25.0,site_3,site_3_XC112598_25.0,1.63639,0.218989,"[[[-155.0106], [-153.97916], [-168.87578], [-1..."
7,8,XC114073,bkcchi,5.0,site_1,site_1_XC114073_5.0,0.784996,0.568802,"[[[-500.54977], [-418.42728], [-385.7854], [-3..."
8,9,XC114086,bkcchi,5.0,site_3,site_3_XC114086_5.0,1.46653,0.229611,"[[[-634.3367], [-630.9502], [-595.7566], [-552..."
9,10,XC114086,bkcchi,10.0,site_3,site_3_XC114086_10.0,0.624255,0.163872,"[[[-262.65613], [-265.21155], [-284.2208], [-2..."


In [31]:
filt = lambda row: row['amp_r']>MIN_AMP or row['sc_r']>MIN_SC

def agg_site_3(df):
    df = df[df['prob']<1.0]
    if not len(df):
        return 'nocall'
    df = df.sort_values(by='prob',ascending=False)
    if df.iloc[0]['pred']=='nocall':
        return 'nocall'
    max_prob = df.iloc[0]['prob']
    df=df[df['prob']>0.9*max_prob]
    res = set(df['pred'])
    res = [x for x in res if x!='nocall']
    return " ".join(res) if len(res) else "nocall"


def split_site3(test_df, test_path):
    site12 = test_df[test_df.site!='site_3']
    site3 = test_df[test_df.site=='site_3']
    if len(site3):
        site3 = add_duration(site3,test_path)
        site3 = split_df(site3).drop(columns=['duration','index'])
    return pd.concat([site12,site3])
    
def gen_predictions(df):
    model = tf.keras.models.load_model('model_60B.hdf5')
    lables = pd.read_csv('lables_60B.csv').lables.values
    X = np.concatenate([x[np.newaxis,...] for x in df.x.values],axis=0)
    P = model.predict(X)
    df['prob'] = np.max(P,axis=-1)
    df['pred'] = [lables[i] for i in np.argmax(P, axis=-1)] 
    return df

def add_birds(test_df, test_path):
    site12 = test_df[test_df.site!='site_3']
    site3 = test_df[test_df.site=='site_3']
    if len(site3):
        site3 = add_duration(site3,test_path)
        site3 = split_df(site3).drop(columns=['duration','index'])
    df = pd.concat([site12,site3])
    df = add_cols(df,test_path)
    df['filt'] = df.apply(filt,axis=1)
    nobird = df[~df.filt]
    nobird['pred'] = 'nocall'
    nobird['prob'] = 1.1
    bird = df[df.filt]
    bird = gen_predictions(bird)
    df = pd.concat([bird,nobird])
    test_site12 = df[df.site!='site_3']
    test_site12['birds'] = test_site12['pred']
    test_site12 = test_site12[['row_id','birds']]
    test_site3 = df[df.site=='site_3']
    if len(test_site3):
        test_site3 = test_site3.groupby('row_id').apply(agg_site_3).to_frame().rename(columns={0:'birds'}).reset_index()
    test_sites = pd.concat([test_site12,test_site3])[['row_id','birds']]
    test_df = test_df.merge(test_sites,on=['row_id'])
    return test_df

In [32]:
# test_path='/home/mor/Downloads/exemple'
# site12 = test_df[test_df.site!='site_3']
# site3 = test_df[test_df.site=='site_3']
# if len(site3):
#     site3 = add_duration(site3,test_path)
#     site3 = split_df(site3).drop(columns=['duration','index'])
# df = pd.concat([site12,site3])
# df = add_cols(df,test_path)
# df['filt'] = df.apply(filt,axis=1)
# nobird = df[~df.filt]
# nobird['pred'] = 'nocall'
# nobird['prob'] = 1.1
# bird = df[df.filt]

In [33]:
res=add_birds(test_df,'/home/mor/Downloads/exemple')
res

Unnamed: 0,audio_id,site,seconds,row_id,birds
0,BLKFR-10-CPL_20190611_093000.pt540,site_1,5.0,site_1_BLKFR-10-CPL_20190611_093000.pt540_5.0,nocall
1,BLKFR-10-CPL_20190611_093000.pt540,site_1,10.0,site_1_BLKFR-10-CPL_20190611_093000.pt540_10.0,nocall
2,BLKFR-10-CPL_20190611_093000.pt540,site_1,15.0,site_1_BLKFR-10-CPL_20190611_093000.pt540_15.0,nocall
3,BLKFR-10-CPL_20190611_093000.pt540,site_1,20.0,site_1_BLKFR-10-CPL_20190611_093000.pt540_20.0,nocall
4,BLKFR-10-CPL_20190611_093000.pt540,site_1,25.0,site_1_BLKFR-10-CPL_20190611_093000.pt540_25.0,nocall
...,...,...,...,...,...
89,BLKFR-10-CPL_20190611_093000.pt540,site_1,450.0,site_1_BLKFR-10-CPL_20190611_093000.pt540_450.0,nocall
90,BLKFR-10-CPL_20190611_093000.pt540,site_1,455.0,site_1_BLKFR-10-CPL_20190611_093000.pt540_455.0,nocall
91,BLKFR-10-CPL_20190611_093000.pt540,site_1,460.0,site_1_BLKFR-10-CPL_20190611_093000.pt540_460.0,nocall
92,BLKFR-10-CPL_20190611_093000.pt540,site_1,465.0,site_1_BLKFR-10-CPL_20190611_093000.pt540_465.0,nocall


In [22]:
res.birds.value_counts()

nocall    93
bkcchi     1
Name: birds, dtype: int64

In [210]:
res.birds.value_counts()

nocall    94
Name: birds, dtype: int64

In [24]:
# Prepare For Train
df = pd.read_csv('train_full.csv').dropna(subset=['amp_r','sc_r'])
df_train = df[(df.sc_r>MIN_SC)|(df.amp_r>MIN_AMP)]
top_l=df_train.ebird_code.value_counts().to_frame().query(f"ebird_code>{MIN_SAMPLES}").index.values
df_train = df_train[df.ebird_code.isin(top_l)]
df_train['lable'] = df_train.ebird_code
print(df_train.lable.value_counts().describe())
df_train = even_df(df_train,NUM_SAMPLES)
# Add nobird
nobird = df[(df.ebird_code.isin(top_l))&((df.sc_r<0.08)&(df.amp_r<1.0)&(df.amp_r>0.5))].sample(NUM_SAMPLES*3)
nobird.loc[:,'lable']='nocall'
df_train=pd.concat([df_train,nobird])
print(df_train.lable.value_counts().describe())
lables = sorted(df_train['lable'].unique())
tr_df, ts_df = hash_split(df_train)
train = Dataset('/home/mor/Downloads/train_audio/',tr_df).ds.batch(32)
test = Dataset('/home/mor/Downloads/train_audio/',ts_df).ds.batch(32)
len(tr_df)

count     59.000000
mean     398.966102
std      101.412263
min      302.000000
25%      330.500000
50%      360.000000
75%      440.000000
max      763.000000
Name: lable, dtype: float64
count      60.000000
mean      621.183333
std       154.981846
min       584.000000
25%       595.750000
50%       600.000000
75%       606.250000
max      1800.000000
Name: lable, dtype: float64


25953

In [35]:
test_df=ts_df.sample(10)

In [36]:
test = Dataset('/home/mor/Downloads/train_audio/',test_df).ds.batch(32)

In [41]:
model = tf.keras.models.load_model('model_60_1.hdf5')
lables = pd.read_csv('lables_60.csv').lables.values
test_df['pred'] = [lables[i] for i in np.argmax(model.predict(test), axis=-1)] 
#test_df['prob'] = np.max(model.predict(test), axis=-1)

In [42]:
test_df.pred.value_counts()

nocall    10
Name: pred, dtype: int64

In [30]:
test=gen_predictions(test,'/home/mor/Downloads/train_audio')

AttributeError: 'BatchDataset' object has no attribute 'sort_values'

In [None]:
# xx.max()>0.25 and sc.std()/sc.mean()>0.15 -> Nocall else 'redhea'