In [1]:
import numpy as np
import librosa
import noisereduce as nr
import matplotlib.pyplot as plt
import sounddevice as sd
import IPython.display as ipd
import pandas as pd
from librosa import effects
import scipy.signal as sp
import statistics as st

In [2]:
def get_audio_split(y,sr):
    """Return how many leading samples of a recording should be kept.

    The snippet length is chosen from the recording's duration
    (``len(y) / sr``):

    * up to 10 s  -> keep 5 s
    * up to 15 s  -> keep 10 s
    * up to 30 s  -> keep 15 s
    * longer      -> keep 30 s

    Parameters
    ----------
    y : sequence
        Audio samples; only ``len(y)`` is used.
    sr : number
        Sampling rate in samples per second.

    Returns
    -------
    The snippet length in samples (``seconds * sr``).
    """
    duration_ogn = len(y)/sr

    # Thresholds must be tested from smallest to largest: checking
    # ``<= 30.0`` first would swallow every shorter recording and make the
    # 15 s / 10 s branches unreachable.
    if duration_ogn <= 10.0:
      seconds = 5
    elif duration_ogn <= 15.0:
      seconds = 10
    elif duration_ogn <= 30.0:
      seconds = 15
    else:
      seconds = 30

    return seconds * sr


In [9]:
def get_snip(y,sr):
  """Return the leading snippet of ``y``.

  The number of samples kept is whatever ``get_audio_split(y, sr)`` reports.
  """
  n_keep = get_audio_split(y, sr)
  return y[:n_keep]

In [3]:
def get_n_fft_val(y,sr):
  """Choose an FFT size from the clip duration.

  Clips shorter than 2 seconds (``len(y) / sr < 2.0``) get 512; every other
  clip gets 1024.
  """
  clip_seconds = len(y) / sr
  return 512 if clip_seconds < 2.0 else 1024

In [4]:
def reduce_audio_noise(y,sr,n_fft_val):
  """Run ``noisereduce.reduce_noise`` on ``y`` and return its result.

  The call uses ``stationary=False``, ``prop_decrease=0.8``, the supplied
  ``n_fft_val`` as ``n_fft``, ``freq_mask_smooth_hz=300`` and
  ``time_mask_smooth_ms=50``.
  """
  denoise_options = dict(
      stationary=False,
      prop_decrease=0.8,
      n_fft=n_fft_val,
      freq_mask_smooth_hz=300,
      time_mask_smooth_ms=50,
  )
  return nr.reduce_noise(y=y, sr=sr, **denoise_options)

In [5]:
def get_compress(reduced_noise):
  """Apply librosa's pre-emphasis filter (``coef=0.97``) to the signal.

  Note: despite the function name, the only operation performed here is
  ``librosa.effects.preemphasis``.
  """
  return librosa.effects.preemphasis(reduced_noise, coef=0.97)


In [6]:
def get_s(y):
    """Return the magnitude part of the STFT of ``y``.

    The STFT is computed with librosa's default settings and split by
    ``librosa.magphase``; the phase component is discarded.
    """
    spectrum = librosa.stft(y)
    magnitude, _ = librosa.magphase(spectrum)
    return magnitude

In [7]:
def get_rms(y):
    """Return ``librosa.feature.rms`` computed from the magnitude spectrogram of ``y``."""
    magnitude = get_s(y)
    return librosa.feature.rms(S=magnitude)

In [8]:
def get_avg_rms(y):
    """Return the mean of the RMS values that ``get_rms`` produces for ``y``."""
    return np.mean(get_rms(y))


In [10]:
def get_freq(y,sr):
  """Return the mean of the ``librosa.yin`` pitch estimates for ``y``.

  The estimator is called with ``fmin=50`` and ``fmax=10000``.
  """
  pitch_track = librosa.yin(y, fmin=50, fmax=10000, sr=sr)
  return np.mean(pitch_track)

In [11]:
def get_spect_cent(y, sr=22050):
  """Return the mean spectral centroid of ``y``.

  Parameters
  ----------
  y : array-like
      Audio signal; its magnitude spectrogram is obtained via ``get_s``.
  sr : number, optional
      Sampling rate forwarded to ``librosa.feature.spectral_centroid`` so the
      centroid is expressed against the recording's real frequency axis.
      Defaults to 22050 so existing one-argument calls keep their behaviour
      (assumes librosa's own default is 22050; see the librosa docs).

  Returns
  -------
  The mean over all values returned by ``spectral_centroid``.
  """
  spect_cent = librosa.feature.spectral_centroid(S=get_s(y), sr=sr)
  return np.mean(spect_cent)

In [12]:
def get_snr(y_clean,y):
    """Estimate the signal-to-noise ratio, in dB, of a processed signal.

    The noise is taken to be the residual between the reference signal and
    the processed one (``y - y_clean``), so that
    ``SNR = 10 * log10(mean(y_clean**2) / mean((y - y_clean)**2))``.

    Parameters
    ----------
    y_clean : array-like
        Processed ("clean") signal.
    y : array-like
        Reference signal the clean one was derived from; must be
        broadcast-compatible with ``y_clean``.

    Returns
    -------
    float
        SNR in decibels. ``inf`` when the residual has zero power (the two
        signals are identical), ``-inf`` when the clean signal has zero power.
    """
    y_clean = np.asarray(y_clean, dtype=float)
    y = np.asarray(y, dtype=float)

    # Noise is what was removed, not the whole original signal: using
    # mean(y ** 2) here would merely report the power ratio of the two
    # signals (0 dB for identical inputs) rather than an SNR.
    residual = y - y_clean

    signal_power = np.mean(y_clean ** 2)
    noise_power = np.mean(residual ** 2)

    if noise_power == 0:
        return float('inf')
    if signal_power == 0:
        # Avoid log10(0) and its RuntimeWarning.
        return float('-inf')

    snr = 10 * np.log10(signal_power / noise_power)
    return snr

In [13]:
def get_harmonic(y):
    """Return the output of ``librosa.effects.harmonic`` for ``y`` (default settings)."""
    return librosa.effects.harmonic(y)

In [14]:
def get_hnr_high(y, sr):
    """Return a harmonics-to-noise ratio in dB using ``margin=8``.

    The harmonic part is ``librosa.effects.harmonic(y, margin=8)``; everything
    else (``y - harmonic``) is treated as noise. The ratio is
    ``10 * log10(var(harmonic) / var(noise))``.

    Parameters
    ----------
    y : np.ndarray
        Audio signal.
    sr : number
        Unused by the computation; retained so existing callers keep working.
    """
    # The former librosa.yin() call was dropped: its result was never used,
    # yet it ran a full pitch-tracking pass on every invocation.
    harmonic = librosa.effects.harmonic(y, margin=8)
    noise = y - harmonic
    hnr_high = 10 * np.log10(np.var(harmonic) / np.var(noise))
    return hnr_high

In [15]:
def get_hnr_low(y, sr):
    """Return a harmonics-to-noise ratio in dB using ``margin=1``.

    The harmonic part is ``librosa.effects.harmonic(y, margin=1)``; everything
    else (``y - harmonic``) is treated as noise. The ratio is
    ``10 * log10(var(harmonic) / var(noise))``.

    Parameters
    ----------
    y : np.ndarray
        Audio signal.
    sr : number
        Unused by the computation; retained so existing callers keep working.
    """
    # The former librosa.yin() call was dropped: its result was never used,
    # yet it ran a full pitch-tracking pass on every invocation.
    harmonic = librosa.effects.harmonic(y, margin=1)
    noise = y - harmonic
    hnr_low = 10 * np.log10(np.var(harmonic) / np.var(noise))
    return hnr_low

In [16]:
def get_tonnetz(y,sr):
    """Return the mean of the tonnetz features of the harmonic part of ``y``.

    The harmonic part comes from ``get_harmonic``; all tonnetz values are
    averaged into a single scalar.
    """
    harmonic_part = get_harmonic(y)
    tonal_features = librosa.feature.tonnetz(y=harmonic_part, sr=sr)
    return np.mean(tonal_features)

In [17]:
def get_mfcc(y,sr):
    """Return the mean over all values of ``librosa.feature.mfcc(y=y, sr=sr)``."""
    coefficients = librosa.feature.mfcc(y=y, sr=sr)
    return np.mean(coefficients)

In [18]:
def get_roll_off(y,sr):
    """Return the mean spectral roll-off computed from the magnitude spectrogram of ``y``."""
    magnitude = get_s(y)
    return np.mean(librosa.feature.spectral_rolloff(S=magnitude, sr=sr))

In [19]:
def get_flatness(y):
    """Return the mean spectral flatness computed from the magnitude spectrogram of ``y``."""
    magnitude = get_s(y)
    return np.mean(librosa.feature.spectral_flatness(S=magnitude))

In [20]:
def get_contrast(y,sr):
    """Return the mean spectral contrast computed from the magnitude spectrogram of ``y``."""
    magnitude = get_s(y)
    return np.mean(librosa.feature.spectral_contrast(S=magnitude, sr=sr))

In [48]:
# Load the recording metadata table; the feature-extraction loop below reads
# its `filename` and `collection` columns.
df = pd.read_csv('mini_data.csv')

In [50]:
# Preview the first ten rows of the metadata table.
df.head(10)

Unnamed: 0,primary_label,secondary_labels,type,filename,collection,rating,url,latitude,longitude,scientific_name,common_name,author,license
0,soulap1,[''],['call'],soulap1/XC114069.ogg,XC,3.0,https://xeno-canto.org/114069,4.8,-71.35,Vanellus chilensis,Southern Lapwing,Sergio Chaparro Herrera,cc-by-nc-sa 3.0
1,trsowl,[''],[''],trsowl/iNat1197731.ogg,iNat,0.0,https://static.inaturalist.org/sounds/1197731.wav,-34.3688,-58.58,Megascops choliba,Tropical Screech-Owl,Unknown,cc-by-nc 4.0
2,strher,[''],['uncertain'],strher/XC659999.ogg,XC,4.0,https://xeno-canto.org/659999,-28.1956,153.5514,Butorides striata,Striated Heron,nick talbot,cc-by-nc-sa 4.0
3,roahaw,['wbwwre1'],"['call', 'song']",roahaw/XC172507.ogg,XC,4.5,https://xeno-canto.org/172507,-3.0936,-78.3951,Rupornis magnirostris,Roadside Hawk,Leonardo Ordóñez-Delgado,cc-by-nc-sa 4.0
4,sobtyr1,[''],"['song', 'dusk song']",sobtyr1/XC405439.ogg,XC,3.0,https://xeno-canto.org/405439,11.15,-74.1167,Camptostoma obsoletum,Southern Beardless-Tyrannulet,Iván Lau,cc-by-nc-sa 4.0
5,whwswa1,[''],['call'],whwswa1/XC211444.ogg,XC,5.0,https://xeno-canto.org/211444,-0.1334,-67.0834,Tachycineta albiventer,White-winged Swallow,Jeremy Minns,cc-by-nc-sa 4.0
6,tbsfin1,[''],['song'],tbsfin1/XC275686.ogg,XC,5.0,https://xeno-canto.org/275686,0.75,-78.917,Sporophila funerea,Thick-billed Seed-Finch,Patricio Mena Valenzuela,cc-by-nc-sa 4.0
7,soulap1,[''],"['song', 'subsong']",soulap1/XC558367.ogg,XC,4.0,https://xeno-canto.org/558367,2.4799,-76.5406,Vanellus chilensis,Southern Lapwing,Alejandro Vivas Ruiz,cc-by-nc-sa 4.0
8,butsal1,[''],"['call', 'song']",butsal1/XC419311.ogg,XC,4.0,https://xeno-canto.org/419311,-16.2891,-39.335,Saltator maximus,Buff-throated Saltator,Fernando Igor de Godoy,cc-by-nc-sa 4.0
9,cotfly1,[''],['song'],cotfly1/XC432604.ogg,XC,0.0,https://xeno-canto.org/432604,7.5241,-76.5948,Todirostrum cinereum,Common Tody-Flycatcher,Andrea Lopera-Salazar,cc-by-nc-sa 4.0


In [51]:
# Extract the acoustic features of every recording listed in `df` and store
# them as new columns on the same row. Each feature is computed twice: on the
# noise-reduced signal (`*_reduced`) and on the pre-emphasised, noise-reduced
# signal (`*_compressed`).
for i in range(len(df)):
    # The directory prefix is written directly in the f-string: nesting the
    # same quote character inside a replacement field is a SyntaxError on
    # Python versions before 3.12.
    y, sr = librosa.load(f'train_audio/{df.filename[i]}')
    if df.collection[i] == 'CSA':
        # Only recordings from the 'CSA' collection are cut to a snippet.
        y = get_snip(y, sr)

    n_fft = get_n_fft_val(y, sr)
    y_red = reduce_audio_noise(y, sr, n_fft)
    y_comp = get_compress(y_red)

    # Insertion order below defines the order of the new columns in `df`.
    features = {
        'rms_reduced': get_avg_rms(y_red),
        'rms_compressed': get_avg_rms(y_comp),

        'frequency_reduced': get_freq(y_red, sr),
        'frequency_compressed': get_freq(y_comp, sr),

        'spectral_centroid_reduced': get_spect_cent(y_red),
        'spectral_centroid_compressed': get_spect_cent(y_comp),

        # Both SNR values are measured against the unprocessed signal `y`.
        'snr_reduced': get_snr(y_red, y),
        'snr_compressed': get_snr(y_comp, y),

        'high_hnr_reduced': get_hnr_high(y_red, sr),
        'low_hnr_reduced': get_hnr_low(y_red, sr),

        'high_hnr_compressed': get_hnr_high(y_comp, sr),
        'low_hnr_compressed': get_hnr_low(y_comp, sr),

        'tonnetz_reduced': get_tonnetz(y_red, sr),
        'tonnetz_compressed': get_tonnetz(y_comp, sr),

        'mfcc_reduced': get_mfcc(y_red, sr),
        'mfcc_compressed': get_mfcc(y_comp, sr),

        'roll_off_reduced': get_roll_off(y_red, sr),
        'roll_off_compressed': get_roll_off(y_comp, sr),

        'flatness_reduced': get_flatness(y_red),
        'flatness_compressed': get_flatness(y_comp),

        'contrast_reduced': get_contrast(y_red, sr),
        'contrast_compressed': get_contrast(y_comp, sr),
    }

    for column, value in features.items():
        df.loc[i, [column]] = value


print("done.")

done.


In [52]:
# Write the table, including the feature columns added by the loop above, to
# disk; index=False leaves the DataFrame index out of the file.
df.to_csv('mini_train.csv', index=False)  