<a href="https://colab.research.google.com/github/sathyasravya/number-of-peacocks-automation/blob/master/inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import glob
import os
import librosa
import numpy as np
import seaborn as sns
import wave
import contextlib
import pandas as pd

from sklearn.metrics import accuracy_score,classification_report,f1_score
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
%matplotlib inline
plt.style.use('ggplot')

# Typography and label sizing applied on top of the ggplot style; the keys
# are set in the order listed.
plt.rcParams.update({
    'font.family': 'serif',
    'font.serif': 'Ubuntu',
    'font.monospace': 'Ubuntu Mono',
    'font.size': 12,
    'axes.labelsize': 11,
    'axes.labelweight': 'bold',
    'axes.titlesize': 14,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 11,
    'figure.titlesize': 13,
})

In [2]:
# Colab-specific: mount Google Drive at /content/drive. Later cells read the
# audio data and the pickled models from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Scratch check: show how a sample file name splits on '_'. The last token
# ('2.WAV' here) is what parse_audio_files2 compares against to pick label2.
s = "02 Eow_peacock_2.WAV"
s.split("_")
#print(fn.split('_')[-1]=='2.WAV')

['02 Eow', 'peacock', '2.WAV']

# Feature plots - code

In [0]:
def load_sound_files(file_paths):
    """Load every audio file in ``file_paths`` and return the waveforms.

    Each path is read with ``librosa.load`` using its default arguments.
    Only the sample array is kept; the sample rate returned alongside it is
    discarded.

    Parameters
    ----------
    file_paths : iterable
        Paths of the audio files to load, in the order they should appear
        in the result.

    Returns
    -------
    list
        One waveform per input path, in input order.
    """
    # librosa.load returns (samples, sample_rate); index 0 is the samples.
    return [librosa.load(path)[0] for path in file_paths]

def plot_waves(sound_names,raw_sounds):
    """Draw one waveform subplot per sound, stacked vertically in one figure.

    Parameters
    ----------
    sound_names : iterable of str
        Titles for the subplots (``str.title()`` is applied to each).
    raw_sounds : iterable
        Waveforms, paired with ``sound_names`` by position. Extra items in
        the longer of the two iterables are ignored.

    Notes
    -----
    The sample rate passed to librosa is fixed at 22050.
    NOTE(review): a 25x60 inch figure at 900 dpi is a 22500x54000 pixel
    canvas; consider lowering ``dpi`` if rendering is slow or runs out of
    memory.
    NOTE(review): newer librosa releases reportedly rename ``waveplot`` to
    ``waveshow`` - confirm against the installed version.
    """
    # librosa.display is a submodule that a bare `import librosa` does not
    # load on older librosa releases, so import it explicitly before use.
    import librosa.display

    pairs = list(zip(sound_names, raw_sounds))
    # Keep the 10-row grid for short inputs, but grow it when more than ten
    # sounds are passed so plt.subplot does not raise on the 11th panel.
    n_rows = max(10, len(pairs))

    plt.figure(figsize=(25,60), dpi = 900)
    for i, (n, f) in enumerate(pairs, start=1):
        plt.subplot(n_rows,1,i)
        librosa.display.waveplot(np.array(f),sr=22050)
        plt.title(n.title())
    plt.suptitle('Figure 1: Waveplot',x=0.5, y=0.915,fontsize=18)
    plt.show()
    
def plot_specgram(sound_names,raw_sounds):
    """Draw one matplotlib spectrogram subplot per sound in a single figure.

    Parameters
    ----------
    sound_names : iterable of str
        Titles for the subplots (``str.title()`` is applied to each).
    raw_sounds : iterable
        Waveforms, paired with ``sound_names`` by position. Extra items in
        the longer of the two iterables are ignored.

    Notes
    -----
    The sampling frequency passed to ``specgram`` is fixed at 22050.
    NOTE(review): a 25x60 inch figure at 900 dpi is a 22500x54000 pixel
    canvas; consider lowering ``dpi`` if rendering is slow or runs out of
    memory.
    """
    pairs = list(zip(sound_names, raw_sounds))
    # Keep the 10-row grid for short inputs, but grow it when more than ten
    # sounds are passed so plt.subplot does not raise on the 11th panel.
    n_rows = max(10, len(pairs))

    plt.figure(figsize=(25,60), dpi = 900)
    for i, (n, f) in enumerate(pairs, start=1):
        plt.subplot(n_rows,1,i)
        specgram(np.array(f), Fs=22050)
        plt.title(n.title())
    plt.suptitle('Figure 2: Spectrogram',x=0.5, y=0.915,fontsize=18)
    plt.show()

def plot_log_power_specgram(sound_names,raw_sounds):
    """Draw one log-power spectrogram subplot per sound in a single figure.

    For each waveform the squared STFT magnitude is converted to decibels
    relative to its own maximum and shown with a time x-axis and a
    log-frequency y-axis.

    Parameters
    ----------
    sound_names : iterable of str
        Titles for the subplots (``str.title()`` is applied to each).
    raw_sounds : iterable
        Waveforms, paired with ``sound_names`` by position. Extra items in
        the longer of the two iterables are ignored.

    Notes
    -----
    NOTE(review): a 25x60 inch figure at 900 dpi is a 22500x54000 pixel
    canvas; consider lowering ``dpi`` if rendering is slow or runs out of
    memory.
    """
    # librosa.display is a submodule that a bare `import librosa` does not
    # load on older librosa releases, so import it explicitly before use.
    import librosa.display

    pairs = list(zip(sound_names, raw_sounds))
    # Keep the 10-row grid for short inputs, but grow it when more than ten
    # sounds are passed so plt.subplot does not raise on the 11th panel.
    n_rows = max(10, len(pairs))

    plt.figure(figsize=(25,60), dpi = 900)
    for i, (n, f) in enumerate(pairs, start=1):
        plt.subplot(n_rows,1,i)
        # power_to_db is the replacement for the removed librosa.logamplitude
        # (its `ref` argument takes the place of `ref_power`).
        D = librosa.power_to_db(np.abs(librosa.stft(f))**2, ref=np.max)
        librosa.display.specshow(D,x_axis='time' ,y_axis='log')
        plt.title(n.title())
    plt.suptitle('Figure 3: Log power spectrogram',x=0.5, y=0.915,fontsize=18)
    plt.show()

# Extracting features

In [0]:
def extract_feature(file_name):
    """Compute five time-averaged librosa feature vectors for one audio file.

    The file is read with ``librosa.load`` using its default arguments. Each
    feature matrix is transposed and averaged along axis 0, giving one 1-D
    vector per feature type.

    Parameters
    ----------
    file_name : str
        Path of the audio file to analyse.

    Returns
    -------
    tuple of np.ndarray
        ``(mfccs, chroma, mel, contrast, tonnetz)``. ``mfccs`` has 40 entries
        (``n_mfcc=40``); the other lengths follow librosa's defaults.
        parse_audio_files2 stacks the five vectors into rows of width 193.
    """
    X, sample_rate = librosa.load(file_name)
    # Magnitude spectrogram, shared by the chroma and spectral-contrast features.
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
    # Pass the signal by keyword: librosa releases that make this argument
    # keyword-only reject the positional form, while `y=` works everywhere.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
    # Tonnetz is computed on the harmonic component of the signal only.
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
    return mfccs,chroma,mel,contrast,tonnetz

def parse_audio_files(parent_dir,sub_dirs,file_ext='*.wav'):
    """Collect the durations (in seconds) of the WAV files under each sub-directory.

    Files are found with ``glob`` using ``parent_dir/sub_dir/file_ext`` and
    opened with the standard ``wave`` module; a file's duration is its frame
    count divided by its frame rate.

    Parameters
    ----------
    parent_dir : str
        Directory that contains the sub-directories.
    sub_dirs : iterable of str
        Sub-directory names to scan, in order.
    file_ext : str, optional
        Glob pattern for the files inside each sub-directory.

    Returns
    -------
    tuple of (list, list)
        Durations of files found in every sub-directory except 'noise_set',
        followed by durations of files found in 'noise_set'.
    """
    signal_durations = []
    noise_durations = []
    for sub_dir in sub_dirs:
        # Choose the destination list once per directory.
        bucket = noise_durations if sub_dir == 'noise_set' else signal_durations
        pattern = os.path.join(parent_dir, sub_dir, file_ext)
        for path in glob.glob(pattern):
            with contextlib.closing(wave.open(path, 'r')) as wav:
                n_frames = wav.getnframes()
                frame_rate = wav.getframerate()
                bucket.append(n_frames / float(frame_rate))
    return signal_durations, noise_durations

In [0]:
def _labels_from_path(fn, sub_dir):
    """Derive the ``(label, label1, label2)`` strings for one file from its path."""
    if sub_dir == 'noise_set':
        return "0", "1", "0"

    # NOTE: the split is applied to the full path, so underscores in directory
    # names (e.g. 'extra_mp3', 'TRIM_old') shift the token positions.
    parts = fn.split('_')
    label1 = "1" if len(parts) >= 6 else "0"
    suffix_label = {
        '1.wav': "1", '1.WAV': "1",
        '2.wav': "2", '2.WAV': "2",
        '3.wav': "3", '3.WAV': "3",
        'MI.wav': "4", 'MI.WAV': "4",
    }.get(parts[-1])

    if sub_dir in ('TRIM_old', 'extra_mp3'):
        # 'Multiplepeacock' in the fourth token wins; otherwise use the
        # trailing token, falling back to "1" when it is not recognised.
        # The length guard avoids an IndexError on short names.
        if len(parts) > 3 and parts[3] == 'Multiplepeacock':
            label2 = "4"
        elif suffix_label is not None:
            label2 = suffix_label
        else:
            label2 = "1"
    else:
        if suffix_label is None and parts[-1] in ('3or4.wav', '3or4.WAV'):
            suffix_label = "4"
        label2 = suffix_label if suffix_label is not None else "0"
    return "1", label1, label2


def parse_audio_files2(parent_dir,sub_dirs):
    """Extract features and file-name-derived labels for every audio file.

    For each sub-directory the files matching ``*.WAV`` (for 'iid', 'Kaan'
    and 'extra_mp3') or ``*.wav`` (all others) are passed to
    ``extract_feature``; the five returned vectors are concatenated into one
    feature row. The name of each sub-directory is printed as it is visited.

    Labels are strings:

    * ``label``  - "0" for files in 'noise_set', "1" otherwise.
    * ``label1`` - "1" for 'noise_set' files and for paths that split on
      '_' into six or more tokens, "0" otherwise.
    * ``label2`` - "0" for 'noise_set'. For 'TRIM_old' / 'extra_mp3': "4" if
      the fourth '_' token is 'Multiplepeacock', else "1"/"2"/"3"/"4" from a
      trailing ``1``/``2``/``3``/``MI`` token, else "1". For other
      directories: the same trailing tokens plus ``3or4`` -> "4", else "0".
      (Presumably this encodes the number of birds - confirm with the
      dataset naming scheme.)

    Parameters
    ----------
    parent_dir : str
        Directory that contains the sub-directories.
    sub_dirs : iterable of str
        Sub-directory names to scan, in order.

    Returns
    -------
    tuple
        ``(fns, features, finlabel, labels, label1s, label2s)`` where ``fns``
        is a list of the '_' tokens of each path minus the first token,
        ``features`` is an ``(n, 193)`` array, ``finlabel`` is an ``(n, 3)``
        array of the three labels per file, and the last three items are 1-D
        arrays of the individual labels.
    """
    upper_ext_dirs = ('iid', 'Kaan', 'extra_mp3')
    fns = []
    feature_rows = []
    label_rows = []
    for sub_dir in sub_dirs:
        print(sub_dir)
        file_ext = '*.WAV' if sub_dir in upper_ext_dirs else '*.wav'
        for fn in glob.glob(os.path.join(parent_dir,sub_dir,file_ext)):
            mfccs, chroma, mel, contrast,tonnetz = extract_feature(fn)
            feature_rows.append(np.hstack([mfccs,chroma,mel,contrast,tonnetz]))
            label_rows.append(_labels_from_path(fn, sub_dir))
            fns.append(fn.split('_')[1:])

    # Stack once at the end instead of re-allocating the matrix per file; the
    # empty (0, 193) seed keeps the width check and the empty-input shape.
    features = np.vstack([np.empty((0,193))] + feature_rows)
    if label_rows:
        # '<U32' is the dtype numpy gives when string labels are stacked onto
        # a float seed array, which is how this matrix was built before.
        finlabel = np.array(label_rows, dtype='<U32')
    else:
        finlabel = np.empty((0,3))
    labels = [row[0] for row in label_rows]
    label1s = [row[1] for row in label_rows]
    label2s = [row[2] for row in label_rows]
    return(fns, features, finlabel, np.array(labels), np.array(label1s), np.array(label2s))

In [0]:
def recall_m(y_true, y_pred):
    """Recall: rounded true positives divided by rounded actual positives.

    Both operands are clipped to [0, 1] and rounded before being summed, and
    ``K.epsilon()`` is added to the denominator.

    NOTE(review): ``K`` is not defined in the visible cells - presumably the
    Keras backend module; confirm it is imported before this is called.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    positive_mask = K.round(K.clip(y_true, 0, 1))
    return K.sum(hit_mask) / (K.sum(positive_mask) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision: rounded true positives divided by rounded predicted positives.

    Both operands are clipped to [0, 1] and rounded before being summed, and
    ``K.epsilon()`` is added to the denominator.

    NOTE(review): ``K`` is not defined in the visible cells - presumably the
    Keras backend module; confirm it is imported before this is called.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hit_mask) / (K.sum(predicted_mask) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score built from ``precision_m`` and ``recall_m``.

    Returns ``2 * (p * r) / (p + r + K.epsilon())``.

    NOTE(review): ``K`` is not defined in the visible cells - presumably the
    Keras backend module; confirm it is imported before this is called.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

# Feed Train & Test Data

In [0]:
# Root folder of the audio data on the mounted Drive.
parent_dir = '/content/drive/My Drive/Colab Notebooks/data' #/content/drive/My Drive/Colab Notebooks/data
# Full list of training directories, kept for reference; only 'extra_mp3' is
# processed in this run.
#sub_dirs = ['iid','Kaan','C_new2','TRIM_old','A_new1','trimmed_new3','noise_set','Chincholi'] #train dirs
sub_dirs = ['extra_mp3']
# NOTE(review): parse_audio_files defaults to the '*.wav' pattern, whereas
# parse_audio_files2 globs '*.WAV' for 'extra_mp3' - confirm these duration
# lists are not empty on a case-sensitive file system.
duration1,duration2= parse_audio_files(parent_dir,sub_dirs)

In [9]:
# Unpack, in order: split file-name tokens, the feature matrix, the stacked
# three-column label matrix, and the three individual label arrays
# (label, label1, label2).
fn , features, fl, l,l1,l2 = parse_audio_files2(parent_dir, sub_dirs)

extra_mp3


In [0]:
import pickle

# SECURITY: pickle.load can execute arbitrary code while deserialising; only
# load model files that come from a trusted source.

# Model stored as 'weighted_model.pkl'. The `with` block closes the file
# handle even if unpickling raises.
filename = '/content/drive/My Drive/Colab Notebooks/weighted_model.pkl'
with open(filename,'rb') as infile:
    model_w = pickle.load(infile)

# Model stored as 'plain_model.pkl'.
filename = '/content/drive/My Drive/Colab Notebooks/plain_model.pkl'
with open(filename,'rb') as infile:
    model_p = pickle.load(infile)

In [0]:
#yy1,yy2,yy3 = model.predict(X_test)
def normalise_op(yy1,yy2,yy3):
    """Binarise three model outputs in place using a 0.5 threshold.

    Values ``>= 0.5`` become 1 and all others become 0. Every element of
    ``yy3`` is thresholded; for ``yy1`` and ``yy2`` only column 0 is touched,
    and only for the first ``yy3.shape[0]`` rows.

    Parameters
    ----------
    yy1, yy2 : array-like with at least two dimensions
        Outputs whose first column is thresholded.
    yy3 : 2-D array-like
        Output thresholded element-wise; its row count drives the other two.

    Returns
    -------
    tuple
        The same three objects ``(yy1, yy2, yy3)``, modified in place.
    """
    n_rows = yy3.shape[0]
    # Assigning the boolean mask back into the array stores True/False as
    # 1/0 in the array's own dtype, replacing the element-wise Python loops.
    yy3[:, :] = yy3 >= 0.5
    yy2[:n_rows, 0] = yy2[:n_rows, 0] >= 0.5
    yy1[:n_rows, 0] = yy1[:n_rows, 0] >= 0.5
    return yy1,yy2,yy3
#yy1,yy2,yy3 = normalise_op(yy1,yy2,yy3)

In [0]:
# The weighted model's predict() result is unpacked into three outputs, one
# per label array (l, l1, l2) - TODO confirm the output order against the
# model definition.
r1,r2,r3 = model_w.predict(features)

In [0]:
# Threshold the raw outputs at 0.5 into 0/1 values. normalise_op modifies the
# arrays in place and returns the same objects.
r1,r2,r3 = normalise_op(r1,r2,r3)

In [29]:
# Compare the first ten file-name-derived labels with the first ten
# thresholded values of the first model output.
print(l[0:10])
print(r1[0:10])

['1' '1' '1' '1' '1' '1' '1' '1' '1' '1']
[[1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]]


In [25]:
# Per-class precision/recall/F1 of the first model output against `l`
# (the string labels are cast to int for the comparison).
print(classification_report(np.array(l,dtype=int),r1))

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         0
         1.0       1.00      0.33      0.50        30

    accuracy                           0.33        30
   macro avg       0.50      0.17      0.25        30
weighted avg       1.00      0.33      0.50        30



  'recall', 'true', average, warn_for)


In [26]:
# Per-class precision/recall/F1 of the second model output against `l1`
# (the string labels are cast to int for the comparison).
print(classification_report(np.array(l1,dtype=int),r2))

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00      30.0
         1.0       0.00      0.00      0.00       0.0

    accuracy                           0.00      30.0
   macro avg       0.00      0.00      0.00      30.0
weighted avg       0.00      0.00      0.00      30.0



  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
