# Multi-Layer Perceptron Validation

In [1]:
import os
import numpy as np
import torch

from tqdm import tqdm

from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import KFold
from sklearn import preprocessing

from bring_features_and_file_paths import bring_features_and_file_paths
from bring_features_and_file_paths import from_spectrogram_path_to_BirdNET_output_path
from bring_features_and_file_paths import get_BirdNET_detections
from bring_features_and_file_paths import get_spectrogram_time_mark_in_file

In [2]:
# Seed for the shuffle below, so that Restart & Run All reproduces the same
# ordering (and therefore the same cross-validation folds).
SHUFFLE_SEED = 0

feats, fpaths = bring_features_and_file_paths('/grand/projects/BirdAudio/Soundscapes/Features/', sub_sample=None)
# feats, fpaths = bring_features_and_file_paths('/grand/projects/BirdAudio/Soundscapes/Second_Features/', sub_sample=None)

# Apply one shared permutation to both tensors so that every feature row stays
# aligned with the row holding its file path.
indices = torch.randperm(feats.size(0), generator=torch.Generator().manual_seed(SHUFFLE_SEED))
feats = feats[indices]
fpaths = fpaths[indices]

We have 101076 feature vectors.


In [3]:
# Key of each BirdNET detection record that is used as the classification target.
label = 'detection'

BirdNET_BASE_PATH = '/grand/projects/BirdAudio/Soundscapes/BirdNET_Output'
# NOTE(review): Spectrograms_BASE_PATH is not read anywhere in this cell — confirm
# whether it is still needed.
Spectrograms_BASE_PATH = '/grand/projects/BirdAudio/Soundscapes/Second_Filtered_Spectrograms/'
# Spectrograms_BASE_PATH = '/grand/projects/BirdAudio/Soundscapes/First_Filtered_Spectrograms'
spectrogram_duration = 9.0
# The loop stops right after processing the first item whose index exceeds this value.
max_index = 100000

vectors = []
labels = []
for k, (fpath, feat) in enumerate(tqdm(zip(fpaths, feats), total=len(feats))):
    # Each path is stored as a sequence of character codes; decode it and drop
    # '~' characters (presumably padding — TODO confirm). Only used for reporting.
    file_path = ''.join([chr(int(x)) for x in fpath]).replace('~', '')
    BirdNET_PATH = from_spectrogram_path_to_BirdNET_output_path(fpath)

    try:
        start_time = get_spectrogram_time_mark_in_file(fpath, spectrogram_duration)
        interval = (start_time, start_time + spectrogram_duration)
        data_elements = get_BirdNET_detections(os.path.join(BirdNET_BASE_PATH, BirdNET_PATH), interval, confidence_threshold=0.0)

        # Collect every label before touching the output lists, so a failure
        # half-way through cannot leave `vectors` and `labels` misaligned.
        detections = [data_element[label] for data_element in data_elements]
        feat_vector = np.array(feat)
        # The same feature vector is emitted once per detection in its interval.
        vectors.extend([feat_vector] * len(detections))
        labels.extend(detections)

    except Exception as error:
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
        print("Something was wrong with")
        print(file_path)
        print(f"{type(error).__name__}: {error}")

    if k > max_index:
        break

vectors = np.array(vectors)
# Encode each label as its position in the sorted array of distinct labels.
unique_labels, labels = np.unique(np.array(labels), return_inverse=True)

  5%|▌         | 5458/101076 [00:33<05:21, 297.53it/s]

Something was wrong with
                                                                                 


 12%|█▏        | 12219/101076 [00:58<04:57, 298.35it/s]

Something was wrong with
                                                                                 


 12%|█▏        | 12474/101076 [00:59<05:04, 291.13it/s]

Something was wrong with
                                                                                 


 22%|██▏       | 22431/101076 [01:34<04:31, 289.59it/s]

Something was wrong with
                                                                                 


 28%|██▊       | 28301/101076 [01:54<04:19, 280.81it/s]

Something was wrong with
                                                                                 


 46%|████▋     | 46867/101076 [03:00<03:21, 269.62it/s]

Something was wrong with
                                                                                 


 64%|██████▍   | 64756/101076 [04:01<01:51, 326.88it/s]

Something was wrong with
                                                                                 


 99%|█████████▉| 100001/101076 [06:03<00:03, 275.44it/s]


In [4]:
# Sanity check: (number of labelled samples, feature dimension).
vectors.shape

(165321, 384)

In [5]:
# Sanity check: one integer-encoded label per row of `vectors`.
labels.shape

(165321,)

In [6]:
# Sorted distinct detection names; a label's integer code is its position in this array.
unique_labels

array(['American Crow', 'American Goldfinch', 'American Redstart',
       'American Robin', 'Baltimore Oriole', 'Barn Swallow',
       'Belted Kingfisher', 'Black-and-white Warbler',
       'Black-bellied Whistling-Duck', 'Black-capped Chickadee',
       'Black-crowned Night-Heron', 'Blue Jay', 'Blue-gray Gnatcatcher',
       'Brown-headed Cowbird', 'Canada Goose', 'Caspian Tern',
       'Cedar Waxwing', 'Chimney Swift', 'Chipping Sparrow',
       'Common Grackle', 'Common Nighthawk', 'Common Yellowthroat',
       "Cooper's Hawk", 'Downy Woodpecker', 'Eastern Bluebird',
       'Eastern Kingbird', 'Eastern Phoebe', 'Eastern Towhee',
       'Eastern Wood-Pewee', 'European Starling', 'Field Sparrow',
       'Gray Catbird', 'Great Blue Heron', 'Great Crested Flycatcher',
       'Great Egret', 'Green Heron', 'Hairy Woodpecker', 'House Finch',
       'House Sparrow', 'House Wren', 'Indigo Bunting', 'Killdeer',
       'Mallard', 'Marsh Wren', 'Mourning Dove', 'No detection',
       'Northern 

In [7]:
# Earlier configuration, kept for reference:
# clf = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(100,), random_state=1, activation='identity', max_iter=2000)

# One hidden layer of 20 units optimised with L-BFGS; random_state is fixed so
# that weight initialisation is repeatable.
# NOTE(review): activation='identity' means the hidden layer applies no
# non-linearity — confirm this is intended for an "MLP" validation.
clf = MLPClassifier(
    hidden_layer_sizes=(20,),
    activation='identity',
    solver='lbfgs',
    alpha=1e-5,
    max_iter=2000,
    random_state=1,
)

In [8]:
# Standardise the features with statistics of the whole data set, then fit the
# classifier on every sample (no hold-out in this cell).
scaler = preprocessing.StandardScaler()
clf.fit(scaler.fit_transform(vectors), labels)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [9]:
# 10-fold splitter reused by the cross-validation cells below. No shuffling is
# requested here; the feature rows were already randomly permuted at load time.
kf = KFold(n_splits=10)

In [10]:
# Held-out accuracy of the classifier on each fold, using every label.
# NOTE: `clf` is re-fitted in place on each fold, so afterwards it holds the
# model of the last fold.
performances = []
for train_indices, test_indices in tqdm(kf.split(vectors), total=kf.get_n_splits()):
    # Index the training fold once; fancy indexing copies the rows.
    train_vectors = vectors[train_indices]
    # Fit the scaler on the training fold only, then apply it to both folds.
    scaler = preprocessing.StandardScaler().fit(train_vectors)
    clf.fit(scaler.transform(train_vectors), labels[train_indices])
    performances.append(clf.score(scaler.transform(vectors[test_indices]), labels[test_indices]))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

In [11]:
# Mean accuracy over the folds.
np.asarray(performances).mean()

0.4445110069473851

In [12]:
# Species kept for the reduced classification problem.
chosen_species = [
    'Blue Jay',
    'Eastern Wood-Pewee',
    'Indigo Bunting',
    # 'Ovenbird',
    # 'Scarlet Tanager',
    'No detection',
]

# True at every position of `unique_labels` whose name is one of the chosen species.
mask = np.isin(unique_labels, chosen_species)

# Integer label codes (positions in `unique_labels`) of the chosen species.
chosen_labels = np.flatnonzero(mask)
chosen_labels

array([11, 28, 40, 45])

In [13]:
def find_indices(A, B):
    """Locate the elements of ``A`` whose value also occurs in ``B``.

    Returns the tuple of index arrays (one per dimension of ``A``) at which
    the membership test ``np.isin(A, B)`` is true.
    """
    membership = np.isin(A, B)
    return np.nonzero(membership)


In [14]:
# Row positions of the samples whose label code is one of the chosen species.
matches = find_indices(labels, chosen_labels)
indices = matches[0]
indices

array([     5,      7,      9, ..., 165313, 165314, 165316])

In [15]:
# Restrict the data set to the samples of the chosen species.
f_vectors = vectors[indices]
f_labels = labels[indices]

# Held-out accuracy of the classifier on each fold of the reduced data set.
# NOTE: `clf` is re-fitted in place on each fold, so afterwards it holds the
# model of the last fold.
f_performances = []
for train_indices, test_indices in tqdm(kf.split(f_vectors), total=kf.get_n_splits()):
    # Index the training fold once; fancy indexing copies the rows.
    train_vectors = f_vectors[train_indices]
    # Fit the scaler on the training fold only, then apply it to both folds.
    scaler = preprocessing.StandardScaler().fit(train_vectors)
    clf.fit(scaler.transform(train_vectors), f_labels[train_indices])
    f_performances.append(clf.score(scaler.transform(f_vectors[test_indices]), f_labels[test_indices]))

100%|██████████| 10/10 [2:34:23<00:00, 926.32s/it] 


In [16]:
# Mean accuracy over the folds of the reduced data set.
np.asarray(f_performances).mean()

0.7153753316319381

In [17]:
# Shape of the second weight matrix of the most recently fitted model:
# (hidden units, output classes), following scikit-learn's `coefs_` layout.
clf.coefs_[1].shape

(20, 4)