In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

# NOTE: librosa dependencies apparently require specific versions of numpy, try numpy==1.21.4
import librosa
import librosa.display
import seaborn as sns
import IPython.display

In [2]:
# Load the precomputed feature table. Later cells read its 'class', 'fold',
# 'salience', 'crestfactor' and percussive/harmonic power and hits columns.
eq_df = pd.read_csv('./large_data/eq_harmony_combined.csv')
# display(eq_df)

After initial attempts performed relatively poorly, I decided to add another feature for the classifier. Crest factor is the ratio of the maximum amplitude of a signal to its root mean square. As such, I expect short, loud sounds like gunshots to exhibit large crest factors.

After initial attempts with the crest factor, I noticed that a number of the audio samples have significant "room noise" present. In an attempt to clean this up, I take the Hilbert transform of the signal, which extracts its instantaneous amplitude (envelope). Smoothing this envelope and dividing by the root mean square power should then amplify the parts of the signal that are large in amplitude, while minimizing the areas that are simply a constant-amplitude "hum". Low-crest-factor signals should be largely unaffected, because for such signals the smoothed envelope stays close to the root mean square of the signal. Finally, the result is rescaled so that its maximum amplitude equals that of the input signal.

In [None]:
# Boolean mask of the rows whose crest factor is NaN; indexing the mask with
# itself leaves only the True entries, i.e. the affected row labels.
nansvec = np.isnan(eq_df['crestfactor'])
nansvec[nansvec]

In [3]:
# log10 of percussive power over harmonic power.
# NOTE(review): no epsilon is added here (unlike 'hits_ratio'), so a zero in
# either power column would give +/-inf or NaN -- confirm the data has none.
eq_df['power_ratio'] = np.log10(
    eq_df['percussive_power'].to_numpy() / eq_df['harmonic_power'].to_numpy()
)

In [None]:
# log10 of percussive hits over harmonic hits. The 1e-1 keeps the denominator
# non-zero and the 5e-4 keeps the argument of the logarithm strictly positive.
eq_df['hits_ratio'] = np.log10(
    eq_df['percussive_hits'].to_numpy() / (eq_df['harmonic_hits'].to_numpy() + 1e-1)
    + 5e-4
)

In [None]:
# One strip of crest-factor values per sound class.
fig, ax = plt.subplots(figsize=(11, 5))
sns.stripplot(data=eq_df, x='class', y='crestfactor', ax=ax)
fig.tight_layout()
plt.show()

In [None]:
# Inspect the feature table, including any ratio columns added by the cells above.
display(eq_df)

In [None]:
# Discard every row that contains at least one missing value.
eq_df = eq_df.dropna(axis=0, how='any')

In [None]:
# Re-check after dropna(): the filtered mask should now be empty.
nansvec = np.isnan(eq_df['crestfactor'])
nansvec[nansvec]

In [None]:
# try classifying with the log instead?
for i in range(0,len(eq_df)):
    eq_df.iloc[i,1:-9] = np.log10(eq_df.iloc[i,1:-9].values.astype(float))

In [None]:
def TPR(prediction, data, numclasses):
    """
    Returns the per-class True Positive Rate (recall), TP / (TP + FN).

    The two label vectors are forwarded positionally to
    ``confusion_matrix(prediction, data)``. The first argument indexes the
    rows of that matrix and the rates are computed over rows, so -- despite
    its name -- ``prediction`` must hold the reference (true) labels and
    ``data`` the classifier output. This is how the notebook calls it, e.g.
    ``TPR(y_val, mlp.predict(X_val), 10)``.

    Parameters
    ----------
    prediction : array-like
        Reference labels (confusion-matrix rows).
    data : array-like
        Labels being scored (confusion-matrix columns).
    numclasses : int
        Number of leading confusion-matrix rows to report. An IndexError is
        raised if the matrix is smaller than this.

    Returns
    -------
    numpy.ndarray
        Float array of length ``numclasses``. An entry is NaN when the
        corresponding row of the confusion matrix is empty.

    Notes
    -----
    Entries are addressed by *position* in the confusion matrix, which only
    coincides with the class id when every class id below it occurs in at
    least one of the two label vectors.
    """
    confmat = confusion_matrix(prediction, data)

    idx = np.arange(numclasses)
    TP = confmat[idx, idx].astype(float)
    # Everything else in a row belongs to that class but was assigned elsewhere.
    FN = confmat[idx, :].sum(axis=1) - TP

    # An empty row gives 0/0; keep the NaN result but without a RuntimeWarning.
    with np.errstate(divide='ignore', invalid='ignore'):
        return TP / (TP + FN)
    

In [None]:
def recall(prediction, data, numclasses):
    """
    Calculates the per-class recall, TP / (TP + FN).

    The two label vectors are forwarded positionally to
    ``confusion_matrix(prediction, data)``. The first argument indexes the
    rows of that matrix and recall is computed over rows, so -- despite its
    name -- ``prediction`` must hold the reference (true) labels and ``data``
    the classifier output.

    Parameters
    ----------
    prediction : array-like
        Reference labels (confusion-matrix rows).
    data : array-like
        Labels being scored (confusion-matrix columns).
    numclasses : int
        Number of leading confusion-matrix rows to report. An IndexError is
        raised if the matrix is smaller than this.

    Returns
    -------
    numpy.ndarray
        Float array of length ``numclasses``. An entry is NaN when the
        corresponding row of the confusion matrix is empty.
    """
    confmat = confusion_matrix(prediction, data)

    idx = np.arange(numclasses)
    TP = confmat[idx, idx].astype(float)
    # Row total minus the diagonal: members of the class that were missed.
    FN = confmat[idx, :].sum(axis=1) - TP

    # An empty row gives 0/0; keep the NaN result but without a RuntimeWarning.
    with np.errstate(divide='ignore', invalid='ignore'):
        return TP / (TP + FN)

def precision(prediction, data,numclasses):
    """
    Calculates the per-class precision, TP / (TP + FP).

    The two label vectors are forwarded positionally to
    ``confusion_matrix(prediction, data)``. The first argument indexes the
    rows and the second the columns; precision is computed over columns, so
    -- despite its name -- ``prediction`` must hold the reference (true)
    labels and ``data`` the classifier output.

    Parameters
    ----------
    prediction : array-like
        Reference labels (confusion-matrix rows).
    data : array-like
        Labels being scored (confusion-matrix columns).
    numclasses : int
        Number of leading confusion-matrix columns to report. An IndexError
        is raised if the matrix is smaller than this.

    Returns
    -------
    numpy.ndarray
        Float array of length ``numclasses``. An entry is NaN when the
        corresponding column of the confusion matrix is empty.
    """
    confmat = confusion_matrix(prediction, data)

    idx = np.arange(numclasses)
    TP = confmat[idx, idx].astype(float)
    # Column total minus the diagonal: samples wrongly assigned to the class.
    FP = confmat[:, idx].sum(axis=0) - TP

    # An empty column gives 0/0; keep the NaN result but without a RuntimeWarning.
    with np.errstate(divide='ignore', invalid='ignore'):
        return TP / (TP + FP)

In [None]:
def Fmeasure(prediction, data, numclasses):
    """
    Returns the per-class F-measure.

    This is considered a balance of the precision and the recall.

    F = (2*P*R)/(P+R)

    where

    P = TP/(TP + FP) is the precision and
    R = TP/(TP + FN) is the recall.

    The two label vectors are forwarded positionally to
    ``confusion_matrix(prediction, data)``. The first argument indexes the
    rows and the second the columns, so -- despite its name -- ``prediction``
    must hold the reference (true) labels and ``data`` the classifier output.

    Parameters
    ----------
    prediction : array-like
        Reference labels (confusion-matrix rows).
    data : array-like
        Labels being scored (confusion-matrix columns).
    numclasses : int
        Number of leading confusion-matrix classes to report. An IndexError
        is raised if the matrix is smaller than this.

    Returns
    -------
    numpy.ndarray
        Float array of length ``numclasses``. An entry is NaN when the
        precision or the recall of that class is undefined (empty column or
        row) or when both are zero.

    Reference:
    Müller, Meinard. Fundamentals of music processing: Audio, analysis, algorithms, applications.
    Vol. 5. Cham: Springer, 2015.
    Sec. 4.5 pp. 217
    """
    confmat = confusion_matrix(prediction, data)

    idx = np.arange(numclasses)
    TP = confmat[idx, idx].astype(float)
    FN = confmat[idx, :].sum(axis=1) - TP   # rest of the row
    FP = confmat[:, idx].sum(axis=0) - TP   # rest of the column

    # 0/0 cases stay NaN, as before, but no longer emit RuntimeWarnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        P = TP / (TP + FP)
        R = TP / (TP + FN)
        F = (2 * P * R) / (P + R)

    return F
    

In [None]:
# Encode the ten class names as the integer ids 0-9. replace() without
# inplace returns a new frame, so eq_df itself is left untouched.
eq_df2 = eq_df.replace({
    'air_conditioner': 0,
    'car_horn': 1,
    'children_playing': 2,
    'dog_bark': 3,
    'drilling': 4,
    'engine_idling': 5,
    'gun_shot': 6,
    'jackhammer': 7,
    'siren': 8,
    'street_music': 9,
})
# Alternative encodings tried earlier (merged classes / gun_shot vs. rest):
# eq_df2.replace({'air_conditioner':0, 'car_horn':1, 'children_playing':2, 'dog_bark':3, 'drilling':4,
#                 'engine_idling':0, 'gun_shot':5, 'jackhammer':4, 'siren':6, 'street_music':2},inplace=True)
# eq_df2.replace({'air_conditioner':0, 'car_horn':0, 'children_playing':0, 'dog_bark':0, 'drilling':0,
#                 'engine_idling':0, 'gun_shot':1, 'jackhammer':0, 'siren':0, 'street_music':0},inplace=True)

The dataset documentation advises against shuffling the data. Many of the audio files are excerpts taken from the same longer recording, so shuffling them together would put related clips in both the training and validation sets and give misleading results. Instead, the dataset includes a pseudorandom "fold" category to serve as splits for cross-validation.

In [None]:
# Fold held out for validation in the single-split experiment that follows;
# rows from every other fold are used for training.
dropfold = 6

In [None]:
# Training frame: every row except those belonging to the held-out fold.
eq_df3 = eq_df2.drop(index=eq_df2.loc[eq_df2['fold'] == dropfold].index)

In [None]:
# Sanity check: no held-out rows should remain, so this should be empty.
eq_df3.loc[eq_df3['fold'] == dropfold].values

In [None]:
# Remove both non-feature columns so the training matrix has exactly the
# columns the validation matrix keeps (the validation split drops 'fold' and
# 'salience'). Leaving 'salience' in gave X_train one column more than X_val,
# which makes mlp.predict(X_val) fail with a feature-count mismatch.
eq_df3.drop(columns=['fold', 'salience'], inplace=True)

In [None]:
# Preview the training frame after the column drop.
eq_df3.head()

In [None]:
# Values of the row at position 1, skipping column 0 (the column used as the label).
eq_df3.iloc[1,1:].values

In [None]:
# Feature matrix: every column after column 0, as a NumPy array.
X_train = eq_df3.iloc[:, 1:].to_numpy()
# X_train = [eq_df3.iloc[:,1:-2].values, eq_df3.iloc[:,-1]]
X_train

In [None]:
# Target vector: column 0 of the training frame, as a NumPy array.
y_train = eq_df3.iloc[:, 0].to_numpy()
y_train

In [None]:
# Validation split: rows of the held-out fold. Features are everything after
# column 0 once 'fold' and 'salience' are removed; labels are column 0.
X_val = (
    eq_df2.loc[eq_df2['fold'] == dropfold]
    .drop(columns=['fold', 'salience'])
    .iloc[:, 1:]
    .values
)
y_val = eq_df2.loc[eq_df2['fold'] == dropfold].iloc[:, 0].values

In [None]:
# True if any validation feature is +/-inf.
np.isinf(X_val).any()

In [None]:
# Labels of the held-out fold (column 0 of the encoded frame).
y_val

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
# Five hidden layers of 150 units each. Early stopping ends training well
# before the (deliberately huge) iteration cap is reached.
# NOTE(review): no random_state is set, so results vary from run to run.
mlp = MLPClassifier(
    hidden_layer_sizes=(150,) * 5,
    max_iter=100000,
    early_stopping=True,
)

In [None]:
# Train on the rows of every fold except `dropfold`.
mlp.fit(X_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

Here we consider the True Positive Rate for the gunshot data as an indicator of the goodness of fit. Interestingly this is often higher when the classifier can classify into multiple categories than simply gunshot/not gunshot.

In [None]:
# Per-class true positive rate (in percent) on the training and validation
# splits. Each split is predicted once and the result reused for both metrics.
train_pred = mlp.predict(X_train)
val_pred = mlp.predict(X_val)

fig, ax = plt.subplots()

acc = 100*TPR(y_train, train_pred, 10)
acc2 = 100*Fmeasure(y_train, train_pred, 10)
ax.scatter(range(10), acc, label='training')
# print("Training TPR of", acc, "Fmeasure of", acc2)

acc = 100*TPR(y_val, val_pred, 10)
acc2 = 100*Fmeasure(y_val, val_pred, 10)
ax.scatter(range(10), acc, label='validation')
# print("Validation TPR of", acc, "Fmeasure of", acc2)

# Labels and a legend make the two point sets distinguishable.
ax.set_xlabel('class id')
ax.set_ylabel('true positive rate (%)')
ax.legend()
plt.show()

In [None]:
# Validation confusion matrix as a labelled table: rows are the true class
# ids, columns the predicted ones.
pd.DataFrame(
    confusion_matrix(y_val, mlp.predict(X_val)),
    index=["actual " + str(k) for k in range(10)],
    columns=["predicted " + str(k) for k in range(10)],
)

# Two-class variant (gun_shot vs. rest):
# pd.DataFrame(confusion_matrix(y_val, mlp.predict(X_val)),
#             columns=["predicted "+str(i) for i in range(2)],
#             index=["actual "+str(i) for i in range(2)])

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

In [None]:
# Class names ordered by their integer id (position k is the name of class k).
# A real list rather than a dict-keys view, so it can be indexed and passed
# wherever an array-like of labels is expected.
classlabels = [
    'air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling',
    'engine_idling', 'gun_shot', 'jackhammer', 'siren', 'street_music',
]

In [None]:
# Draw the confusion matrix on an explicitly created 9x9 axes. Without `ax`,
# ConfusionMatrixDisplay.plot() opens its own default-sized figure, and the
# figure created beforehand is left empty.
fig, ax = plt.subplots(figsize=(9, 9))
disp = ConfusionMatrixDisplay(confusion_matrix(y_val, mlp.predict(X_val)),
                              display_labels=list(classlabels))
disp.plot(ax=ax, xticks_rotation='vertical')
plt.show()

As suggested by the dataset documentation, the most reliable estimate of a classifier's performance is achieved by averaging over the different possible train/validation splits.

In [None]:
# Leave-one-fold-out cross-validation of a single MLP, scored on class 6
# (gun_shot). accuracy_vec[k-1] holds the gun_shot TPR (percent) for fold k.
accuracy_vec = np.zeros(10)

# The label encoding does not depend on the held-out fold, so do it once.
eq_df2 = eq_df.replace({'air_conditioner':0, 'car_horn':1, 'children_playing':2, 'dog_bark':3, 'drilling':4,
                        'engine_idling':5, 'gun_shot':6, 'jackhammer':7, 'siren':8, 'street_music':9})

for dropfold in range(1,11):
    held_out = eq_df2['fold'] == dropfold

    # Training data: all other folds, without the two non-feature columns.
    eq_df3 = eq_df2[~held_out].drop(columns=['fold', 'salience'])
    X_train = eq_df3.iloc[:,1:].values
    y_train = eq_df3.iloc[:,0].values

    # Validation data: the held-out fold, with the same columns removed.
    val_df = eq_df2[held_out].drop(columns=['fold', 'salience'])
    X_val = val_df.iloc[:,1:].values
    y_val = val_df.iloc[:,0].values

    mlp = MLPClassifier(hidden_layer_sizes=(150,150,150,150,150,), max_iter=100000, early_stopping=True)
    mlp.fit(X_train, y_train)

    # Predict once and reuse the result for all three metrics.
    y_pred = mlp.predict(X_val)
    # Named recall_pct so the recall() helper defined earlier is not overwritten.
    recall_pct = np.round(100*TPR(y_val, y_pred,10)[6],2)
    prec = np.round(100*precision(y_val, y_pred,10)[6],2)
    Fmeas = np.round(100*Fmeasure(y_val, y_pred,10)[6],2)
    print("Validation TPR of", recall_pct, ",\n \tprecision of ", prec, ",\n \tand Fmeasure of", Fmeas, "on fold", str(dropfold))
    accuracy_vec[dropfold-1] = recall_pct

print(accuracy_vec)
accuracy_vec.mean()

In [None]:
# Class-probability vector for the validation sample at position 1.
mlp.predict_proba(X_val)[1]

In [None]:
from sklearn.ensemble import VotingClassifier

In [None]:
# Leave-one-fold-out cross-validation of a soft-voting ensemble of five
# identically configured MLPs, scored on class 6 (gun_shot).
accuracy_vec = np.zeros(10)

# The label encoding does not depend on the held-out fold, so do it once.
eq_df2 = eq_df.replace({'air_conditioner':0, 'car_horn':1, 'children_playing':2, 'dog_bark':3, 'drilling':4,
                        'engine_idling':5, 'gun_shot':6, 'jackhammer':7, 'siren':8, 'street_music':9})

for dropfold in range(1,11):
    held_out = eq_df2['fold'] == dropfold

    # Training data: all other folds, without the two non-feature columns.
    eq_df3 = eq_df2[~held_out].drop(columns=['fold', 'salience'])
    X_train = eq_df3.iloc[:,1:].values
    y_train = eq_df3.iloc[:,0].values

    # Validation data: the held-out fold, with the same columns removed.
    val_df = eq_df2[held_out].drop(columns=['fold', 'salience'])
    X_val = val_df.iloc[:,1:].values
    y_val = val_df.iloc[:,0].values

    # Five voters named mlp1 .. mlp5, all with the same architecture.
    voters = [
        ('mlp' + str(k),
         MLPClassifier(hidden_layer_sizes=(150,150,150,150,150,), max_iter=100000, early_stopping=True))
        for k in range(1, 6)
    ]
    vote_class = VotingClassifier(estimators=voters, voting='soft')
    vote_class = vote_class.fit(X_train, y_train)

    # Predict once and reuse the result for all three metrics.
    y_pred = vote_class.predict(X_val)
    # Named recall_pct so the recall() helper defined earlier is not overwritten.
    recall_pct = np.round(100*TPR(y_val, y_pred,10)[6],2)
    prec = np.round(100*precision(y_val, y_pred,10)[6],2)
    Fmeas = np.round(100*Fmeasure(y_val, y_pred,10)[6],2)
    print("Validation TPR of", recall_pct, ",\n \tprecision of ", prec, ",\n \tand Fmeasure of", Fmeas, "on fold", str(dropfold))
    accuracy_vec[dropfold-1] = recall_pct

print(accuracy_vec)
accuracy_vec.mean()

In [None]:
# Scratch check of the layer-width expression: a uniform draw in [0, 1) is
# scaled to [0, 200), rounded and shifted, giving an integer from 50 to 250.
int(round(np.random.rand()*200,0)+50)

In [None]:
def randlayer():
    """
    Draw a random hidden-layer width.

    A uniform sample from [0, 1) is scaled by 200, rounded to the nearest
    whole number and offset by 50, so the result is an ``int`` between 50 and
    250 inclusive. Uses NumPy's global random state.
    """
    scaled = np.random.rand() * 200
    width = round(scaled, 0) + 50
    return int(width)

In [None]:
# Leave-one-fold-out cross-validation of a soft-voting ensemble of twenty
# MLPs whose five hidden-layer widths are drawn at random with randlayer(),
# scored on class 6 (gun_shot).
accuracy_vec = np.zeros(10)

# The label encoding does not depend on the held-out fold, so do it once.
eq_df2 = eq_df.replace({'air_conditioner':0, 'car_horn':1, 'children_playing':2, 'dog_bark':3, 'drilling':4,
                        'engine_idling':5, 'gun_shot':6, 'jackhammer':7, 'siren':8, 'street_music':9})

for dropfold in range(1,11):
    held_out = eq_df2['fold'] == dropfold

    # Training data: all other folds, without the two non-feature columns.
    eq_df3 = eq_df2[~held_out].drop(columns=['fold', 'salience'])
    X_train = eq_df3.iloc[:,1:].values
    y_train = eq_df3.iloc[:,0].values

    # Validation data: the held-out fold, with the same columns removed.
    val_df = eq_df2[held_out].drop(columns=['fold', 'salience'])
    X_val = val_df.iloc[:,1:].values
    y_val = val_df.iloc[:,0].values

    # Twenty voters named mlp1 .. mlp20; each gets its own random architecture.
    voters = [
        ('mlp' + str(k),
         MLPClassifier(hidden_layer_sizes=tuple(randlayer() for _ in range(5)),
                       max_iter=100000, early_stopping=True))
        for k in range(1, 21)
    ]
    vote_class = VotingClassifier(estimators=voters, voting='soft', n_jobs=4)
    vote_class = vote_class.fit(X_train, y_train)

    # Predict once and reuse the result for all three metrics.
    y_pred = vote_class.predict(X_val)
    # Named recall_pct so the recall() helper defined earlier is not overwritten.
    recall_pct = np.round(100*TPR(y_val, y_pred,10)[6],2)
    prec = np.round(100*precision(y_val, y_pred,10)[6],2)
    Fmeas = np.round(100*Fmeasure(y_val, y_pred,10)[6],2)
    print("Validation TPR of", recall_pct, ",\n \tprecision of ", prec, ",\n \tand Fmeasure of", Fmeas, "on fold", str(dropfold))
    accuracy_vec[dropfold-1] = recall_pct

print(accuracy_vec)
accuracy_vec.mean()

In [None]:
# Scratch check of the estimator-name pattern: prefix "mlp" plus an index.
teststr = 'mlp{}'.format(1)
print(teststr)

In [None]:
# Plain assignment instead of exec() on a built-up string: same estimator,
# but visible to linters and readers. Five random hidden-layer widths.
mlp21 = MLPClassifier(hidden_layer_sizes=tuple(randlayer() for _ in range(5)),
                      max_iter=100000, early_stopping=True)

In [None]:
# Show the estimator created in the previous cell.
mlp21

In [None]:
def voter_tuple(i):
    """
    Build one ``(name, estimator)`` entry for a VotingClassifier.

    The name is ``'mlp'`` followed by ``i``. The estimator is an unfitted
    MLPClassifier with five hidden layers whose widths are each drawn with
    ``randlayer()``, early stopping enabled and an iteration cap of 100000.
    """
    label = 'mlp' + str(i)
    layer_widths = tuple(randlayer() for _ in range(5))
    estimator = MLPClassifier(hidden_layer_sizes=layer_widths,
                              max_iter=100000,
                              early_stopping=True)
    return (label, estimator)

In [None]:
# Smoke test: should display a ('mlp2', MLPClassifier(...)) pair.
voter_tuple(2)

In [None]:
from joblib import dump

In [None]:
# Leave-one-fold-out cross-validation of a soft-voting ensemble of
# `num_voters` randomly sized MLPs (built with voter_tuple), scored on class 6
# (gun_shot). The fitted ensemble of every fold is saved to disk.
num_voters=2
accuracy_vec = np.zeros(10)

# The label encoding does not depend on the held-out fold, so do it once.
eq_df2 = eq_df.replace({'air_conditioner':0, 'car_horn':1, 'children_playing':2, 'dog_bark':3, 'drilling':4,
                        'engine_idling':5, 'gun_shot':6, 'jackhammer':7, 'siren':8, 'street_music':9})

for dropfold in range(1,11):
    held_out = eq_df2['fold'] == dropfold

    # Training data: all other folds, without the two non-feature columns.
    eq_df3 = eq_df2[~held_out].drop(columns=['fold', 'salience'])
    X_train = eq_df3.iloc[:,1:].values
    y_train = eq_df3.iloc[:,0].values

    # Validation data: the held-out fold, with the same columns removed.
    val_df = eq_df2[held_out].drop(columns=['fold', 'salience'])
    X_val = val_df.iloc[:,1:].values
    y_val = val_df.iloc[:,0].values

    # Voters are named mlp0 .. mlp{num_voters-1}.
    voter_list = [voter_tuple(i) for i in range(num_voters)]

    vote_class = VotingClassifier(estimators=voter_list,
                 voting='soft', n_jobs=4)
    vote_class = vote_class.fit(X_train, y_train)

    # Predict once and reuse the result for all three metrics.
    y_pred = vote_class.predict(X_val)
    # Named recall_pct so the recall() helper defined earlier is not overwritten.
    recall_pct = np.round(100*TPR(y_val, y_pred,10)[6],2)
    prec = np.round(100*precision(y_val, y_pred,10)[6],2)
    Fmeas = np.round(100*Fmeasure(y_val, y_pred,10)[6],2)
    print("Validation TPR of", recall_pct, ",\n \tprecision of ", prec, ",\n \tand Fmeasure of", Fmeas, "on fold", str(dropfold))
    accuracy_vec[dropfold-1] = recall_pct
    # One model file per held-out fold, written to the working directory.
    dump(vote_class, 'hive_mind_democracy_fold'+str(dropfold)+'.joblib')

print(accuracy_vec)
accuracy_vec.mean()

In [None]:
from joblib import load

In [None]:
# Reload the ensemble that the cross-validation loop saved for held-out fold 10.
# NOTE: joblib files are pickle-based -- only load files you created yourself.
clf = load('hive_mind_democracy_fold10.joblib') 

In [None]:
# X_val is already a plain NumPy feature matrix at this point: the split code
# removed 'fold' and 'salience' before calling .values. An ndarray has no
# .drop(), so the former X_val.drop(columns='salience', ...) call raised
# AttributeError. Just confirm the shape instead.
X_val.shape

In [None]:
# Confusion matrix of the reloaded ensemble on the current validation split.
# Passing `ax` makes the plot use the 9x9 figure; without it,
# ConfusionMatrixDisplay.plot() opens its own default-sized figure and the one
# created beforehand is left empty.
fig, ax = plt.subplots(figsize=(9, 9))
disp = ConfusionMatrixDisplay(confusion_matrix(y_val, clf.predict(X_val)),
                              display_labels=list(classlabels))
disp.plot(ax=ax, xticks_rotation='vertical')
plt.show()