In [17]:
import numpy as np
import pandas as pd

sound_data = pd.read_csv('datasets/jrelabeled.csv')
labeled_data = sound_data[sound_data['label'] >= 0]
labeled_data['label'].value_counts()
X = labeled_data.drop(['filename', 'label', 'predictions'], axis=1)
y = labeled_data['label']
X.head()

Unnamed: 0,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20
0,0.490575,0.025069,2171.913549,2139.885722,4431.553141,0.101278,-380.53302,112.694641,-31.796848,1.780545,...,10.122002,-1.219215,-5.020499,-8.176243,-6.034646,5.1411,-4.364628,-1.78379,-1.651915,-2.534616
1,0.441578,0.031176,2452.753355,2400.318018,5115.332031,0.126144,-353.482391,92.485397,-3.311667,21.250517,...,-3.828105,-2.363839,-5.873488,2.105093,-7.501835,2.208278,-3.90622,-3.250791,-2.498704,0.930458
2,0.391119,0.049024,2102.894499,2018.743044,3995.107015,0.106585,-298.112457,105.581268,-1.911337,38.33287,...,-2.13652,-4.300785,-13.941048,0.797238,-8.774372,-1.213172,-6.336737,-4.957156,-6.576495,-1.673529
5,0.469809,0.037327,2239.813927,2191.132913,4524.066162,0.112287,-324.963318,108.212601,-16.467024,26.865139,...,-2.978554,-0.77089,-7.123728,3.142917,-7.908026,0.713636,-4.640324,-4.185403,-4.470373,4.049245
6,0.370643,0.060327,2311.597408,1842.221912,4046.746826,0.141018,-244.242737,101.293144,-26.639498,17.342655,...,0.350607,-7.042977,-17.581421,0.219237,-15.416845,-2.52766,-6.503064,-7.859387,-11.159444,-1.384048


In [18]:
from sklearn.preprocessing import StandardScaler

st_scaler = StandardScaler()
scaled_X = st_scaler.fit_transform(X)

In [19]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(scaled_X, y, test_size=0.2)

In [20]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import RMSprop

model = Sequential()
model.add(Dense(256, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam',
             loss='binary_crossentropy',
             metrics = ['accuracy'])

In [21]:
history = model.fit(np.array(X_train), y_train, 
                    epochs=20, 
                    batch_size=24,
                   verbose=2,
                   validation_data=(np.array(X_test), y_test))

Epoch 1/20
96/96 - 0s - loss: 0.1256 - accuracy: 0.9508 - val_loss: 0.0225 - val_accuracy: 0.9965
Epoch 2/20
96/96 - 0s - loss: 0.0207 - accuracy: 0.9939 - val_loss: 0.0117 - val_accuracy: 0.9965
Epoch 3/20
96/96 - 0s - loss: 0.0089 - accuracy: 0.9970 - val_loss: 0.0170 - val_accuracy: 0.9930
Epoch 4/20
96/96 - 0s - loss: 0.0059 - accuracy: 0.9978 - val_loss: 0.0086 - val_accuracy: 0.9965
Epoch 5/20
96/96 - 0s - loss: 0.0068 - accuracy: 0.9978 - val_loss: 0.0082 - val_accuracy: 0.9983
Epoch 6/20
96/96 - 0s - loss: 0.0204 - accuracy: 0.9913 - val_loss: 0.0276 - val_accuracy: 0.9948
Epoch 7/20
96/96 - 0s - loss: 0.0062 - accuracy: 0.9978 - val_loss: 0.0374 - val_accuracy: 0.9861
Epoch 8/20
96/96 - 0s - loss: 8.3009e-04 - accuracy: 1.0000 - val_loss: 0.0188 - val_accuracy: 0.9930
Epoch 9/20
96/96 - 0s - loss: 3.2531e-04 - accuracy: 1.0000 - val_loss: 0.0239 - val_accuracy: 0.9913
Epoch 10/20
96/96 - 0s - loss: 2.2807e-04 - accuracy: 1.0000 - val_loss: 0.0227 - val_accuracy: 0.9930
Epoch 1

In [22]:
from pydub import AudioSegment
import librosa
import pathlib
import os



def preprocessing(filename):
    

    y, sr = librosa.load(f"audio/{filename}.wav", mono=True)
    
    
    rmse = librosa.feature.rms(y=y) #root mean squred ENERGY note. changed to rms form rmse
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    
    to_append = f'{np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'

    for e in mfcc:
        to_append += f' {np.mean(e)}'
    
    scaled_data = st_scaler.transform([to_append.split()])
    
    return scaled_data

In [38]:
vector = preprocessing(f'roganclimate/roganclimate8')
model.predict(vector)

array([[0.99997556]], dtype=float32)

# Trying out Random Forests

In [35]:
from sklearn.ensemble import RandomForestClassifier

rf_clf = RandomForestClassifier()
rf_clf.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [37]:
from sklearn.metrics import confusion_matrix
train_predictions = rf_clf.predict(X_train)

cmatrix = confusion_matrix(y_train, train_predictions)
cmatrix

array([[ 912,    0],
       [   0, 1387]], dtype=int64)

In [39]:
rf_clf.predict(vector)

array([1], dtype=int64)