In [2]:
import numpy as np
from numpy import save
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from scipy import fftpack
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier

# Load dataset
# Change the TAG value to select which dataset is loaded:
#   TAG == 0 -> MFCC data ("ds_mfccs.npy")
#   TAG == 1 -> raw samples plus FFT frequency-bin values ("ds_samples.npy")
TAG = 1

# presumably the audio sampling rate in Hz — TODO confirm
SAMPLE_RATE = 16000

ds_freqs = []

if TAG == 0:
    # Flatten every per-sample array to 1-D so that the dataset becomes a
    # single 2-D matrix with one row per sample.
    ds_samples = np.array([el.ravel() for el in np.load("ds_mfccs.npy")])

elif TAG == 1:
    ds_samples = np.load("ds_samples.npy")
    if ds_samples.ndim != 2:
        raise ValueError(
            "ds_samples.npy must hold a 2-D array (one row per sample), "
            "got shape {}".format(ds_samples.shape)
        )

    n_rows, n_points = ds_samples.shape

    # The frequency axis depends only on the row length, so it is computed
    # once and broadcast to every row (read-only view, no extra memory).
    freq_axis = fftpack.fftfreq(n_points) * SAMPLE_RATE
    ds_freqs = np.broadcast_to(freq_axis, (n_rows, n_points))

    # NOTE(review): an FFT of every row used to be computed here and then
    # discarded; that dead computation has been dropped. Because the
    # frequency axis is identical for every row it carries no per-sample
    # information — if spectral amplitudes were the intended features,
    # np.abs(fftpack.fft(ds_samples, axis=1)) should be concatenated
    # instead. Confirm intent before changing the features.

    # Concatenate the sample values and the frequency values column-wise.
    ds_tot = np.concatenate((ds_samples, ds_freqs), axis=1)
    ds_samples = ds_tot

else:
    # Fail early instead of hitting a NameError on ds_samples further down.
    raise ValueError("Unsupported TAG value {!r}; expected 0 or 1".format(TAG))

labels = np.load("labels.npy")
print('DS: ',ds_samples.shape,'  ',labels.shape)

# Split the dataset roughly 67/33 (test_size = 0.33).
# train_test_split shuffles the rows itself by default, and random_state
# makes that shuffle reproducible. No separate manual shuffle is done:
# an unseeded np.random.shuffle beforehand would make every run produce a
# different split despite random_state, and would copy the whole matrix.
# train_test_split also raises ValueError if samples and labels differ in
# length, which a manual index-based shuffle could silently hide.
X_train_flat, X_test_flat, y_train_flat, y_test_flat = train_test_split(
    ds_samples, labels, test_size=0.33, random_state=42
)
print('TRAIN: ',X_train_flat.shape, ' ', y_train_flat.shape)
print('TEST: ',X_test_flat.shape, ' ', y_test_flat.shape)

# No reshaping of X_train_flat / X_test_flat is done here.
# ndarray.flatten() returns a new 1-D copy and leaves the array untouched,
# so calling it without using the result only allocates a large temporary.
# NOTE(review): the split arrays are presumably already 2-D
# (samples x features), which is what the classifiers below are given.

# Encode the label characters as integer categories.
# The encoder is fitted on the training labels only; transforming the test
# labels raises ValueError if they contain a class unseen during fitting.
le = preprocessing.LabelEncoder()
y_train_cat = le.fit_transform(y_train_flat)
y_test_cat = le.transform(y_test_flat)

# Training step: build the SVC and Random Forest classifiers and fit each
# one on the training split (fit() returns the fitted estimator itself).
clf1 = svm.SVC().fit(X_train_flat, y_train_cat)
clf2 = RandomForestClassifier(random_state=0, max_depth=2).fit(
    X_train_flat, y_train_cat
)

# Prediction step: label the held-out test split with both models.
pred1, pred2 = (model.predict(X_test_flat) for model in (clf1, clf2))

# Evaluation step: fraction of test labels predicted correctly.
acc1, acc2 = (accuracy_score(y_test_cat, guess) for guess in (pred1, pred2))

for title, score in (('Accuracy SVC: ', acc1), ('Accuracy RF: ', acc2)):
    print(title, score)


(1335, 192000)
DS:  (1335, 192000)    (1335,)
TRAIN:  (894, 192000)   (894,)
TEST:  (441, 192000)   (441,)
['F' 'M']
(441,)    (441,)
(441,)    (441,)
Accuracy SVC:  0.47165532879818595
Accuracy RF:  0.5668934240362812
