In [None]:
import librosa
from librosa.effects import pitch_shift
import os
import pathlib
import random
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from librosa.feature import mfcc
from sklearn import svm
import scipy.io.wavfile as wav
import scipy.signal as signal
from sklearn.model_selection import train_test_split
import tensorflow as tf
features = []
labels = []
dataset_path: str = "C:\\Users\\rclendening\\researchData\\RedVox_TrainingBinary_wYTVids"
data_dir = pathlib.Path(dataset_path)
droneDict = {  # One hot encoding for labels probs should do it like I did below?
    "Drone": [1, 0],
    "Noise": [0, 1]
}
droneCountDict = {  # One hot encoding for labels
    "Drone": 0,
    "Noise": 1
}

dataCount = [0, 0]
# drones = np.array(tf.io.gfile.listdir(str(data_dir)))
# filenames = tf.io.gfile.glob(str(data_dir) + '/*/*')
# filenames = tf.random.shuffle(filenames)
# num_samples = len(filenames)
# train_files = filenames
# x = round((len(train_files) / 5))
# train_files = train_files[:x]
# print("Total num of samples: ", num_samples)
# print("Number of examples per label:", len(tf.io.gfile.listdir(str(data_dir / drones[0]))))
# print("Example file tensor: ", filenames[0])
# print(drones)
train_files=[]
for path, subdirs, files in os.walk(dataset_path):
    for name in files:
        train_files.append(os.path.join(path, name))
# test_file = tf.io.read_file(
#     "C:\\Users\\rclendening\\researchData\\Training_Data_NM_RS\\IF1200\\d301sA1r01p0120210823_6.wav")
# test_audio, _ = tf.audio.decode_wav(contents=test_file)
# test_audio.shape


def split_audio(waveData, labelName, sampleFreq):
    '''
    Frames audio data and converts to feature space (MFCC)
    :param waveData: waveData array of time-domain audio
    :param frame_duration: Duration of frames desired
    :param startTime: Start for each clip
    :param sampleFreq: Sample Frequency (8Khz)
    :param labelName: Name of label
    @return list of features (ds), list of labels corresponding to feature dataset:
    '''
    # middle third of data
    duration = waveData.shape[0]
    startTime = np.round(duration / 3)
    endTime = np.round(duration * 2 / 3)
    waveDataSplit= waveData[int(startTime):int(endTime)]
    features=MFCCCalc(waveDataSplit.squeeze(), sampleFreq)
    dataCount[droneCountDict[labelName]] += features.shape[1]
    label= [droneDict[labelName]] * features.shape[1]
    return features, label

def create_dataset(train_files):
    '''
    Creates feature dataset and label dataset.
    @param train_files: EagerTensor of file paths.
    @return list of features (ds), list of labels corresponding to feature dataset:
    '''
    i = 0
    features = []
    labels = []
    for x in train_files:
        #test_file = tf.io.read_file(x)
        #test_audio, sampleRate = tf.audio.decode_wav(contents=test_file)
        test_audio, sampleRate = librosa.load(x, sr=8000)
        if min(np.asarray(test_audio)) != 0:
            x = str(x)
            label = x.split('\\')
            label = label[5]
            newData = test_audio[0: test_audio.shape[0] - test_audio.shape[0] % sampleRate]  # trim to nearest second
            newFeats, newLabs = split_audio(newData, label, int(sampleRate))
            features.extend(newFeats.transpose())
            labels.extend(newLabs)

    return features, labels

def MFCCCalc(audioData, Fs):
    '''
    Converts decoded wav file to MFCC feature space
    @param audioData: Numpy array of decoded audio wav file
    @return MFCC coefficients
    '''
    #audioData=audioData.numpy()
    data= audioData.astype(float)
    #coefs = mfcc(data, sr=sampleRate, hop_length=2048)
    coefs = mfcc(y=data, hop_length=2048,n_mfcc=40, sr=Fs)

    return coefs

def grabTrainingSamples(n, trainingData):
    '''
    Ensures even training set by grabbing an even amount of training samples from each class.
    @param n: limiting class count
    @param trainingData: trainingData list that includes both features and labels
    @return MFCC coefficients
    '''
    droneCount=0
    noiseCount=0
    evenTrainingData = []
    evenLabelData = []
    for i in range(len(labels)):
        lab = trainingData[i][1]
        if lab == [1, 0] and droneCount < n:
            droneCount += 1
            evenTrainingData.append(trainingData[i][0])
            evenLabelData.append(lab)
        elif lab == [0,1] and noiseCount < n:
            noiseCount += 1
            evenTrainingData.append(trainingData[i][0])
            evenLabelData.append(lab)
    return evenTrainingData, evenLabelData

Fs = 8000
numFeat = 40 #COULD BE SOURCE OF ERROR
features, labels = create_dataset(train_files)
newSet = list(zip(features, labels))
random.seed(42)
random.shuffle(newSet)  # Ensure data is mixed together
n = np.min(dataCount)  # Ensure data is symmetric (aka even amounts of training data for all classes)
# features, labels = grabTrainingSamples(n, features, labels)
features, labels = grabTrainingSamples(n, newSet)

trainFeatures, testFeatures, trainTruth, testTruth = train_test_split(features, labels, test_size=0.8, random_state=42)
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score, KFold
from sklearn.model_selection import GridSearchCV

newLabels=[]
for x in trainTruth: #convert one hot to actual numbers
    if x[0] == 1:
        val=0
    else:
        val=1
    newLabels.append(val)
newTestLabels=[]
for x in testTruth: #convert one hot to actual numbers
    if x[0] == 1:
        val=0
    else:
        val=1
    newTestLabels.append(val)


## Complex Model Evaluation (Linear SVM)

This function performances' linear svm with cross validation.

In [39]:
scalar= StandardScaler()
linear_model = svm.SVC(C=0.001,kernel='linear')
pipeline= Pipeline([('transformer', scalar), ('estimator', linear_model)])
cv = KFold(n_splits=5)
scores= cross_val_score(pipeline,trainFeatures,newLabels,cv=cv)

print(np.average(scores))
pipeline.fit(trainFeatures,newLabels)
print(pipeline.score(testFeatures,newTestLabels))


0.8402173913043478
0.8356459676835012


## Complex Model Evaluation (Poly SVM)

This function performances' linear svm with cross validation.

In [None]:
scalar= StandardScaler()
poly_model = svm.SVC(kernel='poly')
param_grid= {
    'estimator__C':[1,10,100], 'estimator__gamma':[1,0.1,0.001,0.0001],'estimator__degree':[2,3,4]}
pipeline= Pipeline([('transformer', scalar), ('estimator', poly_model)])
cv = KFold(n_splits=4)
search = GridSearchCV(pipeline, param_grid,refit=True,verbose=3, n_jobs=-1)
print(search.fit(trainFeatures,newLabels))
print(search.best_params_)
print(search.best_score_)
scores= cross_val_score(pipeline,trainFeatures,newLabels,cv=cv)
print(np.average(scores))
pipeline.fit(trainFeatures,newLabels)
print(pipeline.score(testFeatures,newTestLabels))

In [44]:
print(pipeline.get_params())

{'memory': None, 'steps': [('transformer', StandardScaler()), ('estimator', SVC(kernel='poly'))], 'verbose': False, 'transformer': StandardScaler(), 'estimator': SVC(kernel='poly'), 'transformer__copy': True, 'transformer__with_mean': True, 'transformer__with_std': True, 'estimator__C': 1.0, 'estimator__break_ties': False, 'estimator__cache_size': 200, 'estimator__class_weight': None, 'estimator__coef0': 0.0, 'estimator__decision_function_shape': 'ovr', 'estimator__degree': 3, 'estimator__gamma': 'scale', 'estimator__kernel': 'poly', 'estimator__max_iter': -1, 'estimator__probability': False, 'estimator__random_state': None, 'estimator__shrinking': True, 'estimator__tol': 0.001, 'estimator__verbose': False}


## Complex Model Evaluation (RBF)

This function performances' hyperparameter tuning of the RBF kernel for SVM. In TS which contains 80% of data, it achieves .90073 accuracy rate.

In [23]:
scalar= StandardScaler()
model = svm.SVC(kernel='rbf')
pipeline= Pipeline([('transformer', scalar), ('estimator', model)])
cv = KFold(n_splits=10)
param_grid= {
    'estimator__C':[0.1,1,10,100], 'estimator__gamma':[1,0.1,0.01,0.001,0.0001]}

search = GridSearchCV(pipeline, param_grid,refit=True,verbose=3, n_jobs=-1)
print(search.fit(trainFeatures,newLabels))
print(search.best_params_)
print(search.best_score_)
newTestLabels=[]
for x in testTruth: #convert one hot to actual numbers
    if x[0] == 1:
        val=0
    else:
        val=1
    newTestLabels.append(val)
search.score(testFeatures,newTestLabels)

{'estimator__C': 100, 'estimator__gamma': 0.01}
0.9008695652173913


0.9007318310267372

## Simple Model Evaluation: LDA/QDA

The fact that LDA outperforms QDA suggests that there is more of a linear decision boundary within the dataset.

In [29]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
lda= LinearDiscriminantAnalysis()
qda = QuadraticDiscriminantAnalysis()
lda.fit(trainFeatures,newLabels)
scores= cross_val_score(lda,trainFeatures,newLabels,cv=5)
print(scores)
qda.fit(trainFeatures,newLabels)
scores= cross_val_score(qda,trainFeatures,newLabels,cv=5)
print(scores)
print("LDA Test Set Score",lda.score(testFeatures,newTestLabels))
print("QDA Test Set Score",qda.score(testFeatures,newTestLabels))

[0.82753623 0.84963768 0.83188406 0.83623188 0.85072464]
[0.77681159 0.80217391 0.77826087 0.78985507 0.78985507]
LDA Test Set Score 0.8367690747047315
QDA Test Set Score 0.7816281428881965


In [None]:
from sklearn.metrics import accuracy_score
newTestTargets=[]
for x in testTargets:
    if x[0] == 1:
        val=0
    else:
        val=1
    newTestTargets.append(val)
print(accuracy_score(y_true=newTestTargets, y_pred=y_pred))
testFeats= testFeatures

Testing with cell phone scenario. Achieving 0% accuracy because although sampled at same frequency, there must be some cell phone artifacts in the noise data that is making all cell phone data be seen as noise. After further evaluation, taking the logPSD causes the training data to potentially overfit(?). When PSD is calculated without log, the algorithm can accurately predict the drone when using cell phones as test data. Further investigation will be required to further refine and determine.

This function implements majority voting scheme. It assumes all cell phones record an equal amount of data during a test, and assumes the class with the most votes is the correct class.

In [None]:
NOISE=1
DRONE=0
def split_test_audio(waveData, labelName, sampleFreq):
    '''
    Frames audio data and converts to feature space (spectrogram)
    :param waveData: waveData array of time-domain audio
    :param frame_duration: Duration of frames desired
    :param startTime: Start for each clip
    :param sampleFreq: Sample Frequency (8Khz)
    :param labelName: Name of label
    @return list of features (ds), list of labels corresponding to feature dataset:
    '''
    duration = waveData.shape[0]
    features=MFCCCalc(waveData.numpy().squeeze())
    label= [droneDict[labelName]] * features.shape[1]
    return features, label

def create_test_dataset(test_files, testTime):
    """
    Creates feature dataset and label dataset.
    @param test_files: EagerTensor of file paths.
    @return list of features (ds), list of labels corresponding to feature dataset:
    """
    features = []
    featuresLL=[]
    labels = []
    numPhones=0
    phoneName=[]
    for x in test_files:
        test_file = tf.io.read_file(x)
        test_audio, sampleRate = tf.audio.decode_wav(contents=test_file)
        if len(test_audio) >= testTime *sampleRate and min(np.asarray(test_audio)) != 0: #ensure data actually has sound and recorded correctly
            numPhones+=1
            x = str(x)
            phoneName.append(x)
            label = x.split('\\')
            #label = label[6]
            #test_audio = test_audio[0: test_audio.shape[0] - test_audio.shape[0] % 8000]  # trim to nearest second
            test_audio = test_audio[0: (testTime-1)*sampleRate]  # trim to nearest second
            #test_audio = test_audio[round(testTime/3)*sampleRate: round(testTime)*sampleRate]  # trim to nearest second
            newFeats, newLabs = split_test_audio(test_audio, "Drone", int(sampleRate))
            featuresLL.append(newFeats.transpose())
            features.extend(newFeats.transpose())
            labels.extend(newLabs)

    return features, labels, numPhones, featuresLL

def maxValues(features):
    maxVals=[]
    for x in features:
        maxVals.append(abs(x).max())

    return maxVals

def maxValueRanks(maxValArray):
    maxValueRankArr=[]
    for x in maxValArray:
        seq = sorted(x)
        index = [seq.index(v) for v in x]
        maxValueRankArr.append(index)
    return maxValueRankArr
def majorityVoteNew(testFeats):
    y_prediction=[]
    maxVals=[]
    for x in testFeats:
        # x -= mean
        # x /= std
        #x= np.reshape(x, (len(x), numFeat, 1))
        #x_scaled= scaler.transform(x)
        maxVals.append(maxValues(x))
        #pred=np.argmax(trainedModel.predict(x),axis=1)
        pred=pipeline.predict(x)
        y_prediction.append(pred)

    maxVals=np.asarray(maxVals).transpose()
    maxValueRankArr=maxValueRanks(maxVals)
    numPhones=len(y_prediction)
    maxVote = (numPhones-1) * numPhones / 2 # closed form for summation
    predictedList=[]
    prevState=7
    for i in range(len(pred)): # i is feature frame
        predictedDrone=0
        for j in range(len(y_prediction)): # j is phone
            if y_prediction[j][i] == 0:
            #if y_prediction[j][i] == 0 and maxValueRankArr[i][j] > 14:
                predictedDrone+= 1*maxValueRankArr[i][j]
                #predictedDrone+= 1

        if predictedDrone/maxVote >.45:
        #if predictedDrone/numPhones >0.63:
        #if predictedDrone/5 >0.50:
            predictedVal=0
            #predictedList.append(0)
        else:
            #predictedList.append(1)
            predictedVal=1
        prevState, prediction=fourBitPrediction(prevState, predictedVal)
        predictedList.append(prediction)

    return predictedList, y_prediction
def fourBitPrediction(prevPredictState,prediction): #prediction need to be 1 (noise) or -1 (drone)
    if prediction == NOISE:
        predVal=1
    else:
        predVal=-1
    if prevPredictState+predVal>7:
        actualPrediction= 1
    else:
        actualPrediction = 0
    prevPredictState=prevPredictState+predVal
    if prevPredictState>15:
        prevPredictState=15
    elif prevPredictState<0:
        prevPredictState=0
    return prevPredictState, actualPrediction

def threeBitPrediction(prevPredictState,prediction): #prediction need to be 1 (noise) or -1 (drone)
    if prediction == NOISE:
        predVal=1
    else:
        predVal=-1
    if prevPredictState+predVal>3:
        actualPrediction= 1
    else:
        actualPrediction = 0
    prevPredictState=prevPredictState+predVal
    if prevPredictState>7:
        prevPredictState=7
    elif prevPredictState<0:
        prevPredictState=0
    return prevPredictState, actualPrediction

def twoBitPrediction(prevPredictState,prediction): #prediction need to be 1 (noise) or -1 (drone)
    if prediction == NOISE:
        predVal=1
    else:
        predVal=-1
    if prevPredictState+predVal>1:
        actualPrediction= 1
    else:
        actualPrediction = 0
    prevPredictState=prevPredictState+predVal
    if prevPredictState>3:
        prevPredictState=3
    elif prevPredictState<0:
        prevPredictState=0
    return prevPredictState, actualPrediction


dataset_path: str = "C:\\Users\\rclendening\\researchData\\EscapeCell_DataWav\\A3\\A3R5P2" #TODO
#dataset_path: str = "C:\\Users\\rclendening\\researchData\\test123" #TODO
Testdata_dir = pathlib.Path(dataset_path)
phones = tf.io.gfile.glob(str(Testdata_dir) + '/*')
testTime= 90
testSceneFeatures,testSceneTargets, numPhones, featuresLL =create_test_dataset(phones, testTime)
predictedList, y_prediction = majorityVoteNew(featuresLL)

correcto=0

for y in y_prediction:
    correcto=1
    for x in range(0,348):
        if y[x] == 0:
            correcto+=1
    print("Single Phone",correcto/400)
correcto=0
for x in range(0,348):
    if predictedList[x] == 0:
        correcto+=1
print("Accuracy for drone present:", correcto/400)
plt.plot(range(len(predictedList)),predictedList)
plt.figure()
#plt.plot(classPrediction)







In [None]:
correcto=0
sumVal=0
phone=0
start=0
stop=len(predictedList)
#y_actual=[1]*200+[1]*700+[0]*1000
y_actual=[1]*200+[0]*500+[1]*800
for y in y_prediction:
    phone+=1
    correcto=0
    for x in range(start,stop):
        if y[x] == y_actual[x]:
            correcto+=1
    print("Single Phone",phone)
    print(correcto/(stop-start))
    sumVal+= correcto/(stop-start)
print("Average Value:", sumVal/len(y_prediction))
correcto=0
for x in range(start,stop):
    if predictedList[x] == y_actual[x]:
        correcto+=1
print("Accuracy for drone present:", correcto/(stop-start))
plt.plot(np.arange(0,testTime,testTime/len(predictedList)),predictedList)
#plt.plot(predictedList)
plt.figure()
print("Novel method outperforms by:", (100*((correcto/(stop-start))-sumVal/len(y_prediction))/abs(sumVal/len(y_prediction))), "%")
#plt.plot(classPrediction)

In [None]:
import pandas as pd
truthData= pd.read_csv(r"C:\Users\rclendening\researchData\researchCSVs_Scripts_etc\testTruthData.csv")
novelAve=[]
sumAve=[]
outPerform=[]
for x in range(18):
    truthScenario = truthData.iloc[x]
    name=truthScenario[0]
    startDetect= truthScenario[1]
    endDetect=truthScenario[2]
    testTime=90
    start=0
    dataset_path: str = "C:\\Users\\rclendening\\researchData\\EscapeCell_DataWav\\A3\\"+name
    Testdata_dir = pathlib.Path(dataset_path)
    phones = tf.io.gfile.glob(str(Testdata_dir) + '/*')
    testFeatures,testTargets, numPhones, featuresLL =create_test_dataset(phones, testTime)
    predictedList, y_prediction = majorityVoteNew(featuresLL)
    stop=len(predictedList)
    y_actual=np.ones(stop)
    y_actual[int(np.round(stop*startDetect/testTime)):int(np.round(stop*endDetect/testTime))]=0
    sumVal=0
    for y in y_prediction:
        phone+=1
        correcto=0
        for x in range(start,stop):
            if y[x] == y_actual[x]:
                correcto+=1
        #print("Single Phone",phone)
        #print(correcto/(stop-start))
        sumVal+= correcto/(stop-start)
    print(name)
    print("Average Value:", sumVal/len(y_prediction))
    sumAve.append(sumVal/len(y_prediction))
    correcto=0
    for x in range(start,stop):
        if predictedList[x] == y_actual[x]:
            correcto+=1
    percCorrect= correcto/stop
    novelAve.append(percCorrect)
    print("Accuracy for drone present:", percCorrect)
    #plt.plot(np.arange(0,testTime,testTime/stop),predictedList)
#plt.plot(predictedList)
    #plt.figure()
    performanceGain=100*((percCorrect-sumVal/len(y_prediction))/abs(sumVal/len(y_prediction)))
    outPerform.append(performanceGain)
    print("Novel method outperforms by:", performanceGain, "%")
print(np.average(outPerform))
print(np.average(novelAve))
print(np.average(sumAve))

In [None]:
from scipy import stats
novelAve=np.asarray(novelAve)
a_no_outliers = novelAve[(np.abs(stats.zscore(novelAve)) < 2)]
a_no_outliers=np.sort(a_no_outliers)
print(a_no_outliers[4:])
print(np.average(a_no_outliers[4:]))