In [3]:
import numpy as np
import librosa
import matplotlib.pyplot as plt
import noisereduce as nr
from keras.models import model_from_json
from sklearn.preprocessing import LabelEncoder
import IPython
import os
import csv

  from tqdm.autonotebook import tqdm
Using TensorFlow backend.


In [4]:
#Load segment audio classification model

# Directory and base file name of the saved model; the architecture is read
# from "<name>.json" and the weights from "<name>.h5" below.
model_path = r"Audio_classification/Models/"
model_name = "audio_NN_New2020_03_23_16_40_28_acc_90.37"

# Model reconstruction from JSON file
with open(model_path + model_name + '.json', 'r') as f:
    model = model_from_json(f.read())

# Load weights into the new model
model.load_weights(model_path + model_name + '.h5')

# Replicate label encoder
# NOTE(review): presumably these class names must match the ones the model was
# trained with so that output index k decodes to the right name -- confirm.
# This is the cell's last expression, so the encoded labels are shown as output.
lb = LabelEncoder()
lb.fit_transform(['Calling', 'Clapping', 'Falling', 'Sweeping', 'WashingHand', 'WatchingTV','enteringExiting','other'])

array([0, 1, 2, 3, 4, 5, 6, 7], dtype=int64)

In [11]:
#Some Utils

# Plot audio with zoomed in y axis
def plotAudio(output):
    """Draw ``output`` as a blue line on a 20x10 figure with a zoomed-in y axis.

    The x axis spans exactly ``0 .. len(output)``; the negative y margin
    (-0.1) passed to ``margins`` is what produces the zoom on the y axis.

    Args:
        output: Sequence of samples to plot (anything ``len()`` and
            matplotlib's ``plot`` accept).
    """
    figure, axes = plt.subplots(nrows=1, ncols=1, figsize=(20, 10))
    axes.plot(output, color='blue')
    axes.set_xlim((0, len(output)))
    axes.margins(2, -0.1)
    plt.show()

# Plot audio
def plotAudio2(output):
    """Draw ``output`` as a blue line on a wide 20x4 figure.

    The x axis spans exactly ``0 .. len(output)``; the y axis is left to
    matplotlib's default scaling.

    Args:
        output: Sequence of samples to plot.
    """
    figure, axes = plt.subplots(nrows=1, ncols=1, figsize=(20, 4))
    axes.plot(output, color='blue')
    axes.set_xlim((0, len(output)))
    plt.show()

def split_audio(audio_data, w, h, threshold_level, tolerence=10):
    """Split a long audio signal into clips separated by silence.

    The signal is scanned with a window of ``w`` samples moved in steps of
    ``h`` samples. A window counts as sound when its mean absolute amplitude
    is above ``threshold_level`` times the mean absolute amplitude of the
    first 20000 samples.

    Args:
        audio_data: 1-D array-like of audio samples.
        w: Window length in samples.
        h: Hop length in samples.
        threshold_level: Multiplier applied to the mean level of the first
            20000 samples to obtain the silence threshold.
        tolerence: Number of quiet windows that are tolerated inside a clip
            before a further quiet window closes it. A higher value keeps
            short silent parts from splitting the audio.

    Returns:
        A list of ``[start, end]`` sample-index pairs, one per detected clip.

    Notes:
        * The quiet-window counter is only reset when a clip is closed, so it
          counts every quiet window seen since the clip started, not just
          consecutive ones.
        * A clip that is still open when the scan ends is not returned.
    """
    magnitude = np.abs(audio_data)
    silence_threshold = threshold_level * np.mean(magnitude[:20000])

    segments = []
    segment_start = 0
    in_sound = False
    quiet_windows = 0

    for pos in range(0, len(magnitude) - w, h):
        level = np.mean(magnitude[pos:pos + w])
        if level > silence_threshold:
            # A loud window opens a new clip unless one is already open.
            if not in_sound:
                in_sound = True
                segment_start = pos
        elif level <= silence_threshold and in_sound:
            if quiet_windows > tolerence:
                # Tolerance used up: close the clip at this window.
                in_sound = False
                quiet_windows = 0
                segments.append([segment_start, pos])
            else:
                quiet_windows += 1
    return segments

def minMaxNormalize(arr):
    """Linearly rescale ``arr`` so its minimum maps to 0 and its maximum to 1.

    Args:
        arr: Numeric array (anything supporting ``arr - scalar`` and
            ``/ scalar`` element-wise, e.g. a numpy array).

    Returns:
        ``(arr - min) / (max - min)`` element-wise. When every element is
        equal (``max == min``) the result is all zeros instead of the NaNs a
        division by zero would produce.
    """
    mn = np.min(arr)
    span = np.max(arr) - mn
    if span == 0:
        # Constant input: avoid 0/0; (arr - mn) is already all zeros.
        span = 1
    return (arr - mn) / span

def predictSound(X):
    """Classify one audio clip and return its predicted class name.

    The clip is turned into a feature vector by taking the magnitude of its
    STFT (n_fft=512, hop_length=256, win_length=512) and averaging along
    axis 1. That vector is passed to the module-level ``model`` as a batch of
    one, and the index of the highest output is decoded with the module-level
    label encoder ``lb``.

    Args:
        X: Audio samples of the clip, as accepted by ``librosa.stft``.

    Returns:
        The class name decoded by ``lb`` for the top-scoring model output.

    Side effects:
        Prints the shapes of the STFT magnitude and of the averaged features.
    """
    spectrogram = np.abs(librosa.stft(X, n_fft=512, hop_length=256, win_length=512))
    print('stft len = ', np.shape(spectrogram))
    features = np.mean(spectrogram, axis=1)
    print('stft mean len = ', np.shape(features))
    # minMaxNormalize is not applied to the features here.
    scores = model.predict(np.array([features]))
    best_per_sample = [np.argmax(row) for row in scores]
    top_index = best_per_sample[0]
    return lb.inverse_transform([top_index])[0]

In [6]:
def get_win_mean(audio_data, w, h, threshold_level):
    """Compute the windowed mean absolute amplitude of a signal.

    A window of ``w`` samples is moved over the signal in steps of ``h``
    samples; the mean of each window is stored at the window's start index,
    all other positions stay 0.

    Args:
        audio_data: 1-D array-like of audio samples.
        w: Window length in samples.
        h: Hop length in samples.
        threshold_level: Multiplier applied to the mean level of the first
            20000 samples; only used for the printed threshold.

    Returns:
        Array with the same shape as ``audio_data`` holding the window means.
        It has the dtype of the input when that is floating point, otherwise
        float64 so that means are not truncated.

    Side effects:
        Prints the mean level of the first 20000 samples and the resulting
        threshold.
    """
    data = np.abs(audio_data)
    # Size the output from the argument itself rather than from a notebook
    # global, so the function works for any signal passed in.
    out_dtype = data.dtype if np.issubdtype(data.dtype, np.floating) else np.float64
    win_mean = np.zeros(shape=data.shape, dtype=out_dtype)
    baseline = np.mean(data[:20000])
    threshold = threshold_level * baseline
    print('np.mean(data[:20000]) = ', baseline)
    print('threshold = ', threshold)
    for i in range(0, len(data) - w, h):
        win_mean[i] = np.mean(data[i:i + w])
    return win_mean

In [7]:
# Square confusion-count matrix with one row and one column per class known
# to the label encoder, initialised to zero. Each row is its own list.
num_labels = len(lb.classes_)
conf_M = [[0] * num_labels for _ in range(num_labels)]

def print_M_P(conf_M, labels=None):
    """Print a row-normalised confusion matrix as CSV text and return it.

    Each count is divided by the sum of its row and rounded to two decimals.
    A row whose counts sum to zero (a class with no samples) is reported as
    all ``0.0`` instead of raising ``ZeroDivisionError``.

    Args:
        conf_M: Square list of lists of counts, indexed as
            ``conf_M[row][column]``.
        labels: Optional sequence of class names, one per row/column. When
            omitted, names are obtained from the module-level label encoder
            via ``lb.inverse_transform``.

    Returns:
        A list of rows (lists of strings). The first row is the header
        ``['activity', <class names...>]``; each following row is
        ``[<class name>, <normalised values as strings...>]``.

    Side effects:
        Prints the header, one line per class and a trailing blank line.
    """
    size = len(conf_M)
    if labels is None:
        names = [lb.inverse_transform([i])[0] for i in range(size)]
    else:
        names = [labels[i] for i in range(size)]

    header = ['activity'] + names
    print(",".join(header))
    matrix = [header]

    for name, counts in zip(names, conf_M):
        total = float(sum(counts))
        cells = []
        for j in range(size):
            # Guard against classes that received no samples at all.
            val = counts[j] / total if total != 0 else 0.0
            cells.append(str(round(val, 2)))
        print(name, ",", ",".join(cells))
        matrix.append([name] + cells)
    print()
    return matrix
        

def save_prediction(file, name, label, subject):
    """Classify one audio file and record the outcome in ``conf_M``.

    The file is loaded with librosa, noise-reduced using its first 25000
    samples as the noise profile, trimmed with ``librosa.effects.trim`` and
    classified with ``predictSound``. The module-level confusion matrix
    ``conf_M`` is then incremented at ``[expected][predicted]``.

    Args:
        file: Path of the audio file to load.
        name: Not used in the body.
        label: Expected class name; must be known to the label encoder ``lb``.
        subject: Not used in the body.

    Side effects:
        Prints the sample rate and mutates the module-level ``conf_M``.
    """
    # Load the recording.
    waveform, sr = librosa.load(file)
    print('sr = ', sr)

    # Noise reduction; the leading samples were empirically chosen as the
    # noise-only part for every sample.
    noise_profile = waveform[0:25000]
    denoised = nr.reduce_noise(audio_clip=waveform, noise_clip=noise_profile, verbose=False)

    # Trim the denoised clip.
    clip, _ = librosa.effects.trim(denoised, top_db=20, frame_length=512, hop_length=64)

    # Predict, then convert both names to matrix indices.
    predicted_name = predictSound(clip)
    predicted_idx = lb.transform([predicted_name])[0]
    expected_idx = lb.transform([label])[0]
    conf_M[expected_idx][predicted_idx] += 1

In [12]:
# Activity names; each is used as a sub-folder name under
# "Dataset_audio/<subject>/" by the evaluation loop in this cell.
activities = ['Calling', 'Clapping', 'Drinking', 'Eating', 'Entering',
              'Exiting', 'Falling', 'LyingDown', 'OpeningPillContainer',
              'PickingObject', 'Reading', 'SitStill', 'Sitting', 'Sleeping',
              'StandUp', 'Sweeping', 'UseLaptop', 'UsingPhone', 'WakeUp', 'Walking',
              'WashingHand', 'WatchingTV', 'WaterPouring', 'Writing']
    
# Subject identifiers (not referenced by the code visible in this cell).
subjects = ['s01', 's02', 's03', 's04', 's05', 's06', 's07', 's08', 's09',
            's10', 's11', 's12', 's13', 's14', 's15', 's16', 's17']

# Subjects whose recordings are evaluated by the loop below.
test_subjects = ['s05', 's17']

# Activities that get_model_label maps to the single model class 'other'.
mergedActivities = ['Drinking', 'Eating', 'LyingDown', 'OpeningPillContainer', 
                      'PickingObject', 'Reading', 'SitStill', 'Sitting', 'Sleeping', 
                      'StandUp', 'UseLaptop', 'UsingPhone', 'WakeUp', 'Walking', 
                      'WaterPouring', 'Writing']

# Activities whose name is used unchanged as the model class.
specificActivities = ['Calling', 'Clapping', 'Falling', 'Sweeping', 'WashingHand', 'WatchingTV']

# Activities that get_model_label maps to the model class 'enteringExiting'.
enteringExiting = ['Entering', 'Exiting']


def get_model_label(activity):
    """Map a dataset activity name to the class name used by the model.

    Args:
        activity: Activity name from the dataset.

    Returns:
        ``activity`` itself if it is in ``specificActivities``,
        ``'enteringExiting'`` if it is in ``enteringExiting``,
        ``'other'`` if it is in ``mergedActivities``,
        and an empty string for any other value.
    """
    if activity in specificActivities:
        return activity
    if activity in enteringExiting:
        return 'enteringExiting'
    if activity in mergedActivities:
        return 'other'
    return ''
                
    
# Classify every .wav file found under Dataset_audio/<subject>/<activity> for
# the test subjects and accumulate the results in conf_M via save_prediction.
for activity in activities:
    for subject in test_subjects:
        innerDir = subject + "/" + activity
        activity_dir = "Dataset_audio/" + innerDir
        for file in os.listdir(activity_dir):
            if not file.endswith(".wav"):
                continue
            model_label = get_model_label(activity)
            save_prediction(activity_dir + "/" + file, file, model_label, subject)
            print(subject, activity, file)

stft len =  (257, 715)
stft mean len =  (257,)
s05 Calling 2019-05-23-14-44-14-725000__1.wav
stft len =  (257, 961)
stft mean len =  (257,)
s05 Calling 2019-05-23-14-44-14-725000__23.wav
stft len =  (257, 900)
stft mean len =  (257,)
s05 Calling 2019-05-23-14-44-14-725000__24.wav
stft len =  (257, 332)
stft mean len =  (257,)
s05 Calling 2019-05-23-15-00-21-197000__1.wav
stft len =  (257, 424)
stft mean len =  (257,)
s05 Calling 2019-05-23-15-00-21-197000__23.wav
stft len =  (257, 958)
stft mean len =  (257,)
s05 Calling 2019-05-23-15-00-21-197000__24.wav
stft len =  (257, 96)
stft mean len =  (257,)
s17 Calling 2019-05-24-16-41-47-063000__1.wav
stft len =  (257, 99)
stft mean len =  (257,)
s17 Calling 2019-05-24-16-41-47-063000__22.wav
stft len =  (257, 368)
stft mean len =  (257,)
s17 Calling 2019-05-24-16-41-47-068000__24.wav
stft len =  (257, 22)
stft mean len =  (257,)
s17 Calling 2019-05-24-16-53-12-526000__1.wav


KeyboardInterrupt: 

In [None]:
# Print the row-normalised confusion matrix and keep the returned table
# (header row + one row per class) for the CSV export.
mat = print_M_P(conf_M)

In [8]:
print(mat)
# Write the normalised confusion matrix to a CSV file.
# newline='' lets the csv module control line endings itself; without it an
# extra blank line is written after every row on platforms that translate
# '\n' to '\r\n'.
with open("test_results.csv", 'w', newline='') as csvfile:
    # creating a csv writer object
    csvwriter = csv.writer(csvfile)
    # writing the data rows
    csvwriter.writerows(mat)

[['activity', 'Calling', 'Clapping', 'Falling', 'Sweeping', 'WashingHand', 'WatchingTV', 'enteringExiting', 'other'], ['Calling', '0.5', '0.0', '0.0', '0.0', '0.0', '0.0', '0.17', '0.33'], ['Clapping', '0.0', '1.0', '0.0', '0.0', '0.0', '0.0', '0.0', '0.0'], ['Falling', '0.0', '0.08', '0.58', '0.0', '0.0', '0.0', '0.08', '0.25'], ['Sweeping', '0.0', '0.0', '0.0', '0.67', '0.0', '0.0', '0.0', '0.33'], ['WashingHand', '0.0', '0.0', '0.0', '0.0', '0.5', '0.0', '0.0', '0.5'], ['WatchingTV', '0.0', '0.0', '0.0', '0.0', '0.0', '0.67', '0.0', '0.33'], ['enteringExiting', '0.0', '0.07', '0.0', '0.0', '0.0', '0.0', '0.67', '0.27'], ['other', '0.0', '0.01', '0.0', '0.01', '0.0', '0.0', '0.0', '0.98']]


In [13]:
# Show the raw confusion counts; save_prediction indexes this matrix as
# conf_M[expected class][predicted class].
conf_M

[[13, 0, 0, 0, 0, 0, 5, 9],
 [0, 12, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 10]]