# Augmenting data to train Audio-IMU fused model

# We generate augmented data with
- 50ms, 100ms, 150ms, 200ms, 500ms, and 1000ms of shifts.
- The shifting is done in the IMU modality.
- Training labels are aligned with the Audio Modality.
- The shifted datasets are saved at the end.

In [1]:
import numpy as np
import scipy.io

from keras import optimizers
from keras.optimizers import SGD
from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import *
from keras.callbacks import Callback
from keras.callbacks import ModelCheckpoint


import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, LSTM, Dense, Dropout, Flatten, Activation
from keras.layers.core import Permute, Reshape
from keras import backend as K

from matplotlib import pyplot as plt
from PIL import Image as img_PIL


# random seed.
rand_seed = 2

from numpy.random import seed
seed(rand_seed)
from tensorflow import set_random_seed
set_random_seed(rand_seed)

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## 1. Loading the data

In [2]:
def one_hot_encoding(y_data):
    """Convert activity-name labels into a one-hot matrix.

    Args:
        y_data: sequence of activity names. Each entry must be one of
            'downstair', 'upstair', 'run', 'jump', 'walk', 'handwashing',
            'exercise'.

    Returns:
        float32 array of shape (len(y_data), 7). Column k is 1 for the k-th
        activity in the list above and 0 elsewhere.

    Raises:
        KeyError: if a label is not one of the known activities.
    """
    sub_dirs = ['downstair', 'upstair', 'run', 'jump', 'walk', 'handwashing', 'exercise']
    mapping = {name: index for index, name in enumerate(sub_dirs)}

    indices = np.array([mapping[label] for label in y_data], dtype=int)

    # The width is fixed to the number of known classes. Deriving it from the
    # largest label present would give fewer columns whenever a batch happens
    # not to contain the last class, and would fail on empty input.
    y_features = np.zeros((indices.shape[0], len(sub_dirs)), dtype=np.float32)
    y_features[np.arange(indices.shape[0]), indices] = 1.0

    return y_features

In [5]:
path='data/'


def _load_split(npz_path):
    """Load one dataset archive and prepare it for the fusion model.

    The archive is read positionally: 'arr_0' holds the IMU features,
    'arr_1' the activity-name labels and 'arr_2' the audio features.

    Args:
        npz_path: path of the .npz archive to read.

    Returns:
        Tuple (Features_imu, Labels, Features_sound). The IMU array gets a
        singleton axis inserted after the sample axis, (N, A, B) ->
        (N, 1, A, B), and the labels are one-hot encoded.
    """
    # np.load returns an NpzFile that keeps the archive open; the context
    # manager closes it once the arrays have been read into memory.
    with np.load(npz_path) as archive:
        features_imu = archive['arr_0']
        labels = archive['arr_1']
        features_sound = archive['arr_2']

    labels = one_hot_encoding(labels)
    features_imu = features_imu.reshape(
        features_imu.shape[0], 1, features_imu.shape[1], features_imu.shape[2])

    return features_imu, labels, features_sound


def get_train_data(path=path):
    """Return (IMU, one-hot labels, audio) from 'train_5000.npz' under `path`."""
    return _load_split(path+'train_5000.npz')


def get_valid_data(path=path):
    """Return (IMU, one-hot labels, audio) from 'valid_1000.npz' under `path`."""
    return _load_split(path+'valid_1000.npz')


def get_test_data(path=path):
    """Return (IMU, one-hot labels, audio) from 'test_1377.npz' under `path`."""
    return _load_split(path+'test_1377.npz')


In [6]:
# Load the training split and print the array shapes.
Features_imu,Labels,Features_sound = get_train_data()
print(Features_imu.shape, Labels.shape, Features_sound.shape)
# Labels are one-hot, so summing over axis 0 gives the sample count per class.
print('Train Classes distribution: ',np.sum(Labels, axis =0))

# Same report for the validation split.
Features_imu2,Labels2,Features_sound2 = get_valid_data()
print(Features_imu2.shape, Labels2.shape, Features_sound2.shape)
print('Valid Classes distribution: ',np.sum(Labels2, axis =0))

# Same report for the test split.
Features_imu3,Labels3,Features_sound3 = get_test_data()
print(Features_imu3.shape, Labels3.shape, Features_sound3.shape)
print('Test Classes distribution: ',np.sum(Labels3, axis =0))

(5000, 1, 40, 12) (5000, 7) (5000, 193)
Train Classes distribution:  [760. 994. 546. 780. 532. 732. 656.]
(1000, 1, 40, 12) (1000, 7) (1000, 193)
Valid Classes distribution:  [150. 188. 136. 141.  98. 157. 130.]
(1377, 1, 40, 12) (1377, 7) (1377, 193)
Test Classes distribution:  [219. 268. 124. 146. 143. 234. 243.]


## Observation: All of these test labels are continuous.
### Time window used to create a sample: 2 Second.

# Time shifting, our goal is to use the longest windows and insert time shifting into them

# First preprocess the data to get the continuous acc and gyro windows
- data ordering: each sample holds 40 time steps of 12 IMU channels.
- The 12 channels are three channels each of: acc_right, gyro_right, acc_left, gyro_left

In [7]:
def get_train_data2(path=path):
    """Load the training split, leaving the labels un-encoded.

    Args:
        path: directory prefix (including trailing separator) that contains
            'train_5000.npz'.

    Returns:
        Tuple (Features_imu, Labels, Features_sound). The IMU array gets a
        singleton axis inserted after the sample axis, (N, A, B) ->
        (N, 1, A, B). Labels are returned exactly as stored in 'arr_1',
        without one-hot encoding, so the caller can sort by class first.
    """
    # np.load returns an NpzFile that keeps the archive open; the context
    # manager closes it once the arrays have been read into memory.
    with np.load(path+'train_5000.npz') as archive:
        features_imu = archive['arr_0']
        labels = archive['arr_1']
        features_sound = archive['arr_2']

    features_imu = features_imu.reshape(
        features_imu.shape[0], 1, features_imu.shape[1], features_imu.shape[2])

    return features_imu, labels, features_sound

In [8]:
# Reload the training split with labels left as stored (no one-hot encoding).
Features_imu_t1,Labels_t1,Features_sound_t1 = get_train_data2()
print(Features_imu_t1.shape, Labels_t1.shape, Features_sound_t1.shape)
#print('Test Classes distribution: ',np.sum(Labels_t1, axis =0))

(5000, 1, 40, 12) (5000,) (5000, 193)


In [9]:
# We see all of the labels are continuous
Labels_t1 = np.array(Labels_t1)

In [10]:
Labels_t1[:100]

array(['run', 'jump', 'handwashing', 'walk', 'handwashing', 'upstair',
       'handwashing', 'downstair', 'handwashing', 'downstair', 'walk',
       'upstair', 'run', 'run', 'walk', 'run', 'downstair', 'handwashing',
       'walk', 'upstair', 'handwashing', 'exercise', 'exercise',
       'handwashing', 'downstair', 'downstair', 'upstair', 'downstair',
       'upstair', 'exercise', 'jump', 'downstair', 'upstair', 'upstair',
       'exercise', 'upstair', 'handwashing', 'upstair', 'upstair',
       'upstair', 'downstair', 'upstair', 'handwashing', 'exercise',
       'handwashing', 'downstair', 'exercise', 'upstair', 'jump', 'run',
       'walk', 'walk', 'upstair', 'handwashing', 'jump', 'exercise',
       'jump', 'exercise', 'run', 'handwashing', 'walk', 'handwashing',
       'handwashing', 'run', 'downstair', 'handwashing', 'run', 'upstair',
       'handwashing', 'walk', 'handwashing', 'run', 'jump', 'handwashing',
       'jump', 'run', 'downstair', 'jump', 'upstair', 'run',
       'hand

In [11]:
# Reorder the labels so they are continuous


In [12]:
def number_labels(y_data):
    """Map activity-name labels to integer class ids.

    Args:
        y_data: sequence of activity names. Each entry must be one of
            'downstair', 'upstair', 'run', 'jump', 'walk', 'handwashing',
            'exercise'.

    Returns:
        1-D integer array of len(y_data) class ids, where the id is the
        position of the activity in the list above (0..6).

    Raises:
        KeyError: if a label is not one of the known activities.
    """
    sub_dirs = ['downstair', 'upstair', 'run', 'jump', 'walk', 'handwashing', 'exercise']
    mapping = {name: index for index, name in enumerate(sub_dirs)}

    # An explicit integer dtype keeps an empty input from becoming float64.
    return np.array([mapping[label] for label in y_data], dtype=int)

In [13]:
Labels_t1 = number_labels(Labels_t1)

In [14]:
Labels_t1[:10]

array([2, 3, 5, 4, 5, 1, 5, 0, 5, 0])

In [15]:
# Get a sorting order for the labels
# NOTE: np.argsort's default sort kind is not stable, so samples sharing a
# label are not guaranteed to keep their original relative order.
train_order = np.argsort(Labels_t1)
Labels_t1[train_order]

array([0, 0, 0, ..., 6, 6, 6])

# Use this sorting order to recreate the array

In [16]:
# Reorder IMU features, labels and audio features with the same index array
# so the three stay aligned; fancy indexing replaces the per-sample append loop.
Train_imu = Features_imu_t1[train_order]
Train_labels = Labels_t1[train_order]
Train_sound = Features_sound_t1[train_order]

In [17]:
Train_imu=np.array(Train_imu)
Train_labels=np.array(Train_labels)
Train_sound=np.array(Train_sound)

print(Train_imu.shape, Train_labels.shape, Train_sound.shape)

(5000, 1, 40, 12) (5000,) (5000, 193)


In [18]:
Train_labels

array([0, 0, 0, ..., 6, 6, 6])

In [19]:
# One-hot encode the sorted integer labels.
# NOTE: this rebinds Train_labels, so re-running the cell would pass the
# already-encoded array to to_categorical again.
from keras.utils import to_categorical
Train_labels = to_categorical(Train_labels)

In [20]:
# Sanity check: load the baseline fusion model so it can be evaluated on the
# re-ordered data, to confirm the reordering did not break anything.

from keras.models import load_model
model_path = 'data/Baseline_IMU_Audio_Fusion'

model = load_model(model_path)


In [21]:
model.evaluate([Train_sound,Train_imu],Train_labels)



[0.002210177281832057, 0.9994]

# Testing accuracy on Reshuffled data.
## Note: no timing errors are introduced, so accuracy should be the same as the fusion model on the original continuous test data

# Now aggregating the data and doing the timing errors

In [22]:
# Split the 12 IMU channels into four 3-channel sensor groups.
# Train_imu is indexed as (window, 1, time, channel): drop the singleton axis,
# take three adjacent channels, then move the channel axis ahead of the time
# axis so each group is (window, 3, time).
acc_right = Train_imu[:, 0, :, 0:3].transpose(0, 2, 1)
gyro_right = Train_imu[:, 0, :, 3:6].transpose(0, 2, 1)
acc_left = Train_imu[:, 0, :, 6:9].transpose(0, 2, 1)
gyro_left = Train_imu[:, 0, :, 9:12].transpose(0, 2, 1)
    

In [23]:
acc_right=np.array(acc_right)
gyro_right=np.array(gyro_right)
acc_left=np.array(acc_left)
gyro_left=np.array(gyro_left)

In [24]:
print(acc_right.shape)
print(gyro_right.shape)
print(acc_left.shape)
print(gyro_left.shape)

(5000, 3, 40)
(5000, 3, 40)
(5000, 3, 40)
(5000, 3, 40)


In [25]:
# Join all windows end-to-end along the time axis:
# (window, 3, time) -> (3, window * time).
# A single concatenate over the window axis replaces the repeated hstack,
# which re-copied the whole growing stream on every iteration.
acc_right_cont = np.concatenate(acc_right, axis=1)
gyro_right_cont = np.concatenate(gyro_right, axis=1)

acc_left_cont = np.concatenate(acc_left, axis=1)
gyro_left_cont = np.concatenate(gyro_left, axis=1)

print('done')

done


In [26]:
print(acc_right_cont.shape)
print(gyro_right_cont.shape)
print(acc_left_cont.shape)
print(gyro_left_cont.shape)

(3, 200000)
(3, 200000)
(3, 200000)
(3, 200000)


# Now shifting the samples

# Defining function to do the evaluation

In [27]:
def get_data_sample_shift(shift_samples = 1):
    """Re-cut the continuous IMU streams into windows that start late.

    Reads the module-level arrays acc_right_cont, gyro_right_cont,
    acc_left_cont and gyro_left_cont (each indexed as (channel, time)) and
    slices them into consecutive, non-overlapping 40-sample windows that
    begin at offset `shift_samples`. Only the IMU data is shifted: the first
    `size` rows of Train_sound and Train_labels are returned unchanged, so
    shifted IMU window i is paired with the original audio window i.

    Args:
        shift_samples: non-negative number of samples to skip at the start
            of the IMU streams.

    Returns:
        Tuple (sound, imu, labels) where imu is indexed as
        (window, 1, time, channel) with 40 time steps and the channels
        ordered acc_right, gyro_right, acc_left, gyro_left; sound and labels
        are the first `size` rows of Train_sound and Train_labels, `size`
        being the number of complete windows left after the shift.

    Raises:
        ValueError: if shift_samples is negative, or so large that not even
            one complete window remains.
    """
    sample_size = 40 #need to be 40, as decided during training
    total_samples = acc_right_cont.shape[1]

    if shift_samples < 0:
        raise ValueError('shift_samples must be non-negative, got %r' % (shift_samples,))

    # Number of complete windows that fit after skipping shift_samples.
    size = (total_samples - shift_samples) // sample_size
    if size < 1:
        raise ValueError(
            'shift_samples=%r leaves no complete %d-sample window in a stream of %d samples'
            % (shift_samples, sample_size, total_samples))

    stop = shift_samples + size * sample_size

    def _cut(stream):
        # (channel, size * sample_size) -> (channel, size, sample_size)
        # -> (size, channel, sample_size)
        segment = stream[:, shift_samples:stop]
        return segment.reshape(stream.shape[0], size, sample_size).swapaxes(0, 1)

    # Stack the four sensor groups along the channel axis, add the singleton
    # axis the model expects, then put time before channel.
    IMU_processed = np.concatenate(
        (_cut(acc_right_cont), _cut(gyro_right_cont),
         _cut(acc_left_cont), _cut(gyro_left_cont)),
        axis=1)
    IMU_processed = IMU_processed[:,np.newaxis,:,:]
    IMU_processed = np.swapaxes(IMU_processed,2,3)

    return Train_sound[:size], IMU_processed ,Train_labels[:size]

# Generating more training datasets by doing shifting
- 1 sample shift = 50 ms timing error.
- 20 sample shift = 1000ms timing error.
- 1000ms augmentation uses shifts (1,2,10,20)

In [28]:
# Shift the IMU stream by 1 sample (50 ms timing error).

Features_sound_1, IMU_1 ,Labels_1 =  get_data_sample_shift(shift_samples = 1)
print(Features_sound_1.shape, IMU_1.shape, Labels_1.shape)

(4999, 193) (4999, 1, 40, 12) (4999, 7)


In [29]:
import numpy as np
# np.savez appends '.npz' to the file name.
path_1_sample='data/train_data_1_shift'

# Positional arrays are stored as arr_0 (IMU), arr_1 (labels), arr_2 (sound),
# the same key order the get_*_data loaders read.
# NOTE(review): Labels_1 is already one-hot, whereas get_train_data() passes
# arr_1 through one_hot_encoding(), which expects activity names. Confirm that
# the consumer of this file does not re-encode the labels.
np.savez(path_1_sample, IMU_1, Labels_1, Features_sound_1)

In [30]:
# Shift the IMU stream by 2 samples (100 ms timing error).

Features_sound_2, IMU_2 ,Labels_2 =  get_data_sample_shift(shift_samples = 2)
print(Features_sound_2.shape, IMU_2.shape, Labels_2.shape)

(4999, 193) (4999, 1, 40, 12) (4999, 7)


In [31]:
import numpy as np
# np.savez appends '.npz' to the file name.
path_2_sample='data/train_data_2_shift'

# Positional arrays are stored as arr_0 (IMU), arr_1 (one-hot labels), arr_2 (sound).
np.savez(path_2_sample, IMU_2, Labels_2, Features_sound_2)

In [32]:
# Shift the IMU stream by 10 samples (500 ms timing error).

Features_sound_10, IMU_10 ,Labels_10 =  get_data_sample_shift(shift_samples = 10)
print(Features_sound_10.shape, IMU_10.shape, Labels_10.shape)

(4999, 193) (4999, 1, 40, 12) (4999, 7)


In [33]:
import numpy as np
# np.savez appends '.npz' to the file name.
path1='data/train_data_10_shift'

# Positional arrays are stored as arr_0 (IMU), arr_1 (one-hot labels), arr_2 (sound).
np.savez(path1, IMU_10, Labels_10, Features_sound_10)

In [34]:
# Shift the IMU stream by 20 samples (1000 ms timing error).

Features_sound_20, IMU_20 ,Labels_20 =  get_data_sample_shift(shift_samples = 20)
print(Features_sound_20.shape, IMU_20.shape, Labels_20.shape)

(4999, 193) (4999, 1, 40, 12) (4999, 7)


In [35]:
# np.savez appends '.npz' to the file name.
path2='data/train_data_20_shift'

# Positional arrays are stored as arr_0 (IMU), arr_1 (one-hot labels), arr_2 (sound).
np.savez(path2, IMU_20, Labels_20, Features_sound_20)