# Augmenting data to train Audio-IMU fused model

# We generate augmented data with
- 50ms, 100ms, 150ms, 200ms, 500ms, and 1000ms of shifts.
- The shifting is done in the IMU modality.
- Training labels are aligned with the audio modality.
- The shifted datasets are saved at the end.

In [1]:
import numpy as np
import scipy.io

from keras import optimizers
from keras.optimizers import SGD
from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import *
from keras.callbacks import Callback
from keras.callbacks import ModelCheckpoint


import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, LSTM, Dense, Dropout, Flatten, Activation
from keras.layers.core import Permute, Reshape
from keras import backend as K

from matplotlib import pyplot as plt
from PIL import Image as img_PIL


# Fixed random seed shared by NumPy and TensorFlow below.
rand_seed = 2

# Seed NumPy's global random generator.
from numpy.random import seed
seed(rand_seed)
# Seed TensorFlow's global random generator with the same value.
import tensorflow
tensorflow.random.set_seed(rand_seed)

## 1. Loading the data

In [2]:
def one_hot_encoding(y_data):
    """Convert activity-name labels into a one-hot matrix.

    Parameters
    ----------
    y_data : sequence of str
        Activity names. Every entry must be one of the names listed in
        ``sub_dirs`` below.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(y_data), 7)``; row ``k`` has a single 1 in the
        column given by the position of ``y_data[k]`` in ``sub_dirs``.
        The width is always 7, even when some classes do not occur in
        ``y_data`` (a width inferred from the data would shrink in that case).

    Raises
    ------
    KeyError
        If a label is not one of the known activity names.
    """
    sub_dirs=['downstair','upstair','run','jump','walk','handwashing','exercise']
    categories = len(sub_dirs)

    # Activity name -> integer class id, in the order of sub_dirs.
    mapping = {name: index for index, name in enumerate(sub_dirs)}

    # Explicit integer dtype so an empty input still yields a valid index array.
    class_ids = np.array([mapping[label] for label in y_data], dtype=np.int64)

    # Picking rows of the identity matrix gives the one-hot rows directly.
    # float32 is used because it is the default dtype of Keras 2's
    # `to_categorical`, which this function called before.
    # NOTE(review): confirm no caller relies on a different dtype.
    return np.eye(categories, dtype=np.float32)[class_ids]

## Observation: All of these test labels are continuous.
### Time window used to create a sample: 2 Second.

# Time shifting, our goal is to use the longest windows and insert time shifting into them

# First preprocess the data to get the continuous acc and gyro windows
- data ordering: 12 sensor channels, each with 40 samples.
- the 12 channels are: acc_right, gyro_right, acc_left, gyro_left (3 axes each)

In [3]:
path='C:/Users/macro/Desktop/UCLA Class/ECE 209AS ML/TimeAwareness/data/'

def get_test_data(path=path):
    """Load the test split stored in ``Data_test_71.pkl``.

    Parameters
    ----------
    path : str
        Directory containing ``Data_test_71.pkl``. A trailing path separator
        is optional.

    Returns
    -------
    tuple
        ``(Labels, Features_imu, Features_audio, Features_video)`` where
        ``Labels`` is element 1 of the stored object, ``Features_imu`` is
        element 0 converted to float64 and reshaped from ``(N, A, B)`` to
        ``(N, 1, A, B)``, and the audio/video features are elements 2 and 3
        returned unchanged.
    """
    import os  # local import keeps this cell self-contained

    # SECURITY NOTE: allow_pickle=True unpickles the file, which can execute
    # arbitrary code. Only load files that come from a trusted source.
    test_data = np.load(os.path.join(path, 'Data_test_71.pkl'), allow_pickle=True)

    Labels = test_data[1]
    Features_imu = np.asarray(test_data[0], dtype=np.float64)
    Features_audio = test_data[2]
    Features_video = test_data[3]

    # Insert a singleton axis after the sample axis: (N, A, B) -> (N, 1, A, B).
    num_samples, dim_a, dim_b = Features_imu.shape
    Features_imu = Features_imu.reshape(num_samples, 1, dim_a, dim_b)

    return Labels,Features_imu,Features_audio,Features_video

In [4]:
# Load the test split from the default data directory.
Labels_t1,Features_imu_t1,Features_sound_t1,Features_video_t1 = get_test_data()
# Sanity check: IMU shape, number of labels and audio shape should agree on the sample count.
print(Features_imu_t1.shape, len(Labels_t1), Features_sound_t1.shape)
#print('Test Classes distribution: ',np.sum(Labels_t1, axis =0))

(1377, 1, 40, 12) 1377 (1377, 193)


In [5]:
# Wrap the labels in a NumPy array; they are displayed in the next cell to
# check that samples of the same activity are stored consecutively.
Labels_t1 = np.array(Labels_t1)

In [6]:
# Display the first 100 labels to inspect their ordering.
Labels_t1[:100]

array(['downstair', 'downstair', 'downstair', 'downstair', 'downstair',
       'downstair', 'downstair', 'downstair', 'downstair', 'downstair',
       'downstair', 'downstair', 'downstair', 'downstair', 'downstair',
       'downstair', 'downstair', 'downstair', 'downstair', 'downstair',
       'downstair', 'downstair', 'downstair', 'downstair', 'downstair',
       'downstair', 'downstair', 'downstair', 'downstair', 'downstair',
       'downstair', 'downstair', 'downstair', 'downstair', 'downstair',
       'downstair', 'downstair', 'downstair', 'downstair', 'downstair',
       'downstair', 'downstair', 'downstair', 'downstair', 'downstair',
       'downstair', 'downstair', 'downstair', 'downstair', 'downstair',
       'downstair', 'downstair', 'downstair', 'downstair', 'downstair',
       'downstair', 'downstair', 'downstair', 'downstair', 'downstair',
       'downstair', 'downstair', 'downstair', 'downstair', 'downstair',
       'downstair', 'downstair', 'downstair', 'downstair', 'down

In [7]:
def number_labels(y_data):
    """Map activity-name labels to integer class ids.

    Parameters
    ----------
    y_data : sequence of str
        Activity names. Every entry must be one of the names listed in
        ``sub_dirs`` below.

    Returns
    -------
    numpy.ndarray
        1-D integer array of length ``len(y_data)``; each value is the
        position of the corresponding label in ``sub_dirs`` (0 to 6).

    Raises
    ------
    KeyError
        If a label is not one of the known activity names.
    """
    sub_dirs=['downstair','upstair','run','jump','walk','handwashing','exercise']

    # Activity name -> integer class id, in the order of sub_dirs.
    mapping = {name: index for index, name in enumerate(sub_dirs)}

    # dtype=int keeps the platform default integer type for non-empty input
    # and makes an empty input come back as integers as well.
    return np.array([mapping[label] for label in y_data], dtype=int)

In [8]:
# Group consecutive windows into blocks of 10 and shuffle the order of the
# blocks. Windows inside a block keep their original order, so each block is
# still a continuous stretch of the recording.
Labels_t1 = number_labels(Labels_t1)
num_windows_together = 10

final_indx = Features_imu_t1.shape[0]

# Each entry is [imu, audio, video, labels] for one block. range() stops
# before final_indx, so no empty trailing block is created when the number of
# windows is an exact multiple of num_windows_together; the last block is
# simply shorter when it is not.
data_together = []
for curr_indx in range(0, final_indx, num_windows_together):
    block_end = curr_indx + num_windows_together
    data_together.append([
        Features_imu_t1[curr_indx:block_end],
        Features_sound_t1[curr_indx:block_end],
        Features_video_t1[curr_indx:block_end],
        Labels_t1[curr_indx:block_end],
    ])

# Re-seed NumPy so the block permutation is reproducible.
rand_seed = 4
np.random.seed(rand_seed)

ordering = np.arange(len(data_together))
np.random.shuffle(ordering)

print(ordering[:100])
print(len(data_together),' ', ordering.shape)

import time
start_time = time.time()

# Rebuild every modality in the shuffled block order. One concatenate call
# per modality copies the data once, instead of re-copying the growing array
# for every block.
Features_imu_t1 = np.concatenate([data_together[indx][0] for indx in ordering], axis=0)
Features_sound_t1 = np.concatenate([data_together[indx][1] for indx in ordering], axis=0)
Features_video_t1 = np.concatenate([data_together[indx][2] for indx in ordering], axis=0)
Labels_t1 = np.concatenate([data_together[indx][3] for indx in ordering], axis=0)

end_time = time.time()
print("---Time %s seconds ---" % (end_time - start_time))

print(Features_imu_t1.shape,Labels_t1.shape,Features_sound_t1.shape,Features_video_t1.shape)
print(Labels_t1[:100])

# Independent copies used by the following cells.
Train_imu = np.array(Features_imu_t1)
Train_labels = np.array(Labels_t1)
Train_sound = np.array(Features_sound_t1)
Train_video = np.array(Features_video_t1)

print(Train_imu.shape, Train_labels.shape, Train_sound.shape, Train_video.shape)
print(Train_labels)

[ 61   5 131  64 105  19  12   2  26 125  47  80  98 114  25  86  16 112
  95  24 137  41  34  74   1  18  78  35  29  20 108  60  82   7  10 117
  13  15  99  97  27  77  83  43  69  96  92   6  39 119 136  91  63  72
   9  14  90 100  31  93  68  11  65   4 107  54  85  37 120  51 111 124
  53 116  88  71  70 110  84  75  89  62  67  22  23  76  48  59 113  81
  17 123 126  79 118  28 101  33  45  42]
138   (138,)
---Time 7.679170846939087 seconds ---
(1377, 1, 40, 12) (1377,) (1377, 193) (1377, 45, 64, 64, 3)
[2 3 3 3 3 3 3 3 3 3 0 0 0 0 0 0 0 0 0 0 6 6 6 6 6 6 6 6 6 6 3 3 3 3 3 3 3
 3 3 3 5 5 5 5 5 5 5 5 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 6 6 6 6 6 6 6 6 6 6]
(1377, 1, 40, 12) (1377,) (1377, 193) (1377, 45, 64, 64, 3)
[2 3 3 ... 6 6 6]


# Use this sorting order to recreate the array

In [9]:
#Labels_t1 = number_labels(Labels_t1)
#print(Labels_t1[:10])

# Get a sorting order for the labels
#train_order = np.argsort(Labels_t1)
# Reorder the labels so they are continuous
#Labels_t1[train_order]

#Train_imu=[]
#Train_labels=[]
#Train_sound=[]
#Train_video=[]

#for i in range(Labels_t1.shape[0]):
#    Train_imu.append(Features_imu_t1[train_order[i]])
#    Train_labels.append(Labels_t1[train_order[i]])
#    Train_sound.append(Features_sound_t1[train_order[i]])
#    Train_video.append(Features_video_t1[train_order[i]])
    
#Train_imu=np.array(Train_imu)
#Train_labels=np.array(Train_labels)
#Train_sound=np.array(Train_sound)
#Train_video=np.array(Train_video)

#print(Train_imu.shape, Train_labels.shape, Train_sound.shape, Train_video.shape)
#print(Train_labels)

In [10]:
# One-hot encode the integer class ids produced by number_labels.
from keras.utils import to_categorical
# num_classes is fixed to the 7 activity classes so the label matrix always
# has 7 columns, even if the highest class id is absent from the data.
Train_labels = to_categorical(Train_labels, num_classes=7)

# Testing accuracy on Reshuffled data.
## Note: no timing errors are introduced here, so the accuracy should be the same as that of the fusion model on the original continuous test data

# Now aggregating the data and doing the timing errors

In [11]:
# Split the last axis of Train_imu into four sensors of three channels each:
# columns 0-2 -> acc_right, 3-5 -> gyro_right, 6-8 -> acc_left, 9-11 -> gyro_left.
# Every list entry holds the three per-channel sequences of one window.
acc_right = [
    [Train_imu[w, 0, :, ch] for ch in (0, 1, 2)]
    for w in range(Train_imu.shape[0])
]
gyro_right = [
    [Train_imu[w, 0, :, ch] for ch in (3, 4, 5)]
    for w in range(Train_imu.shape[0])
]
acc_left = [
    [Train_imu[w, 0, :, ch] for ch in (6, 7, 8)]
    for w in range(Train_imu.shape[0])
]
gyro_left = [
    [Train_imu[w, 0, :, ch] for ch in (9, 10, 11)]
    for w in range(Train_imu.shape[0])
]
    

In [12]:
# Turn the four per-sensor lists into NumPy arrays in one pass.
acc_right, gyro_right, acc_left, gyro_left = (
    np.array(channel_group)
    for channel_group in (acc_right, gyro_right, acc_left, gyro_left)
)

In [13]:
# Show the shape of every per-sensor array, one per line.
print(acc_right.shape, gyro_right.shape, acc_left.shape, gyro_left.shape, sep='\n')

(1377, 3, 40)
(1377, 3, 40)
(1377, 3, 40)
(1377, 3, 40)


In [14]:
# Join all windows of each sensor end-to-end along the time axis:
# (num_windows, 3, window_len) -> (3, num_windows * window_len).
# A single concatenate per sensor copies the data once, instead of re-copying
# the growing array for every window.
acc_right_cont = np.concatenate(list(acc_right), axis=1)
gyro_right_cont = np.concatenate(list(gyro_right), axis=1)

acc_left_cont = np.concatenate(list(acc_left), axis=1)
gyro_left_cont = np.concatenate(list(gyro_left), axis=1)

print('done')

done


In [15]:
# Show the shape of every continuous sensor stream, one per line.
print(
    acc_right_cont.shape,
    gyro_right_cont.shape,
    acc_left_cont.shape,
    gyro_left_cont.shape,
    sep='\n',
)

(3, 55080)
(3, 55080)
(3, 55080)
(3, 55080)


# Now shifting the samples

# Defining the function that builds the shifted IMU samples

In [16]:
def get_data_sample_shift(shift_samples = 1):
    """Re-window the continuous IMU streams with a time offset.

    The module-level streams ``acc_right_cont``, ``gyro_right_cont``,
    ``acc_left_cont`` and ``gyro_left_cont`` (each ``(3, total_samples)``)
    are cut into consecutive, non-overlapping windows of 40 samples, starting
    ``shift_samples`` samples into the stream. Audio, labels and video are not
    shifted: window ``k`` of the shifted IMU data is paired with entry ``k``
    of ``Train_sound``, ``Train_labels`` and ``Train_video``.

    Parameters
    ----------
    shift_samples : int
        Offset, in IMU samples, of the first window. Must be non-negative and
        leave room for at least one complete window.

    Returns
    -------
    tuple
        ``(sound, imu, labels, video)`` where ``imu`` has shape
        ``(num_windows, 1, 40, 12)`` with the last axis ordered acc_right,
        gyro_right, acc_left, gyro_left (3 channels each), and the other three
        are the first ``num_windows`` entries of ``Train_sound``,
        ``Train_labels`` and ``Train_video``.

    Raises
    ------
    ValueError
        If ``shift_samples`` is negative or no complete window fits after it.
    """
    sample_size = 40 #need to be 40, as decided during training
    total_samples = acc_right_cont.shape[1]

    if shift_samples < 0 or shift_samples + sample_size > total_samples:
        raise ValueError(
            "shift_samples must be between 0 and %d, got %r"
            % (total_samples - sample_size, shift_samples)
        )

    # Only complete windows are kept; a partial window at the end is dropped.
    num_windows = (total_samples - shift_samples) // sample_size
    window_end = shift_samples + num_windows * sample_size

    # Stack the four sensors into 12 channel rows, then drop the shifted-out
    # head and the incomplete tail.
    channels = np.concatenate(
        (acc_right_cont, gyro_right_cont, acc_left_cont, gyro_left_cont), axis=0
    )[:, shift_samples:window_end]

    # (12, num_windows * 40) -> (12, num_windows, 40) -> (num_windows, 40, 12)
    # -> (num_windows, 1, 40, 12)
    IMU_processed = channels.reshape(channels.shape[0], num_windows, sample_size)
    IMU_processed = IMU_processed.transpose(1, 2, 0)[:, np.newaxis, :, :]
    IMU_processed = np.ascontiguousarray(IMU_processed)

    size = IMU_processed.shape[0]

    return Train_sound[:size], IMU_processed ,Train_labels[:size], Train_video[:size]

# Generating more training datasets by doing shifting
- 1 sample shift = 50 ms timing error.
- 20 sample shift = 1000ms timing error.
- 1000ms augmentation uses shifts (1,2,10,20)

In [17]:
import numpy as np

# Build and save one dataset per IMU shift of 1..40 samples.
# NOTE(review): the notes in this notebook state 1 sample = 50 ms, which would
# make this range 50 ms to 2000 ms - confirm this is the intended set of shifts.
for i in range(1,41):
    # Audio, labels and video are returned unshifted; only the IMU windows move.
    Features_sound_1, IMU_1 ,Labels_1, Features_video_1 =  get_data_sample_shift(shift_samples = i)
    
    # Optional per-dataset shuffling of the samples, currently disabled.
#    randomize = np.arange(len(Labels_1))
#    np.random.shuffle(randomize)
    
#    Labels_1 = Labels_1[randomize]
#    IMU_1 = IMU_1[randomize]
#    Features_sound_1 = Features_sound_1[randomize]
#    Features_video_1 = Features_video_1[randomize]
    
    # Report the shapes and the column sums of the label matrix for this shift.
    print(Features_sound_1.shape, IMU_1.shape, Labels_1.shape, Features_video_1.shape)
    print(i,' Train Classes distribution: ',np.sum(Labels_1, axis = 0))

    # np.savez appends ".npz" to the name and stores positional arrays as
    # arr_0 (IMU), arr_1 (labels), arr_2 (audio), arr_3 (video).
    # NOTE(review): the output directory is a hard-coded absolute path and is
    # not created here - confirm it exists before running.
    path_sample='E:/augmented_data/test_data_'+str(i)+'_shift'
    np.savez(path_sample, IMU_1, Labels_1, Features_sound_1, Features_video_1)

(1376, 193) (1376, 1, 40, 12) (1376, 7) (1376, 45, 64, 64, 3)
Train Classes distribution:  [219. 268. 124. 146. 143. 234. 242.]
(1376, 193) (1376, 1, 40, 12) (1376, 7) (1376, 45, 64, 64, 3)
Train Classes distribution:  [219. 268. 124. 146. 143. 234. 242.]
(1376, 193) (1376, 1, 40, 12) (1376, 7) (1376, 45, 64, 64, 3)
Train Classes distribution:  [219. 268. 124. 146. 143. 234. 242.]
(1376, 193) (1376, 1, 40, 12) (1376, 7) (1376, 45, 64, 64, 3)
Train Classes distribution:  [219. 268. 124. 146. 143. 234. 242.]
(1376, 193) (1376, 1, 40, 12) (1376, 7) (1376, 45, 64, 64, 3)
Train Classes distribution:  [219. 268. 124. 146. 143. 234. 242.]
(1376, 193) (1376, 1, 40, 12) (1376, 7) (1376, 45, 64, 64, 3)
Train Classes distribution:  [219. 268. 124. 146. 143. 234. 242.]
(1376, 193) (1376, 1, 40, 12) (1376, 7) (1376, 45, 64, 64, 3)
Train Classes distribution:  [219. 268. 124. 146. 143. 234. 242.]
(1376, 193) (1376, 1, 40, 12) (1376, 7) (1376, 45, 64, 64, 3)
Train Classes distribution:  [219. 268. 12

In [None]:
# #shift by 1 sample

#Features_sound_1, IMU_1 ,Labels_1, Features_video_1 =  get_data_sample_shift(shift_samples = 1)
#print(Features_sound_1.shape, IMU_1.shape, Labels_1.shape, Features_video_1.shape)

In [None]:
#import numpy as np
#path_1_sample='augmented_data/train_data_1_shift'

#np.savez(path_1_sample, IMU_1, Labels_1, Features_sound_1, Features_video_1)

In [None]:
#shift by 2 sample

#Features_sound_2, IMU_2 ,Labels_2, Features_video_2 =  get_data_sample_shift(shift_samples = 2)
#print(Features_sound_2.shape, IMU_2.shape, Labels_2.shape, Features_video_2.shape)

In [None]:
#import numpy as np
#path_2_sample='augmented_data/train_data_2_shift'

#np.savez(path_2_sample, IMU_2, Labels_2, Features_sound_2, Features_video_2)

In [None]:
#shift by 10 samples

#Features_sound_10, IMU_10 ,Labels_10, Features_video_10 =  get_data_sample_shift(shift_samples = 10)
#print(Features_sound_10.shape, IMU_10.shape, Labels_10.shape, Features_video_10.shape)

In [None]:
#import numpy as np
#path1='augmented_data/train_data_10_shift'

#np.savez(path1, IMU_10, Labels_10, Features_sound_10, Features_video_10)

In [None]:
#shift by 20 samples

#Features_sound_20, IMU_20 ,Labels_20, Features_video_20 =  get_data_sample_shift(shift_samples = 20)
#print(Features_sound_20.shape, IMU_20.shape, Labels_20.shape, Features_video_20.shape )

In [None]:
#path2='augmented_data/train_data_20_shift'

#np.savez(path2, IMU_20, Labels_20, Features_sound_20, Features_video_20 )