<a href="https://colab.research.google.com/github/rht6226/InnoHAR-NeuralNet/blob/master/Data_Processing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Loading data from Folder

In this module we load the raw dataset from disk and save it as NumPy arrays, so that it can be reloaded quickly when training the various models.

In [None]:
# Imports
import numpy
from numpy import mean
from numpy import std
from numpy import dstack
from pandas import read_csv

In [None]:
# load a single file as a numpy array
def load_file(filepath):
    """Load a header-less, whitespace-delimited text file as a NumPy array.

    Args:
        filepath: Location of the file, passed straight to
            ``pandas.read_csv``.

    Returns:
        The parsed table as returned by ``DataFrame.values``
        (one row per line of the file).
    """
    # sep=r'\s+' is the supported replacement for the deprecated
    # delim_whitespace=True: fields are split on runs of whitespace.
    dataframe = read_csv(filepath, header=None, sep=r'\s+')
    return dataframe.values


# load a list of files into a 3D array of [samples, timesteps, features]
def load_group(filenames, prefix=''):
    """Read several files and stack them along a third axis.

    Args:
        filenames: Iterable of file names to read, in stacking order.
        prefix: String prepended to every name to form the path given
            to ``load_file``.

    Returns:
        The result of ``numpy.dstack`` over the per-file arrays, so each
        file contributes one slice of the last dimension (intended as
        [samples, timesteps, features]).
    """
    per_file_arrays = [load_file(prefix + fname) for fname in filenames]
    # dstack puts each file on the 3rd dimension.
    return dstack(per_file_arrays)

# load a dataset group, such as train or test
def load_dataset_group(group, prefix=''):
    """Load the inertial signals and labels for one dataset group.

    Args:
        group: Group name used in both directory and file names
            (e.g. 'train' or 'test').
        prefix: Path prefix of the dataset root; joined by plain string
            concatenation, so it should end with a path separator.

    Returns:
        A tuple ``(X, y)``: ``X`` is the stacked array built by
        ``load_group`` from the nine signal files, ``y`` is the array
        read from the group's ``y_<group>.txt`` file.
    """
    signals_dir = prefix + group + '/Inertial Signals/'
    suffix = '_' + group + '.txt'
    # Nine files in a fixed order: total acceleration, body acceleration,
    # body gyroscope -- each along x, y and z.
    signal_files = [
        kind + '_' + axis + suffix
        for kind in ('total_acc', 'body_acc', 'body_gyro')
        for axis in ('x', 'y', 'z')
    ]
    # input data
    X = load_group(signal_files, signals_dir)
    # class output
    y = load_file(prefix + group + '/y_' + group + '.txt')
    return X, y

# load the dataset, returns train and test X and y elements
def load_dataset(prefix=''):
    """Load the train and test groups and shift labels to start at zero.

    Prints the array shapes after each group is loaded and once more
    after the labels have been shifted.

    Args:
        prefix: Path prefix of the dataset root, forwarded to
            ``load_dataset_group``.

    Returns:
        ``((trainX, trainy), (testX, testy))`` where the label arrays
        have had 1 subtracted from every value.
    """
    # training split
    train_inputs, train_labels = load_dataset_group('train', prefix)
    print("Shape of training data = X : {}, y: {}".format(train_inputs.shape, train_labels.shape))
    # testing split
    test_inputs, test_labels = load_dataset_group('test', prefix)
    print("Shape of testing data = X : {}, y: {}".format(test_inputs.shape, test_labels.shape))
    # Class values in the files start from 1; shift them to start from 0.
    train_labels = train_labels - 1
    test_labels = test_labels - 1
    summary = "Final shape of training data = X:{}, y: {}\nFinal shape of testing data = X:{}, y: {}\n"
    print(summary.format(
        train_inputs.shape, train_labels.shape,
        test_inputs.shape, test_labels.shape,
    ))
    return (train_inputs, train_labels), (test_inputs, test_labels)

In [None]:
# Read the raw dataset text files from the Drive folder into memory
(trainX, trainy), (testX, testy) = load_dataset(
    '/content/drive/MyDrive/InnoHAR/Dataset/UCI_HAR_Dataset/'
)

Shape of training data = X : (7352, 128, 9), y: (7352, 1)
Shape of testing data = X : (2947, 128, 9), y: (2947, 1)
Final shape of training data = X:(7352, 128, 9), y: (7352, 1)
Final shape of testing data = X:(2947, 128, 9), y: (2947, 1)



In [None]:
# Persist the four loaded arrays in a single .npz archive, keyed by name
fpath = '/content/drive/MyDrive/InnoHAR/ProcessedDataset/uci.npz'
numpy.savez(
    fpath,
    trainX=trainX,
    trainy=trainy,
    testX=testX,
    testy=testy,
)

In [None]:
# Function for loading the dataset
def load_uci(file):
    """Read the four dataset arrays back from a saved ``.npz`` archive.

    Args:
        file: Archive location, passed to ``numpy.load``. The archive
            must contain the keys 'trainX', 'trainy', 'testX', 'testy'.

    Returns:
        ``((trainX, trainy), (testX, testy))`` as NumPy arrays.
    """
    keys = ('trainX', 'trainy', 'testX', 'testy')
    # Pull every array out while the archive is still open.
    with numpy.load(file) as archive:
        train_inputs, train_labels, test_inputs, test_labels = [
            archive[key] for key in keys
        ]
    return (train_inputs, train_labels), (test_inputs, test_labels)

In [None]:
# Reload the train/test arrays from the .npz archive at fpath using load_uci
(trainX, trainy), (testX, testy) = load_uci(fpath)

In [None]:
# Report the shapes of the reloaded arrays
shape_report = "Final shape of training data = X:{}, y: {}\nFinal shape of testing data = X:{}, y: {}\n"
print(shape_report.format(trainX.shape, trainy.shape, testX.shape, testy.shape))

Final shape of training data = X:(7352, 128, 9), y: (7352, 1)
Final shape of testing data = X:(2947, 128, 9), y: (2947, 1)

