In [3]:
import os 
import numpy as np
import torch
import torch.nn as nn
import pandas as pd


In [14]:
def _sensor_frame(parsed, sensor, prefix):
    """Return the numeric rows of one sensor, indexed by their timestamp.

    Selects the rows of ``parsed`` whose ``sensor`` field equals ``sensor``,
    converts every remaining field with ``pd.to_numeric``, uses the time
    field as the index and renames the columns to
    ``time_<prefix>, <prefix>_x, <prefix>_y, <prefix>_z``.
    """
    frame = parsed.loc[parsed['sensor'] == sensor].drop('sensor', axis=1)
    frame = frame.apply(pd.to_numeric)
    # The timestamp stays available as a column and doubles as the index
    # that the two sensor streams are aligned on.
    frame.index = frame['time']
    frame.columns = [f'time_{prefix}', f'{prefix}_x', f'{prefix}_y', f'{prefix}_z']
    return frame


def structure_file(df, tolerance=0.02):
    """Turn a raw sensor log into one row per time-aligned ACC/GYR reading.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column ``0`` holds one tab-separated record per row with
        the fields ``time, sensor, x, y, z``. Only records whose sensor
        field is ``'ACC'`` or ``'GYR'`` are used.
    tolerance : float, default 0.02
        Largest timestamp difference (same unit as the time field) at which
        a reading of one sensor is matched to a reading of the other.

    Returns
    -------
    pandas.DataFrame
        Columns ``time_acc, acc_x, acc_y, acc_z, time_gyr, gyr_x, gyr_y,
        gyr_z`` with a fresh 0..n-1 index. Rows without a partner reading
        within ``tolerance`` are dropped.
    """
    parsed = df[0].str.split('\t', n=4, expand=True)
    parsed.columns = ["time", "sensor", "x", "y", "z"]

    # Order matters: it fixes the column order of the concatenated result.
    data_acc_gyr = [
        _sensor_frame(parsed, 'ACC', 'acc'),
        _sensor_frame(parsed, 'GYR', 'gyr'),
    ]

    # The shorter stream provides the reference timestamps; the other one is
    # snapped onto them. Using `1 - min_idx` instead of a separate argmax
    # keeps the two indices distinct when both streams have the same length
    # (argmin and argmax would both return 0 and nothing would be aligned).
    min_idx = int(np.argmin([frame.shape[0] for frame in data_acc_gyr]))
    max_idx = 1 - min_idx

    data_acc_gyr[max_idx] = data_acc_gyr[max_idx].reindex(
        data_acc_gyr[min_idx].index, method='nearest', tolerance=tolerance
    )

    aligned = pd.concat(data_acc_gyr, axis=1)
    return aligned.dropna().reset_index(drop=True)

In [44]:
# Load one recording per activity. The files are read with the default comma
# separator and no header, so each tab-separated record arrives as a single
# string in column 0, which is the layout structure_file() splits.
data_running_oscar = structure_file(pd.read_csv('running/running_oscar.txt', header=None))
data_walking_walking = structure_file(pd.read_csv('walking/walking_oscar.txt', header=None))
data_standing_oscar = structure_file(pd.read_csv('standing_still/standing_still_oscar.txt', header=None))

# Activity name -> list of recordings for that activity. The lists reference
# the frames loaded above; the previous names (data_running, data_walking,
# data_standing) were never assigned in this cell and raised NameError on a
# fresh kernel.
all_data = {'running': [data_running_oscar],
            'walking': [data_walking_walking],
            'standing': [data_standing_oscar]}


In [62]:
# Create data sets
from sklearn.model_selection import train_test_split

# Each example is a (num_channels, time_series_len) block cut from one recording.
num_channels = 6
time_series_len = 200

key_to_label = {'running': 0,
                'walking': 1,
                'standing': 2}

channels = ['acc_x', 'acc_y', 'acc_z', 'gyr_x', 'gyr_y', 'gyr_z']
assert len(channels) == num_channels, "channels must list exactly num_channels columns"

# Number of windows taken from every recording, computed once so that the
# array allocation and the fill loop below can never disagree.
# NOTE(review): the `- 1` discards the last complete window of each
# recording; kept as in the original -- confirm this is intended.
windows_per_recording = {
    key: [max(data_sample.shape[0] // time_series_len - 1, 0)
          for data_sample in all_data[key]]
    for key in all_data
}
num_samples = sum(sum(counts) for counts in windows_per_recording.values())

data_np = np.zeros((num_samples, num_channels, time_series_len))
label_np = np.zeros(num_samples)

nr_sample = 0
for key in all_data:
    for data_sample, n_windows in zip(all_data[key], windows_per_recording[key]):
        # (rows, channels) view of this recording; windows restart at row 0
        # for every recording, and so does the window counter.
        values = data_sample[channels].to_numpy()
        for window_idx in range(n_windows):
            start_v = window_idx * time_series_len
            # Transpose the (time, channel) slice into (channel, time).
            data_np[nr_sample] = values[start_v: start_v + time_series_len].T
            label_np[nr_sample] = key_to_label[key]
            nr_sample += 1

assert nr_sample == num_samples, "every allocated row must have been filled"

# 60 % train, then the remaining 40 % split evenly into test and validation.
X_train, X_test_val, y_train, y_test_val = train_test_split(data_np, label_np, test_size=0.4, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_test_val, y_test_val, test_size=0.5, random_state=42)

array([[[ 9.89115900e+00,  1.36380920e+01,  1.36716160e+01, ...,
         -2.42221070e+00, -2.04393000e+00, -2.20195000e+00],
        [-2.46656500e+01, -2.40934300e+01, -1.75165560e+01, ...,
         -2.10029600e+00, -1.93508910e+00, -1.87045290e+00],
        [ 2.49995420e+01,  1.94378200e+01,  7.21063230e+00, ...,
         -3.90167240e-02, -6.20803830e-01, -1.19781500e+00],
        [ 2.73529050e-01,  7.56103500e-01,  5.84594700e-01, ...,
          5.14282200e-01,  2.21344000e-01,  4.45098880e-02],
        [-2.74613950e+00, -5.62347400e-01,  2.71334840e+00, ...,
          8.95996100e-01,  1.25926210e+00,  1.35725400e+00],
        [-7.82241800e-01, -1.22006230e+00, -1.60995480e+00, ...,
         -1.32020570e+00, -9.66537500e-01, -6.19262700e-01]],

       [[-1.76141360e+00, -2.21630860e+00, -2.62811280e+00, ...,
          1.42940980e+01,  9.24234000e+00,  9.40036000e+00],
        [ 3.32214360e-01, -2.73513800e-01, -7.95455930e-01, ...,
         -1.55604860e+01, -1.22732540e+01, -8.25338