In [2]:
import torch
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from src.arch import NN
from src import setup
import pandas as pd
import matplotlib.pyplot as plt
import torch.optim as optim
from pathlib import Path
from scipy.signal import resample


In [3]:
#load data
raw_train = pd.read_csv(setup.raw_train_path) 
train_demographics = pd.read_csv(setup.train_demographics_path)
raw_test = pd.read_csv(setup.raw_test_path)
test_demographics = pd.read_csv(setup.test_demographics_path)

In [21]:
# making 1D data lists for each sensor
gestures = raw_train['gesture'].unique()
subjects = raw_train['subject'].unique()

#sensor data lists
svm_data = []

#tof data
tof_data_1 = []
tof_data_2 = []
tof_data_3 = []
tof_data_4 = []
tof_data_5 = []

#thermo data
thermo_data_1 = []
thermo_data_2 = []
thermo_data_3 = []
thermo_data_4 = []
thermo_data_5 = []

thermo_lists = {1: thermo_data_1, 2: thermo_data_2, 3: thermo_data_3,
                4: thermo_data_4, 5: thermo_data_5}

gyro_data = []

#demographics data
shoulder_to_wrist_cm = []
elbow_to_wrist_cm = []
height = []
handedness = []
adult_child = []
sex = []
age = []

tof_data_1 = []
tof_data_2 = []
tof_data_3 = []
tof_data_4 = []
tof_data_5 = []

tof_lists = {1: tof_data_1, 2: tof_data_2, 3: tof_data_3,
                4: tof_data_4, 5: tof_data_5}


#labels for each gesture and subject
labels = []
for gesture in gestures:   
    for subject in subjects:
        subject_data = raw_train[(raw_train['gesture'] == gesture) & (raw_train['subject'] == subject)]
        subject_demographics = train_demographics[train_demographics['subject'] == subject]

        
        #calculating svm (sensor vector magnitude) from accelerometer data
        svm = np.sqrt(subject_data['acc_x']**2 + subject_data['acc_y']**2 + subject_data['acc_z']**2)
        svm_data.append(svm)
        
        #adding thermo data

        for sensor_id in range(1, 6):
            thermo_lists[sensor_id].append(subject_data[[f'thm_{sensor_id}' for pixel in range(64)]].values)

        #adding gyro data
        gyro_data.append(subject_data[['rot_w', 'rot_x', 'rot_y', 'rot_z']].values)

        #separating tof data into 8x8 grids, sequentially stacked (5 x 3D arrays, one per sensor)

       
        for sensor_id in range(1, 6):
            tof_cols = [f'tof_{sensor_id}_v{pixel}' for pixel in range(64)]
            tof_data_1D = subject_data[tof_cols].values

            #num_samples = tof_data_1D.shape[0]
            #tof_frame_1 = tof_data_1D.reshape(num_samples, 8, 8)  # Reshape to 8x8 grid 

            tof_lists[sensor_id].append(tof_data_1D)
            

        labels.append(gesture)

        #adding demographics data
        shoulder_to_wrist_cm.append(subject_demographics['shoulder_to_wrist_cm'].values[0])
        elbow_to_wrist_cm.append(subject_demographics['elbow_to_wrist_cm'].values[0])
        height.append(subject_demographics['height_cm'].values[0])
        handedness.append(subject_demographics['handedness'].values[0])
        adult_child.append(subject_demographics['adult_child'].values[0])
        sex.append(subject_demographics['sex'].values[0])
        age.append(subject_demographics['age'].values[0])


In [22]:
svm_data_resampled = [resample(svm, 100) for svm in svm_data] 

thermo_data_1_resampled = [resample(thermo, 100) for thermo in thermo_data_1] 
thermo_data_2_resampled = [resample(thermo, 100) for thermo in thermo_data_2]
thermo_data_3_resampled = [resample(thermo, 100) for thermo in thermo_data_3]
thermo_data_4_resampled = [resample(thermo, 100) for thermo in thermo_data_4]
thermo_data_5_resampled = [resample(thermo, 100) for thermo in thermo_data_5]
     
gyro_data_resampled = [resample(gyro, 500) for gyro in gyro_data] 

tof_data_1_resampled = []
tof_data_2_resampled = []
tof_data_3_resampled = []
tof_data_4_resampled = []
tof_data_5_resampled = []

for tof in tof_data_1:
    resampled_array = resample(tof, 100)
    tof_data_1_resampled.append(resampled_array)
for tof in tof_data_2:
    resampled_array = resample(tof, 100)
    tof_data_2_resampled.append(resampled_array)
for tof in tof_data_3:
    resampled_array = resample(tof, 100)
    tof_data_3_resampled.append(resampled_array)
for tof in tof_data_4:
    resampled_array = resample(tof, 100)
    tof_data_4_resampled.append(resampled_array)
for tof in tof_data_5:
    resampled_array = resample(tof, 100)
    tof_data_5_resampled.append(resampled_array)


In [23]:
svm_data_array = np.array(svm_data_resampled)[..., np.newaxis]  # Adding a new axis for compatibility with the model input
thermo_data_1_array = np.array(thermo_data_1_resampled)
thermo_data_2_array = np.array(thermo_data_2_resampled)
thermo_data_3_array = np.array(thermo_data_3_resampled)
thermo_data_4_array = np.array(thermo_data_4_resampled)
thermo_data_5_array = np.array(thermo_data_5_resampled)

tof_data_1_array = np.array(tof_data_1_resampled)
tof_data_2_array = np.array(tof_data_2_resampled)
tof_data_3_array = np.array(tof_data_3_resampled)
tof_data_4_array = np.array(tof_data_4_resampled)
tof_data_5_array = np.array(tof_data_5_resampled)

gyro_data_array = np.array(gyro_data_resampled)
print(svm_data_array.shape, gyro_data_array.shape, thermo_data_1_array.shape, thermo_data_2_array.shape, thermo_data_3_array.shape, thermo_data_4_array.shape, thermo_data_5_array.shape, tof_data_1_array.shape, tof_data_2_array.shape, tof_data_3_array.shape, tof_data_4_array.shape, tof_data_5_array.shape)

(1458, 100, 1) (1458, 500, 4) (1458, 100, 64) (1458, 100, 64) (1458, 100, 64) (1458, 100, 64) (1458, 100, 64) (1458, 100, 64) (1458, 100, 64) (1458, 100, 64) (1458, 100, 64) (1458, 100, 64)


In [24]:
#splitting into train and val sets
input_data = np.concatenate((svm_data_array, thermo_data_1_array, thermo_data_2_array, thermo_data_3_array, thermo_data_4_array, thermo_data_5_array, tof_data_1_array), axis=2)

In [25]:
input_data.shape

(1458, 100, 385)