In [7]:
import csv
import os
import random
from statistics import StatisticsError, mean, mode

def make_csvs(filenames, datasets, rows=False):
    """Write each dataset to its matching file as space-delimited CSV.

    ``filenames`` and ``datasets`` are paired positionally; any number of
    pairs is accepted (pairing stops at the shorter of the two sequences).

    Args:
        filenames: Sequence of output file paths. Existing files are
            overwritten.
        datasets: Sequence of datasets, one per filename.
        rows: If true, each dataset is an iterable of rows and every row is
            written on its own line. Otherwise each dataset is a single row
            written on one line.
    """
    for filename, dataset in zip(filenames, datasets):
        # newline='' hands line-ending control to the csv module; without it
        # platforms that translate '\n' would emit '\r\r\n' (blank lines
        # between rows when the file is read back).
        with open(filename, "w", newline="") as my_csv:
            csv_writer = csv.writer(my_csv, delimiter=' ')
            if rows:
                csv_writer.writerows(dataset)
            else:
                csv_writer.writerow(dataset)
        print(filename, 'created')

def _sliding_windows(stream, window_size, step):
    """Slice ``stream`` into windows of up to ``window_size`` items, one starting every ``step`` items."""
    return [stream[start:start + window_size] for start in range(0, len(stream), step)]


def _pad_with_mean(window, window_size):
    """Right-pad a short window to ``window_size`` items with the truncated mean of its own values."""
    missing = window_size - len(window)
    if missing <= 0:
        return window
    return window + [int(mean(window))] * missing


def _majority_label(labels):
    """Return the most common label of a window, or the rounded mean if no mode can be determined."""
    try:
        return mode(labels)
    except StatisticsError:
        # Python < 3.8 raises StatisticsError when the mode is not unique.
        return int(round(mean(labels)))


def generate_files(window_size, overlap_size, training_split, data_dir='./DIMDataset'):
    """Turn the combined accelerometer CSV into stream, window and train/test files.

    Reads ``<data_dir>/clean_combined.csv``, whose first four comma-separated
    columns are parsed as integers (x, y, z, activity label), then writes:

    * ``stream_{x,y,z,actv}.csv`` - each full column on a single line;
    * ``windows_{x,y,z,actv}.csv`` - one window per line; the activity file
      holds a single label per window;
    * ``train/...`` and ``test/...`` - the window rows split at
      ``int(n_windows * training_split)`` (first part is the training set).

    Every window shorter than ``window_size`` (there can be several at the end
    of the stream when the step is small) is padded with the truncated mean of
    its own values so all x/y/z rows have equal length. Window labels are the
    most common label among the *real* samples of the window; padding is never
    applied to labels, so it cannot outvote them.

    Args:
        window_size: Number of samples per window; must be positive.
        overlap_size: Number of samples shared by consecutive windows; must be
            smaller than ``window_size``.
        training_split: Fraction of windows written to the training files.
        data_dir: Directory containing ``clean_combined.csv``; all outputs are
            written beneath it.

    Raises:
        ValueError: If ``window_size`` is not positive or the step
            ``window_size - overlap_size`` is not positive.
    """
    step = window_size - overlap_size
    if window_size <= 0 or step <= 0:
        raise ValueError(
            'window_size must be positive and greater than overlap_size '
            '(got window_size={!r}, overlap_size={!r})'.format(window_size, overlap_size))

    # Paths are built with '/' so the default produces exactly the same file
    # names (and printed messages) as before on every platform.
    root = data_dir.rstrip('/')

    def in_root(*names):
        return [root + '/' + name for name in names]

    X, Y, Z, actv = [], [], [], []

    # pull data from file, split into lists
    with open(root + '/clean_combined.csv', 'r', newline='') as f:
        for row in csv.reader(f):
            if not any(cell.strip() for cell in row):
                continue  # tolerate blank lines (e.g. a trailing newline)
            X.append(int(row[0]))
            Y.append(int(row[1]))
            Z.append(int(row[2]))
            actv.append(int(row[3]))
    print('Samples:', len(X))

    # write streams to file
    stream_filenames = in_root('stream_x.csv', 'stream_y.csv',
                               'stream_z.csv', 'stream_actv.csv')
    make_csvs(stream_filenames, [X, Y, Z, actv])

    ################################################################################################

    # group streams into windows and pad every short trailing window
    X_windows, Y_windows, Z_windows = (
        [_pad_with_mean(window, window_size)
         for window in _sliding_windows(stream, window_size, step)]
        for stream in (X, Y, Z)
    )
    # one label per window, computed from the unpadded samples
    actv_windows = [[_majority_label(window)]
                    for window in _sliding_windows(actv, window_size, step)]
    print('Windows:', len(X_windows))

    # write windows to file
    window_filenames = in_root('windows_x.csv', 'windows_y.csv',
                               'windows_z.csv', 'windows_actv.csv')
    datasets = [X_windows, Y_windows, Z_windows, actv_windows]
    make_csvs(window_filenames, datasets, rows=True)

    ################################################################################################

    # split windows into train / test sets
    marker = int(len(X_windows) * training_split)
    train_filenames = in_root('train/empatica/acc_x_train.csv',
                              'train/empatica/acc_y_train.csv',
                              'train/empatica/acc_z_train.csv',
                              'train/actv_train.csv')

    test_filenames = in_root('test/empatica/acc_x_test.csv',
                             'test/empatica/acc_y_test.csv',
                             'test/empatica/acc_z_test.csv',
                             'test/actv_test.csv')

    os.makedirs(root + '/train/empatica/', exist_ok=True)
    os.makedirs(root + '/test/empatica/', exist_ok=True)

    print()
    for windows, train_name, test_name in zip(datasets, train_filenames, test_filenames):
        for out_name, out_rows in ((train_name, windows[:marker]),
                                   (test_name, windows[marker:])):
            # newline='' lets the csv module control line endings.
            with open(out_name, 'w', newline='') as out_f:
                csv.writer(out_f, delimiter=' ').writerows(out_rows)
            print(out_name, 'created')
        

In [8]:
# 112-sample windows overlapping by 20% (22 samples); the first 90% of the
# windows are written to the training files, the remainder to the test files.
generate_files(window_size=112, overlap_size=int(0.2 * 112), training_split=0.9)

Samples: 6441
./DIMDataset/stream_x.csv created
./DIMDataset/stream_y.csv created
./DIMDataset/stream_z.csv created
./DIMDataset/stream_actv.csv created
Windows: 72
./DIMDataset/windows_x.csv created
./DIMDataset/windows_y.csv created
./DIMDataset/windows_z.csv created
./DIMDataset/windows_actv.csv created

./DIMDataset/train/empatica/acc_x_train.csv created
./DIMDataset/test/empatica/acc_x_test.csv created
./DIMDataset/train/empatica/acc_y_train.csv created
./DIMDataset/test/empatica/acc_y_test.csv created
./DIMDataset/train/empatica/acc_z_train.csv created
./DIMDataset/test/empatica/acc_z_test.csv created
./DIMDataset/train/actv_train.csv created
./DIMDataset/test/actv_test.csv created
