Anomaly_Detector.py
Used to preprocess data and detect possible outliers that might corrupt the model training.
Parameters:
		- GPS latitude increments threshold (0.2).
		- GPS longitude increments threshold (0.2).
		- GPS altitude increments threshold (500).
		- Timestamp value length (27).
		- Z-axis magnetometer value threshold (2000).

In [3]:
import sys


def anomaly_detection(inp, out, th_lat, th_long, th_timestamp, th_sensor, th_alt, type):
    writer = csv.writer(out, delimiter=",")

    for row in csv.reader(inp):
        if row[0] != "id":
            if type == 'gps':
                gps_lat_increment = float(row[3])
                gps_long_increment = float(row[4])
                gps_alt_increment = float(row[5])
            else:
                gps_lat_increment = 0.0
                gps_long_increment = 0.0
                gps_alt_increment = 0.0
            if type == 'magn':
                magn_z = float(row[5])
            else:
                magn_z = 0.0
            timestamp = row[2]
            if (gps_lat_increment < th_lat) and (gps_long_increment < th_long) and (len(timestamp) < th_timestamp) and \
                    (not timestamp.startswith('1970')) and (magn_z < th_sensor) and (gps_alt_increment < th_alt):
                writer.writerow(row)
            else:
                if gps_lat_increment >= th_lat:
                    print("GPS Latitude increment too high: " + str(gps_lat_increment))
                if gps_long_increment >= th_long:
                    print("GPS Longitude increment too high: " + str(gps_long_increment))
                if gps_alt_increment >= th_alt:
                    print("GPS Altitude increment too high: " + str(gps_alt_increment))
                if len(timestamp) >= th_timestamp or timestamp.startswith('1970'):
                    print("Wrong timestamp: " + timestamp)
                if magn_z > th_sensor:
                    print("Wrong sensor value: " + str(magn_z))
                print("")
        else:
            writer.writerow(row)

    inp.close()
    out.close()


if __name__ == '__main__':
    acc_input = open('sensoringData_acc.csv', 'r')
    gyro_input = open('sensoringData_gyro.csv', 'r')
    magn_input = open('sensoringData_magn.csv', 'r')
    gps_input = open('sensoringData_gps.csv', 'r')

    acc_output = open('sensoringData_acc_clean.csv', 'w', newline="")
    gyro_output = open('sensoringData_gyro_clean.csv', 'w', newline="")
    magn_output = open('sensoringData_magn_clean.csv', 'w', newline="")
    gps_output = open('sensoringData_gps_clean.csv', 'w', newline="")

    # 0.2, 0.2, 27, 2000, 500
    anomaly_detection(acc_input, acc_output, float(sys.argv[1]), float(sys.argv[2]), int(sys.argv[3]), int(sys.argv[4]),
                      float(sys.argv[5]), 'acc')
    anomaly_detection(gyro_input, gyro_output, float(sys.argv[1]), float(sys.argv[2]), int(sys.argv[3]),
                      int(sys.argv[4]), float(sys.argv[5]), 'gyro')
    anomaly_detection(magn_input, magn_output, float(sys.argv[1]), float(sys.argv[2]), int(sys.argv[3]),
                      int(sys.argv[4]), float(sys.argv[5]), 'magn')
    anomaly_detection(gps_input, gps_output, float(sys.argv[1]), float(sys.argv[2]), int(sys.argv[3]), int(sys.argv[4]),
                      float(sys.argv[5]), 'gps')


ValueError: could not convert string to float: '-f'

Data_Adapter.py.
This script cuts the first and final X seconds from each activity session. It also detects corrupted sessions,
which are the ones that have gaps in the data related to all sensors but GPS (time gaps higher than five seconds).
Also, replicates GPS data, in order to have at least one observation from this sensor in each sliding window.
A validSessions file is also created to fasten feature extraction process by not evaluation sessions that did not record
any GPS observation.
Parameters:
		- Window size, in seconds (20).
		- Seconds to be cut from the first and final part of each session (5).


In [4]:
import csv
import sys

import pandas as pd


def time_adapt_data(n_seconds, cut_seconds):
    acc_input = open('sensoringData_acc_clean.csv', 'r')
    gyro_input = open('sensoringData_gyro_clean.csv', 'r')
    magn_input = open('sensoringData_magn_clean.csv', 'r')
    gps_input = open('sensoringData_gps_clean.csv', 'r')

    acc_output = open('sensoringData_acc_prepared_' + str(n_seconds) + '.csv', 'w', newline="")
    gyro_output = open('sensoringData_gyro_prepared_' + str(n_seconds) + '.csv', 'w', newline="")
    magn_output = open('sensoringData_magn_prepared_' + str(n_seconds) + '.csv', 'w', newline="")
    gps_output = open('sensoringData_gps_prepared_' + str(n_seconds) + '.csv', 'w', newline="")

    acc_writer = csv.writer(acc_output, delimiter=",")
    gyro_writer = csv.writer(gyro_output, delimiter=",")
    magn_writer = csv.writer(magn_output, delimiter=",")
    gps_writer = csv.writer(gps_output, delimiter=",")

    act_columns = ['id', 'user', 'init_timestamp', 'end_timestamp', 'activity_id', 'activity']
    act_changes = pd.read_csv('activityChanges.csv', header=0, names=act_columns)

    invalid_sess = []

    def adaptation(input, output, writer, invalid_sess):
        username = ""
        activity_id = 0
        timestamp = 0.0
        timestamp_before = 0.0
        ignore = False

        for row in csv.reader(input):
            if row[0] != "id":
                username_now = row[1]
                activity_id_now = row[6]
                timestamp_now = float(row[2])
                if username_now != username or activity_id_now != activity_id:
                    username = row[1]
                    activity_id = row[6]
                    act_df = act_changes.loc[(act_changes['user'] == int(username)) &
                                             (act_changes['activity_id'] == int(activity_id))]
                    timestamp = act_df['init_timestamp'].values[0] + cut_seconds
                    end_timestamp = act_df['end_timestamp'].values[0] - cut_seconds
                    ignore = False
                    for user_inv, act_inv in invalid_sess:
                        if user_inv == username and act_inv == activity_id:
                            ignore = True
                    if not ignore:
                        if timestamp_now > (timestamp + cut_seconds):
                            ignore = True
                            invalid_sess.append([username, activity_id])
                    timestamp_before = timestamp_now
                if end_timestamp >= timestamp_now >= timestamp and not ignore:
                    if timestamp_now > (timestamp_before + cut_seconds):
                        ignore = True
                        invalid_sess.append([username, activity_id])
                    else:
                        writer.writerow(row)
                timestamp_before = timestamp_now
            else:
                writer.writerow(row)

        input.close()
        output.close()

        return invalid_sess

    def gps_adaptation(input, output, writer, invalid_sess):
        counter = -1
        i = 0
        username = ""
        activity_id = 0
        timestamp = 0.0
        gps_seconds = 1
        row_list = []
        session_list = []
        gps_values = []
        ignore = False

        for row in csv.reader(input):
            if row[0] != "id":
                username_now = row[1]
                activity_id_now = row[9]
                timestamp_now = float(row[2])
                if username_now != username or activity_id_now != activity_id:
                    if counter >= 0 and not ignore:
                        for list_row in row_list:
                            gps_values.append(list_row.copy())
                            i += 1
                        if counter == 0:
                            timestamp_before = float(row_before[2])
                            while timestamp_before >= (timestamp + gps_seconds):
                                timestamp += gps_seconds
                                last_row = row_list[len(row_list) - 1]
                                aux_last_row = last_row.copy()
                                if float(aux_last_row[2]) >= timestamp:
                                    aux_last_row[2] = str(timestamp)
                                    gps_values.insert(i - 1, aux_last_row.copy())
                                    i += 1
                        while end_timestamp >= (float(row_before[2]) + gps_seconds):
                            aux_time = float(row_before[2]) + gps_seconds
                            row_before[2] = aux_time
                            gps_values.append(row_before.copy())
                            i += 1
                    username = row[1]
                    activity_id = row[9]
                    act_df = act_changes.loc[(act_changes['user'] == int(username)) &
                                             (act_changes['activity_id'] == int(activity_id))]
                    timestamp = act_df['init_timestamp'].values[0]
                    end_timestamp = act_df['end_timestamp'].values[0]
                    ignore = False
                    for user_inv, act_inv in invalid_sess:
                        if user_inv == username and act_inv == activity_id:
                            ignore = True
                    if not ignore:
                        row_sess = [username, activity_id, timestamp, end_timestamp]
                        if row_sess not in session_list:
                            session_list.append(row_sess)
                    counter = 0
                    row_list.clear()
                else:
                    if timestamp_now >= (timestamp + gps_seconds) and not ignore:
                        for list_row in row_list:
                            gps_values.append(list_row.copy())
                            i += 1
                        while timestamp_now >= (timestamp + gps_seconds):
                            timestamp += gps_seconds
                            last_row = row_list[len(row_list) - 1]
                            aux_last_row = last_row.copy()
                            if counter == 0 and (float(aux_last_row[2]) >= timestamp):
                                aux_last_row[2] = str(timestamp)
                                gps_values.insert(i - 1, aux_last_row.copy())
                                i += 1
                            else:
                                aux_last_row[2] = str(timestamp)
                                gps_values.append(aux_last_row.copy())
                                i += 1
                        row_list.clear()
                    counter += 1
                row_list.append(row)
                row_before = row
            else:
                writer.writerow(row)

        if end_timestamp >= (float(row_before[2]) + gps_seconds) and not ignore:
            for list_row in row_list:
                gps_values.append(list_row.copy())
            while end_timestamp >= (float(row_before[2]) + gps_seconds):
                aux_time = float(row_before[2]) + gps_seconds
                row_before[2] = aux_time
                gps_values.append(row_before.copy())

        session_output = open('validSessions_' + str(n_seconds) + '.csv', 'w', newline="")
        session_writer = csv.writer(session_output, delimiter=",")
        session_row = ['username', 'activity_id', 'init_timestamp', 'end_timestamp']
        session_writer.writerow(session_row)
        for sess_row in session_list:
            session_writer.writerow(sess_row)

        writer.writerows(gps_values)

        input.close()
        output.close()
        session_output.close()

    invalid_sess = adaptation(acc_input, acc_output, acc_writer, invalid_sess)
    invalid_sess = adaptation(gyro_input, gyro_output, gyro_writer, invalid_sess)
    invalid_sess = adaptation(magn_input, magn_output, magn_writer, invalid_sess)
    gps_adaptation(gps_input, gps_output, gps_writer, invalid_sess)


if __name__ == '__main__':
    # 20, 5
    time_adapt_data(int(sys.argv[1]), int(sys.argv[2]))

ValueError: invalid literal for int() with base 10: '-f'

Data_Splitter.py
Script used to split the data got from the previous script in X parts in order to fasten the feature extraction
process.
Parameters:
		- Window size used, in seconds (20).
		- Number of divisions to be applied (8).


In [None]:
import csv
import sys
import pandas as pd


def sensor_split(input, writers, row_count, last_act_ids, last_usernames, val_sess_df, is_gps):
    counter = 0
    row_counter = 0
    username = ""
    activity_id = 0
    row_list = []
    n_div = 0
    writer_now = writers[0]
    usernames = []
    act_ids = []
    invalid = False
    last_valid_username = ""
    last_valid_activity_id = 0

    for row in csv.reader(input):
        if row[0] != "id":
            if counter == 0:
                if row_count < 0:
                    if username == last_usernames[n_div] and activity_id == last_act_ids[n_div]:
                        row_counter = 0
                        n_div += 1
                        writer_now = writers[n_div]
                else:
                    if row_count <= row_counter:
                        row_counter = 0
                        n_div += 1
                        writer_now = writers[n_div]
                        if not invalid:
                            usernames.append(username)
                            act_ids.append(activity_id)
                        else:
                            usernames.append(last_valid_username)
                            act_ids.append(last_valid_activity_id)
                username = int(row[1])
                if is_gps:
                    activity_id = int(row[9])
                else:
                    activity_id = int(row[6])
                sess_df = val_sess_df.loc[(val_sess_df['user'] == username) &
                                          (val_sess_df['activity_id'] == activity_id)]
                if len(sess_df) > 0:
                    invalid = False
                    last_valid_username = username
                    last_valid_activity_id = activity_id
                else:
                    invalid = True
            username_now = int(row[1])
            if is_gps:
                activity_id_now = int(row[9])
            else:
                activity_id_now = int(row[6])
            if username_now != username or activity_id_now != activity_id:
                if not invalid:
                    for list_row in row_list:
                        writer_now.writerow(list_row)
                row_list.clear()
                counter = 0
            else:
                counter += 1
            row_list.append(row)
            row_counter += 1
        else:
            for writer in writers:
                writer.writerow(row)

    if not invalid:
        usernames.append(username)
        act_ids.append(activity_id)
    else:
        usernames.append(last_valid_username)
        act_ids.append(last_valid_activity_id)

    return act_ids, usernames


def split_data(n_seconds, n_div):
    # We select gyroscope as the marker, as it is the most absent in every session.
    input_gyro = open('sensoringData_gyro_prepared_' + str(n_seconds) + '.csv', 'r')
    row_count_gyro = sum(1 for row in csv.reader(input_gyro))
    row_count_gyro_div = row_count_gyro / n_div

    val_sess_path = './validSessions_' + str(n_seconds) + '.csv'
    val_sess_columns = ['user', 'activity_id', 'init_timestamp', 'end_timestamp']
    val_sess_df = pd.read_csv(val_sess_path, header=0, names=val_sess_columns)
    val_sess_df.head()

    i = 1
    writers_acc = []
    writers_gyro = []
    writers_magn = []
    writers_gps = []
    while i <= n_div:
        writers_acc.append(csv.writer(open('sensoringData_acc_prepared_' + str(n_seconds) + '_' + str(i) + '.csv',
                                      'w', newline=""), delimiter=","))
        writers_gyro.append(csv.writer(open('sensoringData_gyro_prepared_' + str(n_seconds) + '_' + str(i) + '.csv',
                                       'w', newline=""), delimiter=","))
        writers_magn.append(csv.writer(open('sensoringData_magn_prepared_' + str(n_seconds) + '_' + str(i) + '.csv',
                                       'w', newline=""), delimiter=","))
        writers_gps.append(csv.writer(open('sensoringData_gps_prepared_' + str(n_seconds) + '_' + str(i) + '.csv',
                                      'w', newline=""), delimiter=","))
        i += 1

    input_acc = open('sensoringData_acc_prepared_' + str(n_seconds) + '.csv', 'r')
    input_gyro = open('sensoringData_gyro_prepared_' + str(n_seconds) + '.csv', 'r')
    input_magn = open('sensoringData_magn_prepared_' + str(n_seconds) + '.csv', 'r')
    input_gps = open('sensoringData_gps_prepared_' + str(n_seconds) + '.csv', 'r')

    last_act_ids, last_usernames = sensor_split(input_gyro, writers_gyro, row_count_gyro_div, [], [], val_sess_df, False)
    sensor_split(input_acc, writers_acc, -1, last_act_ids, last_usernames, val_sess_df, False)
    sensor_split(input_magn, writers_magn, -1, last_act_ids, last_usernames, val_sess_df, False)
    sensor_split(input_gps, writers_gps, -1, last_act_ids, last_usernames, val_sess_df, True)


if __name__ == '__main__':
    split_data(int(sys.argv[1]), int(sys.argv[2]))

Feature_Extraction.py
Here the feature computation for each window size is made.
For each sliding window, we compute mean, var, mad, max, min and iqr functions over related data.
This creates a file for each of the sets defined:
		0 - Acc + GPS (all users)
		1 - Acc + Magn + GPS (all users but the ones missing magnetometer)
		2 - Acc + Gyro + Magn + GPS (all users but the ones missing gyroscope and magnetometer)
It is coded in a Slurm way to be executed as a job array (one job for every data split).
Parameters:
		- Window size used, in seconds (20).
		- Overlap between windows, in seconds (19).
		- Number of seconds set to cut the first and final part of session (5).
		- Number of divisions applied over data (8).
		- Slurm job array index, from 1 to the number of divisions specified before (or -1 to join all data).

In [None]:
import csv
import time
import numpy as np
import pandas as pd
import sys
from scipy import stats
from astropy.stats import median_absolute_deviation


# This does the main work, which is computing all the features and preparing the data lists to be file-written in the
# next steps.
def get_features(id, xs, ys, zs, speeds, bearings, accuracies, aux_count, aux_count_ng, aux_count_ngnm, data_2, data_1,
                 data_0, user, timestamp, activity_id, activity, type):
    xs_mean = np.mean(xs)
    ys_mean = np.mean(ys)
    zs_mean = np.mean(zs)
    xs_var = np.var(xs)
    ys_var = np.var(ys)
    zs_var = np.var(zs)
    xs_mad = median_absolute_deviation(xs)
    ys_mad = median_absolute_deviation(ys)
    zs_mad = median_absolute_deviation(zs)
    xs_max = max(xs)
    zs_max = max(zs)
    ys_max = max(ys)
    xs_min = min(xs)
    ys_min = min(ys)
    zs_min = min(zs)
    xs_iqr = stats.iqr(xs, rng=(25, 75), interpolation='midpoint')
    ys_iqr = stats.iqr(ys, rng=(25, 75), interpolation='midpoint')
    zs_iqr = stats.iqr(zs, rng=(25, 75), interpolation='midpoint')

    if type != 'gps':
        if type == 'acc':
            aux_count += 1
            data_0.append(
                [id, user, timestamp, xs_mean, ys_mean, zs_mean, xs_var, ys_var, zs_var, xs_mad, ys_mad, zs_mad, xs_max,
                 ys_max, zs_max, xs_min, ys_min, zs_min, xs_iqr, ys_iqr, zs_iqr])
            if user != 14 and user != 18:
                aux_count_ng += 1
                data_1.append(
                    [id, user, timestamp, xs_mean, ys_mean, zs_mean, xs_var, ys_var, zs_var, xs_mad, ys_mad, zs_mad,
                     xs_max, ys_max, zs_max, xs_min, ys_min, zs_min, xs_iqr, ys_iqr, zs_iqr])
                if user != 4 and user != 15 and user != 17:
                    aux_count_ngnm += 1
                    data_2.append(
                        [id, user, timestamp, xs_mean, ys_mean, zs_mean, xs_var, ys_var, zs_var, xs_mad, ys_mad, zs_mad,
                         xs_max, ys_max, zs_max, xs_min, ys_min, zs_min, xs_iqr, ys_iqr, zs_iqr])
        else:
            aux_count += 1
            data_0.append(
                [xs_mean, ys_mean, zs_mean, xs_var, ys_var, zs_var, xs_mad, ys_mad, zs_mad, xs_max, ys_max, zs_max,
                 xs_min, ys_min, zs_min, xs_iqr, ys_iqr, zs_iqr])
            if user != 14 and user != 18:
                aux_count_ng += 1
                data_1.append(
                    [xs_mean, ys_mean, zs_mean, xs_var, ys_var, zs_var, xs_mad, ys_mad, zs_mad, xs_max, ys_max, zs_max,
                     xs_min, ys_min, zs_min, xs_iqr, ys_iqr, zs_iqr])
                if user != 4 and user != 15 and user != 17:
                    aux_count_ngnm += 1
                    data_2.append(
                        [xs_mean, ys_mean, zs_mean, xs_var, ys_var, zs_var, xs_mad, ys_mad, zs_mad, xs_max, ys_max,
                         zs_max, xs_min, ys_min, zs_min, xs_iqr, ys_iqr, zs_iqr])
    else:
        speeds_mean = np.mean(speeds)
        bearings_mean = np.mean(bearings)
        accuracies_mean = np.mean(accuracies)
        speeds_var = np.var(speeds)
        bearings_var = np.var(bearings)
        accuracies_var = np.var(accuracies)
        speeds_mad = median_absolute_deviation(speeds)
        bearings_mad = median_absolute_deviation(bearings)
        accuracies_mad = median_absolute_deviation(accuracies)
        speeds_max = max(speeds)
        bearings_max = max(bearings)
        accuracies_max = max(accuracies)
        speeds_min = min(speeds)
        bearings_min = min(bearings)
        accuracies_min = min(accuracies)
        speeds_iqr = stats.iqr(speeds, rng=(25, 75), interpolation='midpoint')
        bearings_iqr = stats.iqr(bearings, rng=(25, 75), interpolation='midpoint')
        accuracies_iqr = stats.iqr(accuracies, rng=(25, 75), interpolation='midpoint')

        aux_count += 1
        data_0.append(
            [xs_mean, ys_mean, zs_mean, speeds_mean, bearings_mean, accuracies_mean, xs_var, ys_var, zs_var, speeds_var,
             bearings_var, accuracies_var, xs_mad, ys_mad, zs_mad, speeds_mad, bearings_mad, accuracies_mad, xs_max,
             ys_max, zs_max, speeds_max, bearings_max, accuracies_max, xs_min, ys_min, zs_min, speeds_min, bearings_min,
             accuracies_min, xs_iqr, ys_iqr, zs_iqr, speeds_iqr, bearings_iqr, accuracies_iqr, activity_id, activity])
        if user != 14 and user != 18:
            aux_count_ng += 1
            data_1.append(
                [xs_mean, ys_mean, zs_mean, speeds_mean, bearings_mean, accuracies_mean, xs_var, ys_var, zs_var,
                 speeds_var, bearings_var, accuracies_var, xs_mad, ys_mad, zs_mad, speeds_mad, bearings_mad,
                 accuracies_mad, xs_max, ys_max, zs_max, speeds_max, bearings_max, accuracies_max, xs_min, ys_min,
                 zs_min, speeds_min, bearings_min, accuracies_min, xs_iqr, ys_iqr, zs_iqr, speeds_iqr, bearings_iqr,
                 accuracies_iqr, activity_id, activity])
            if user != 4 and user != 15 and user != 17:
                aux_count_ngnm += 1
                data_2.append(
                    [xs_mean, ys_mean, zs_mean, speeds_mean, bearings_mean, accuracies_mean, xs_var, ys_var, zs_var,
                     speeds_var, bearings_var, accuracies_var, xs_mad, ys_mad, zs_mad, speeds_mad, bearings_mad,
                     accuracies_mad, xs_max, ys_max, zs_max, speeds_max, bearings_max, accuracies_max, xs_min, ys_min,
                     zs_min, speeds_min, bearings_min, accuracies_min, xs_iqr, ys_iqr, zs_iqr, speeds_iqr, bearings_iqr,
                     accuracies_iqr, activity_id, activity])

    return data_2, data_1, data_0, aux_count, aux_count_ng, aux_count_ngnm


# All the logic regarding the correct application of each sliding window and feature computation.
def extraction(df, type, n_seconds, overlap, cut_seconds, val_sess_df):
    i = 1
    next_i = 0
    init = False
    init_out = False
    finish_window = False
    stop_count = False
    data_0 = []
    data_1 = []
    data_2 = []
    aux_count_ngnm = 0
    aux_count_ng = 0
    aux_count = 0
    aux_data_ngnm = []
    aux_data_ng = []
    aux_data = []
    xs = []
    ys = []
    zs = []
    speeds = []
    bearings = []
    accuracies = []
    timestamp_list = []
    x_value = ''
    y_value = ''
    z_value = ''
    s_value = ''
    b_value = ''
    a_value = ''
    timestamp = 0
    timestamp_beginning = 0
    timestamp_before = 0
    end_timestamp = 0
    user_beginning = ''
    next_time = 0
    if type == 'acc':
        x_value = 'acc_x_axis'
        y_value = 'acc_y_axis'
        z_value = 'acc_z_axis'
    else:
        if type == 'gyro':
            x_value = 'gyro_x_axis'
            y_value = 'gyro_y_axis'
            z_value = 'gyro_z_axis'
        else:
            if type == 'magn':
                x_value = 'magn_x_axis'
                y_value = 'magn_y_axis'
                z_value = 'magn_z_axis'
            else:
                if type == 'gps':
                    x_value = 'gps_lat_increment'
                    y_value = 'gps_long_increment'
                    z_value = 'gps_alt_increment'
                    s_value = 'gps_speed'
                    b_value = 'gps_bearing'
                    a_value = 'gps_accuracy'

    while i < len(df):
        i_before = i
        id = df['id'].values[i]
        user = df['user'].values[i]
        timestamp = float(df['timestamp'].values[i])
        activity_id = df['activity_id'].values[i]
        activity = df['activity'].values[i]
        sess_df = val_sess_df.loc[(val_sess_df['user'] == int(user)) & (val_sess_df['activity_id'] == int(activity_id))]
        if len(sess_df) > 0:
            invalid = False
        else:
            invalid = True

        if not init and not invalid:
            timestamp_beginning = sess_df['init_timestamp'].values[0] + cut_seconds
            end_timestamp = sess_df['end_timestamp'].values[0] - cut_seconds
            next_time = timestamp_beginning + (n_seconds - overlap)
            user_beginning = user
            activity_id_beginning = activity_id
            activity_beginning = activity
            init = True
            if i > 1:
                init_out = True

        if init_out or ((user_beginning != user or activity_id_beginning != activity_id
                         or activity_beginning != activity) and not invalid):
            if timestamp_before <= (timestamp_beginning + n_seconds) <= end_timestamp and len(xs) > 0:
                data_2, data_1, data_0, aux_count, aux_count_ng, aux_count_ngnm = get_features(id, xs, ys, zs, speeds,
                    bearings, accuracies, aux_count, aux_count_ng, aux_count_ngnm, data_2, data_1, data_0,
                    user_beginning, timestamp_before, activity_id_beginning, activity_beginning, type)

            if aux_count > 0:
                aux_data.append([activity_id_beginning, aux_count])
            if aux_count_ng > 0:
                aux_data_ng.append([activity_id_beginning, aux_count_ng])
            if aux_count_ngnm > 0:
                aux_data_ngnm.append([activity_id_beginning, aux_count_ngnm])
            aux_count = 0
            aux_count_ng = 0
            aux_count_ngnm = 0
            timestamp_beginning = sess_df['init_timestamp'].values[0] + cut_seconds
            end_timestamp = sess_df['end_timestamp'].values[0] - cut_seconds
            next_time = timestamp_beginning + (n_seconds - overlap)
            user_beginning = user
            activity_id_beginning = activity_id
            activity_beginning = activity
            xs = []
            ys = []
            zs = []
            speeds = []
            bearings = []
            accuracies = []
            timestamp_list = []
            init_out = False
            stop_count = False
            i += 1
        else:
            if timestamp >= next_time and not stop_count and not invalid:
                next_i = i
                stop_count = True
            if end_timestamp >= timestamp >= timestamp_beginning + n_seconds and not invalid:
                if not xs:
                    print("timestamp: " + str(timestamp))
                    print("timestamp_beginning: " + str(timestamp_beginning))
                    print("timestamp_before: " + str(timestamp_before))
                    print("end_timestamp: " + str(end_timestamp))
                    print("activity_id: " + str(activity_id))
                    print("user: " + str(user))
                    print("next_i: " + str(next_i))
                    print("next_time: " + str(next_time))
                    print("i: " + str(i))
                    print("type: " + str(type))
                data_2, data_1, data_0, aux_count, aux_count_ng, aux_count_ngnm = get_features(id, xs, ys, zs, speeds,
                    bearings, accuracies, aux_count, aux_count_ng, aux_count_ngnm, data_2, data_1, data_0, user,
                    timestamp, activity_id, activity, type)

                timestamp_beginning = next_time
                next_time = timestamp_beginning + (n_seconds - overlap)
                xs = []
                ys = []
                zs = []
                speeds = []
                bearings = []
                accuracies = []
                timestamp_list = []

                user = df['user'].values[i]
                timestamp = float(df['timestamp'].values[i])
                activity_id = df['activity_id'].values[i]
                activity = df['activity'].values[i]
                if next_i < i:
                    if (df['user'].values[next_i] == user) and (df['activity_id'].values[next_i] == activity_id) and (
                            df['activity'].values[next_i] == activity):
                        i = next_i
                    else:
                        i += 1
                else:
                    i += 1
                stop_count = False
                finish_window = True

            else:
                i += 1

        if not finish_window and not invalid and (end_timestamp >= timestamp >= timestamp_beginning):
            xs.append(df[x_value].values[i_before])
            ys.append(df[y_value].values[i_before])
            zs.append(df[z_value].values[i_before])
            if type == 'gps':
                speeds.append(df[s_value].values[i_before])
                bearings.append(df[b_value].values[i_before])
                accuracies.append(df[a_value].values[i_before])
            timestamp_list.append(float(df['timestamp'].values[i_before]))
        timestamp_before = timestamp
        finish_window = False

    if timestamp_before <= (timestamp_beginning + n_seconds) <= end_timestamp and not invalid and len(xs) > 0:
        data_2, data_1, data_0, aux_count, aux_count_ng, aux_count_ngnm = get_features(id, xs, ys, zs, speeds, bearings,
            accuracies, aux_count, aux_count_ng, aux_count_ngnm, data_2, data_1, data_0, user_beginning,
            timestamp_before, activity_id_beginning, activity_beginning, type)

    if aux_count > 0:
        aux_data.append([activity_id_beginning, aux_count])
    if aux_count_ng > 0:
        aux_data_ng.append([activity_id_beginning, aux_count_ng])
    if aux_count_ngnm > 0:
        aux_data_ngnm.append([activity_id_beginning, aux_count_ngnm])
    return data_2, data_1, data_0, aux_data_ngnm, aux_data_ng, aux_data


# Function to initialize and finish all the computation process. It does some calculus in the end to make sure that each
# sensor has the same number of computed windows, getting rid of possible limit situations.
def process_data_split(n_div, n_seconds, overlap, cut_seconds):
    acc_path = './sensoringData_acc_prepared_' + str(n_seconds) + '_' + str(n_div) + '.csv'
    gyro_path = './sensoringData_gyro_prepared_' + str(n_seconds) + '_' + str(n_div) + '.csv'
    magn_path = './sensoringData_magn_prepared_' + str(n_seconds) + '_' + str(n_div) + '.csv'
    gps_path = './sensoringData_gps_prepared_' + str(n_seconds) + '_' + str(n_div) + '.csv'
    val_sess_path = './validSessions_' + str(n_seconds) + '.csv'

    acc_columns = ['id', 'user', 'timestamp', 'acc_x_axis', 'acc_y_axis', 'acc_z_axis', 'activity_id', 'activity']
    gyro_columns = ['id', 'user', 'timestamp', 'gyro_x_axis', 'gyro_y_axis', 'gyro_z_axis', 'activity_id', 'activity']
    magn_columns = ['id', 'user', 'timestamp', 'magn_x_axis', 'magn_y_axis', 'magn_z_axis', 'activity_id', 'activity']
    gps_columns = ['id', 'user', 'timestamp', 'gps_lat_increment', 'gps_long_increment', 'gps_alt_increment',
                   'gps_speed', 'gps_bearing', 'gps_accuracy', 'activity_id', 'activity']
    val_sess_columns = ['user', 'activity_id', 'init_timestamp', 'end_timestamp']
    acc_df = pd.read_csv(acc_path, header=0, names=acc_columns)
    acc_df.head()
    gyro_df = pd.read_csv(gyro_path, header=0, names=gyro_columns)
    gyro_df.head()
    magn_df = pd.read_csv(magn_path, header=0, names=magn_columns)
    magn_df.head()
    gps_df = pd.read_csv(gps_path, header=0, names=gps_columns)
    gps_df.head()
    val_sess_df = pd.read_csv(val_sess_path, header=0, names=val_sess_columns)
    val_sess_df.head()

    data_0 = []
    data_1 = []
    data_2 = []

    results = []
    sensors = [[acc_df, 'acc'], [gyro_df, 'gyro'], [magn_df, 'magn'], [gps_df, 'gps']]
    for sensor in sensors:
        ext_results = extraction(sensor[0], sensor[1], n_seconds, overlap, cut_seconds, val_sess_df)
        results.append(ext_results)

    acc_data_2, acc_data_1, acc_data_0, aux_acc_ngnm, aux_acc_ng, aux_acc = results[0]
    gyro_data_2, gyro_data_1, gyro_data_0, aux_gyro_ngnm, aux_gyro_ng, aux_gyro = results[1]
    magn_data_2, magn_data_1, magn_data_0, aux_magn_ngnm, aux_magn_ng, aux_magn = results[2]
    gps_data_2, gps_data_1, gps_data_0, aux_gps_ngnm, aux_gps_ng, aux_gps = results[3]

    l12 = len(acc_data_2)
    l22 = len(gyro_data_2)
    l32 = len(magn_data_2)
    l42 = len(gps_data_2)
    l11 = len(acc_data_1)
    l31 = len(magn_data_1)
    l41 = len(gps_data_1)
    l10 = len(acc_data_0)
    l40 = len(gps_data_0)
    print(str(l12))
    print(str(l22))
    print(str(l32))
    print(str(l42))
    print(str(l11))
    print(str(l31))
    print(str(l41))
    print(str(l10))
    print(str(l40))

    if l12 != l22 or l12 != l32 or l12 != l42 or l22 != l32 or l22 != l42 or l32 != l42:
        z = 0
        acc_val = 0
        minLen = min(len(aux_acc_ngnm), len(aux_gyro_ngnm), len(aux_magn_ngnm), len(aux_gps_ngnm))
        while z < minLen:
            min_val = min(aux_acc_ngnm[z][1], aux_gyro_ngnm[z][1], aux_magn_ngnm[z][1], aux_gps_ngnm[z][1])
            if aux_acc_ngnm[z][1] != aux_gyro_ngnm[z][1] or aux_acc_ngnm[z][1] != aux_magn_ngnm[z][1] or \
                    aux_acc_ngnm[z][1] != aux_gps_ngnm[z][1]:
                if min_val == aux_acc_ngnm[z][1]:
                    gyro_del = aux_gyro_ngnm[z][1] - min_val
                    k = 0
                    while k < gyro_del:
                        gyro_data_2.pop(acc_val + min_val)
                        k += 1
                    magn_del = aux_magn_ngnm[z][1] - min_val
                    k = 0
                    while k < magn_del:
                        magn_data_2.pop(acc_val + min_val)
                        k += 1
                    gps_del = aux_gps_ngnm[z][1] - min_val
                    k = 0
                    while k < gps_del:
                        gps_data_2.pop(acc_val + min_val)
                        k += 1
                else:
                    if min_val == aux_gyro_ngnm[z][1]:
                        acc_del = aux_acc_ngnm[z][1] - min_val
                        k = 0
                        while k < acc_del:
                            acc_data_2.pop(acc_val + min_val)
                            k += 1
                        magn_del = aux_magn_ngnm[z][1] - min_val
                        k = 0
                        while k < magn_del:
                            magn_data_2.pop(acc_val + min_val)
                            k += 1
                        gps_del = aux_gps_ngnm[z][1] - min_val
                        k = 0
                        while k < gps_del:
                            gps_data_2.pop(acc_val + min_val)
                            k += 1
                    else:
                        if min_val == aux_magn_ngnm[z][1]:
                            acc_del = aux_acc_ngnm[z][1] - min_val
                            k = 0
                            while k < acc_del:
                                acc_data_2.pop(acc_val + min_val)
                                k += 1
                            gyro_del = aux_gyro_ngnm[z][1] - min_val
                            k = 0
                            while k < gyro_del:
                                gyro_data_2.pop(acc_val + min_val)
                                k += 1
                            gps_del = aux_gps_ngnm[z][1] - min_val
                            k = 0
                            while k < gps_del:
                                gps_data_2.pop(acc_val + min_val)
                                k += 1
                        else:
                            if min_val == aux_gps_ngnm[z][1]:
                                acc_del = aux_acc_ngnm[z][1] - min_val
                                k = 0
                                while k < acc_del:
                                    acc_data_2.pop(acc_val + min_val)
                                    k += 1
                                gyro_del = aux_gyro_ngnm[z][1] - min_val
                                k = 0
                                while k < gyro_del:
                                    gyro_data_2.pop(acc_val + min_val)
                                    k += 1
                                magn_del = aux_magn_ngnm[z][1] - min_val
                                k = 0
                                while k < magn_del:
                                    magn_data_2.pop(acc_val + min_val)
                                    k += 1
            acc_val += min_val
            z += 1

    if l11 != l31 or l11 != l41 or l31 != l41:
        z = 0
        acc_val = 0
        minLen = min(len(aux_acc_ng), len(aux_magn_ng), len(aux_gps_ng))
        while z < minLen:
            min_val = min(aux_acc_ng[z][1], aux_magn_ng[z][1], aux_gps_ng[z][1])
            if aux_acc_ng[z][1] != aux_magn_ng[z][1] or aux_acc_ng[z][1] != aux_gps_ng[z][1]:
                if min_val == aux_acc_ng[z][1]:
                    magn_del = aux_magn_ng[z][1] - min_val
                    k = 0
                    while k < magn_del:
                        magn_data_1.pop(acc_val + min_val)
                        k += 1
                    gps_del = aux_gps_ng[z][1] - min_val
                    k = 0
                    while k < gps_del:
                        gps_data_1.pop(acc_val + min_val)
                        k += 1
                else:
                    if min_val == aux_magn_ng[z][1]:
                        acc_del = aux_acc_ng[z][1] - min_val
                        k = 0
                        while k < acc_del:
                            acc_data_1.pop(acc_val + min_val)
                            k += 1
                        gps_del = aux_gps_ng[z][1] - min_val
                        k = 0
                        while k < gps_del:
                            gps_data_1.pop(acc_val + min_val)
                            k += 1
                    else:
                        if min_val == aux_gps_ng[z][1]:
                            acc_del = aux_acc_ng[z][1] - min_val
                            k = 0
                            while k < acc_del:
                                acc_data_1.pop(acc_val + min_val)
                                k += 1
                            magn_del = aux_magn_ng[z][1] - min_val
                            k = 0
                            while k < magn_del:
                                magn_data_1.pop(acc_val + min_val)
                                k += 1
            acc_val += min_val
            z += 1

    if l10 != l40:
        z = 0
        acc_val = 0
        minLen = min(len(aux_acc), len(aux_gps))
        while z < minLen:
            min_val = min(aux_acc[z][1], aux_gps[z][1])
            if aux_acc[z][1] != aux_gps[z][1]:
                if min_val == aux_acc[z][1]:
                    gps_del = aux_gps[z][1] - min_val
                    k = 0
                    while k < gps_del:
                        gps_data_0.pop(acc_val + min_val)
                        k += 1
                else:
                    if min_val == aux_gps[z][1]:
                        acc_del = aux_acc[z][1] - min_val
                        k = 0
                        while k < acc_del:
                            acc_data_0.pop(acc_val + min_val)
                            k += 1
            acc_val += min_val
            z += 1

    j = 0
    minDataLen = min(len(acc_data_2), len(gyro_data_2), len(magn_data_2), len(gps_data_2))
    while j < minDataLen:
        data_2.append(acc_data_2[j] + gyro_data_2[j] + magn_data_2[j] + gps_data_2[j])
        j += 1
    j = 0
    minDataLen = min(len(acc_data_1), len(magn_data_1), len(gps_data_1))
    while j < minDataLen:
        data_1.append(acc_data_1[j] + magn_data_1[j] + gps_data_1[j])
        j += 1
    j = 0
    minDataLen = min(len(acc_data_0), len(gps_data_0))
    while j < minDataLen:
        data_0.append(acc_data_0[j] + gps_data_0[j])
        j += 1

    filePath = './'
    fileName_0 = 'sensoringData_feature_prepared_' + str(n_seconds) + '_' + str(overlap) + '_0_split_' + str(n_div) \
                 + '.csv'
    fileName_1 = 'sensoringData_feature_prepared_' + str(n_seconds) + '_' + str(overlap) + '_1_split_' + str(n_div) \
                 + '.csv'
    fileName_2 = 'sensoringData_feature_prepared_' + str(n_seconds) + '_' + str(overlap) + '_2_split_' + str(n_div) \
                 + '.csv'

    # Extract the table headers.
    headers_2 = ['id', 'user', 'timestamp', 'acc_xs_mean', 'acc_ys_mean', 'acc_zs_mean', 'acc_xs_var', 'acc_ys_var',
                 'acc_zs_var', 'acc_xs_mad', 'acc_ys_mad', 'acc_zs_mad', 'acc_xs_max', 'acc_ys_max', 'acc_zs_max',
                 'acc_xs_min', 'acc_ys_min', 'acc_zs_min', 'acc_xs_iqr', 'acc_ys_iqr', 'acc_zs_iqr', 'gyro_xs_mean',
                 'gyro_ys_mean', 'gyro_zs_mean', 'gyro_xs_var', 'gyro_ys_var', 'gyro_zs_var', 'gyro_xs_mad',
                 'gyro_ys_mad', 'gyro_zs_mad', 'gyro_xs_max', 'gyro_ys_max', 'gyro_zs_max', 'gyro_xs_min',
                 'gyro_ys_min', 'gyro_zs_min', 'gyro_xs_iqr', 'gyro_ys_iqr', 'gyro_zs_iqr', 'magn_xs_mean',
                 'magn_ys_mean', 'magn_zs_mean', 'magn_xs_var', 'magn_ys_var', 'magn_zs_var', 'magn_xs_mad',
                 'magn_ys_mad', 'magn_zs_mad', 'magn_xs_max', 'magn_ys_max', 'magn_zs_max', 'magn_xs_min',
                 'magn_ys_min', 'magn_zs_min', 'magn_xs_iqr', 'magn_ys_iqr', 'magn_zs_iqr', 'gps_lat_mean',
                 'gps_long_mean', 'gps_alt_mean', 'gps_speed_mean', 'gps_bearing_mean', 'gps_accuracy_mean',
                 'gps_lat_var', 'gps_long_var', 'gps_alt_var', 'gps_speed_var', 'gps_bearing_var', 'gps_accuracy_var',
                 'gps_lat_mad', 'gps_long_mad', 'gps_alt_mad', 'gps_speed_mad', 'gps_bearing_mad', 'gps_accuracy_mad',
                 'gps_lat_max', 'gps_long_max', 'gps_alt_max', 'gps_speed_max', 'gps_bearing_max', 'gps_accuracy_max',
                 'gps_lat_min', 'gps_long_min', 'gps_alt_min', 'gps_speed_min', 'gps_bearing_min', 'gps_accuracy_min',
                 'gps_lat_iqr', 'gps_long_iqr', 'gps_alt_iqr', 'gps_speed_iqr', 'gps_bearing_iqr', 'gps_accuracy_iqr',
                 'activity_id', 'activity']

    headers_1 = ['id', 'user', 'timestamp', 'acc_xs_mean', 'acc_ys_mean', 'acc_zs_mean', 'acc_xs_var', 'acc_ys_var',
                 'acc_zs_var', 'acc_xs_mad', 'acc_ys_mad', 'acc_zs_mad', 'acc_xs_max', 'acc_ys_max', 'acc_zs_max',
                 'acc_xs_min', 'acc_ys_min', 'acc_zs_min', 'acc_xs_iqr', 'acc_ys_iqr', 'acc_zs_iqr', 'magn_xs_mean',
                 'magn_ys_mean', 'magn_zs_mean', 'magn_xs_var', 'magn_ys_var', 'magn_zs_var', 'magn_xs_mad',
                 'magn_ys_mad', 'magn_zs_mad', 'magn_xs_max', 'magn_ys_max', 'magn_zs_max', 'magn_xs_min',
                 'magn_ys_min', 'magn_zs_min', 'magn_xs_iqr', 'magn_ys_iqr', 'magn_zs_iqr', 'gps_lat_mean',
                 'gps_long_mean', 'gps_alt_mean', 'gps_speed_mean', 'gps_bearing_mean', 'gps_accuracy_mean',
                 'gps_lat_var', 'gps_long_var', 'gps_alt_var', 'gps_speed_var', 'gps_bearing_var', 'gps_accuracy_var',
                 'gps_lat_mad', 'gps_long_mad', 'gps_alt_mad', 'gps_speed_mad', 'gps_bearing_mad', 'gps_accuracy_mad',
                 'gps_lat_max', 'gps_long_max', 'gps_alt_max', 'gps_speed_max', 'gps_bearing_max', 'gps_accuracy_max',
                 'gps_lat_min', 'gps_long_min', 'gps_alt_min', 'gps_speed_min', 'gps_bearing_min', 'gps_accuracy_min',
                 'gps_lat_iqr', 'gps_long_iqr', 'gps_alt_iqr', 'gps_speed_iqr', 'gps_bearing_iqr', 'gps_accuracy_iqr',
                 'activity_id', 'activity']

    headers_0 = ['id', 'user', 'timestamp', 'acc_xs_mean', 'acc_ys_mean', 'acc_zs_mean', 'acc_xs_var', 'acc_ys_var',
                 'acc_zs_var', 'acc_xs_mad', 'acc_ys_mad', 'acc_zs_mad', 'acc_xs_max', 'acc_ys_max', 'acc_zs_max',
                 'acc_xs_min', 'acc_ys_min', 'acc_zs_min', 'acc_xs_iqr', 'acc_ys_iqr', 'acc_zs_iqr', 'gps_lat_mean',
                 'gps_long_mean', 'gps_alt_mean', 'gps_speed_mean', 'gps_bearing_mean', 'gps_accuracy_mean',
                 'gps_lat_var', 'gps_long_var', 'gps_alt_var', 'gps_speed_var', 'gps_bearing_var', 'gps_accuracy_var',
                 'gps_lat_mad', 'gps_long_mad', 'gps_alt_mad', 'gps_speed_mad', 'gps_bearing_mad', 'gps_accuracy_mad',
                 'gps_lat_max', 'gps_long_max', 'gps_alt_max', 'gps_speed_max', 'gps_bearing_max', 'gps_accuracy_max',
                 'gps_lat_min', 'gps_long_min', 'gps_alt_min', 'gps_speed_min', 'gps_bearing_min', 'gps_accuracy_min',
                 'gps_lat_iqr', 'gps_long_iqr', 'gps_alt_iqr', 'gps_speed_iqr', 'gps_bearing_iqr', 'gps_accuracy_iqr',
                 'activity_id', 'activity']

    # Open CSV file for writing.
    csvFile = csv.writer(open(filePath + fileName_0, 'w', newline=""), delimiter=',')
    csvFile_1 = csv.writer(open(filePath + fileName_1, 'w', newline=""), delimiter=',')
    csvFile_2 = csv.writer(open(filePath + fileName_2, 'w', newline=""), delimiter=',')

    # Add the headers and data to the CSV file.
    csvFile.writerow(headers_0)
    csvFile.writerows(data_0)
    csvFile_1.writerow(headers_1)
    csvFile_1.writerows(data_1)
    csvFile_2.writerow(headers_2)
    csvFile_2.writerows(data_2)


# This function calls the main process and writes all the data into CSV files.
def prepare_data(n_seconds, overlap, cut_seconds, n_div, index):
    # Set:
    # 0 - Acc + GPS (all users)
    # 1 - Acc + Magn + GPS (all users but the ones missing magnetometer)
    # 2 - Acc + Gyro + Magn + GPS (all users but the ones missing gyroscope and magnetometer)

    if index > 0:
        process_data_split(index, n_seconds, overlap, cut_seconds)
    else:
        # File path and name.
        filePath = './'
        fileName_0 = 'sensoringData_feature_prepared_' + str(n_seconds) + '_' + str(overlap) + '_0.csv'
        fileName_1 = 'sensoringData_feature_prepared_' + str(n_seconds) + '_' + str(overlap) + '_1.csv'
        fileName_2 = 'sensoringData_feature_prepared_' + str(n_seconds) + '_' + str(overlap) + '_2.csv'

        # Extract the table headers.
        headers_2 = ['id', 'user', 'timestamp', 'acc_xs_mean', 'acc_ys_mean', 'acc_zs_mean', 'acc_xs_var', 'acc_ys_var',
                     'acc_zs_var', 'acc_xs_mad', 'acc_ys_mad', 'acc_zs_mad', 'acc_xs_max', 'acc_ys_max', 'acc_zs_max',
                     'acc_xs_min', 'acc_ys_min', 'acc_zs_min', 'acc_xs_iqr', 'acc_ys_iqr', 'acc_zs_iqr', 'gyro_xs_mean',
                     'gyro_ys_mean', 'gyro_zs_mean', 'gyro_xs_var', 'gyro_ys_var', 'gyro_zs_var', 'gyro_xs_mad',
                     'gyro_ys_mad', 'gyro_zs_mad', 'gyro_xs_max', 'gyro_ys_max', 'gyro_zs_max', 'gyro_xs_min',
                     'gyro_ys_min', 'gyro_zs_min', 'gyro_xs_iqr', 'gyro_ys_iqr', 'gyro_zs_iqr', 'magn_xs_mean',
                     'magn_ys_mean', 'magn_zs_mean', 'magn_xs_var', 'magn_ys_var', 'magn_zs_var', 'magn_xs_mad',
                     'magn_ys_mad', 'magn_zs_mad', 'magn_xs_max', 'magn_ys_max', 'magn_zs_max', 'magn_xs_min',
                     'magn_ys_min', 'magn_zs_min', 'magn_xs_iqr', 'magn_ys_iqr', 'magn_zs_iqr', 'gps_lat_mean',
                     'gps_long_mean', 'gps_alt_mean', 'gps_speed_mean', 'gps_bearing_mean', 'gps_accuracy_mean',
                     'gps_lat_var', 'gps_long_var', 'gps_alt_var', 'gps_speed_var', 'gps_bearing_var',
                     'gps_accuracy_var', 'gps_lat_mad', 'gps_long_mad', 'gps_alt_mad', 'gps_speed_mad',
                     'gps_bearing_mad', 'gps_accuracy_mad', 'gps_lat_max', 'gps_long_max', 'gps_alt_max',
                     'gps_speed_max', 'gps_bearing_max', 'gps_accuracy_max', 'gps_lat_min', 'gps_long_min',
                     'gps_alt_min', 'gps_speed_min', 'gps_bearing_min', 'gps_accuracy_min', 'gps_lat_iqr',
                     'gps_long_iqr', 'gps_alt_iqr', 'gps_speed_iqr', 'gps_bearing_iqr', 'gps_accuracy_iqr',
                     'activity_id', 'activity']

        headers_1 = ['id', 'user', 'timestamp', 'acc_xs_mean', 'acc_ys_mean', 'acc_zs_mean', 'acc_xs_var', 'acc_ys_var',
                     'acc_zs_var', 'acc_xs_mad', 'acc_ys_mad', 'acc_zs_mad', 'acc_xs_max', 'acc_ys_max', 'acc_zs_max',
                     'acc_xs_min', 'acc_ys_min', 'acc_zs_min', 'acc_xs_iqr', 'acc_ys_iqr', 'acc_zs_iqr', 'magn_xs_mean',
                     'magn_ys_mean', 'magn_zs_mean', 'magn_xs_var', 'magn_ys_var', 'magn_zs_var', 'magn_xs_mad',
                     'magn_ys_mad', 'magn_zs_mad', 'magn_xs_max', 'magn_ys_max', 'magn_zs_max', 'magn_xs_min',
                     'magn_ys_min', 'magn_zs_min', 'magn_xs_iqr', 'magn_ys_iqr', 'magn_zs_iqr', 'gps_lat_mean',
                     'gps_long_mean', 'gps_alt_mean', 'gps_speed_mean', 'gps_bearing_mean', 'gps_accuracy_mean',
                     'gps_lat_var', 'gps_long_var', 'gps_alt_var', 'gps_speed_var', 'gps_bearing_var',
                     'gps_accuracy_var', 'gps_lat_mad', 'gps_long_mad', 'gps_alt_mad', 'gps_speed_mad',
                     'gps_bearing_mad', 'gps_accuracy_mad', 'gps_lat_max', 'gps_long_max', 'gps_alt_max',
                     'gps_speed_max', 'gps_bearing_max', 'gps_accuracy_max', 'gps_lat_min', 'gps_long_min',
                     'gps_alt_min', 'gps_speed_min', 'gps_bearing_min', 'gps_accuracy_min', 'gps_lat_iqr',
                     'gps_long_iqr', 'gps_alt_iqr', 'gps_speed_iqr', 'gps_bearing_iqr', 'gps_accuracy_iqr',
                     'activity_id', 'activity']

        headers_0 = ['id', 'user', 'timestamp', 'acc_xs_mean', 'acc_ys_mean', 'acc_zs_mean', 'acc_xs_var', 'acc_ys_var',
                     'acc_zs_var', 'acc_xs_mad', 'acc_ys_mad', 'acc_zs_mad', 'acc_xs_max', 'acc_ys_max', 'acc_zs_max',
                     'acc_xs_min', 'acc_ys_min', 'acc_zs_min', 'acc_xs_iqr', 'acc_ys_iqr', 'acc_zs_iqr', 'gps_lat_mean',
                     'gps_long_mean', 'gps_alt_mean', 'gps_speed_mean', 'gps_bearing_mean', 'gps_accuracy_mean',
                     'gps_lat_var', 'gps_long_var', 'gps_alt_var', 'gps_speed_var', 'gps_bearing_var',
                     'gps_accuracy_var', 'gps_lat_mad', 'gps_long_mad', 'gps_alt_mad', 'gps_speed_mad',
                     'gps_bearing_mad', 'gps_accuracy_mad', 'gps_lat_max', 'gps_long_max', 'gps_alt_max',
                     'gps_speed_max', 'gps_bearing_max', 'gps_accuracy_max', 'gps_lat_min', 'gps_long_min',
                     'gps_alt_min', 'gps_speed_min', 'gps_bearing_min', 'gps_accuracy_min', 'gps_lat_iqr',
                     'gps_long_iqr', 'gps_alt_iqr', 'gps_speed_iqr', 'gps_bearing_iqr', 'gps_accuracy_iqr',
                     'activity_id', 'activity']

        i = 1
        data_0 = []
        data_1 = []
        data_2 = []
        while i <= n_div:
            split_path_0 = 'sensoringData_feature_prepared_' + str(n_seconds) + '_' + str(
                overlap) + '_0_split_' + str(i) + '.csv'
            split_path_1 = 'sensoringData_feature_prepared_' + str(n_seconds) + '_' + str(
                overlap) + '_1_split_' + str(i) + '.csv'
            split_path_2 = 'sensoringData_feature_prepared_' + str(n_seconds) + '_' + str(
                overlap) + '_2_split_' + str(i) + '.csv'

            split_0_input = open(split_path_0, 'r')
            split_1_input = open(split_path_1, 'r')
            split_2_input = open(split_path_2, 'r')

            for row in csv.reader(split_0_input):
                if row[0] != "id":
                    data_0.append(row)
            for row in csv.reader(split_1_input):
                if row[0] != "id":
                    data_1.append(row)
            for row in csv.reader(split_2_input):
                if row[0] != "id":
                    data_2.append(row)

            i += 1

        # Open CSV file for writing.
        csvFile = csv.writer(open(filePath + fileName_0, 'w', newline=""), delimiter=',')
        csvFile_1 = csv.writer(open(filePath + fileName_1, 'w', newline=""), delimiter=',')
        csvFile_2 = csv.writer(open(filePath + fileName_2, 'w', newline=""), delimiter=',')

        # Add the headers and data to the CSV file.
        csvFile.writerow(headers_0)
        csvFile.writerows(data_0)
        csvFile_1.writerow(headers_1)
        csvFile_1.writerows(data_1)
        csvFile_2.writerow(headers_2)
        csvFile_2.writerows(data_2)


if __name__ == '__main__':
    # 20, 19, 5, 8
    if sys.argv[5]:
        index = int(sys.argv[5])
    else:
        index = -1
    startTime = time.time()
    prepare_data(int(sys.argv[1]), float(sys.argv[2]), int(sys.argv[3]), int(sys.argv[4]), index)
    elapsedTime = time.time() - startTime
    print(elapsedTime)


SVM.py
Script used to train and test the SVM model proposed and obtaining the results.
It is coded in a Slurm way to be executed as a job array (one job for every fold computed over the data).
Parameters:
		- String formed by the window size, overlap size and corresponding set, divided by low bars (20_19.0_2).
		- Slurm job array index, from 1 to 10.


In [1]:
import csv
import warnings

# import tf as tf

warnings.filterwarnings('ignore')

import time
import numpy as np
import pandas as pd
import os
import matplotlib as mpl

if os.environ.get('DISPLAY', '') == '':
    print('No display found. Using non-interactive Agg backend')
    mpl.use('Agg')
from matplotlib import pyplot as plt
import seaborn as sns
import sys
import itertools
import random
from scipy import stats
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
from sklearn.metrics import classification_report, f1_score
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler


def svm_model(case, index):
    # Data initialization
    sns.set(style='whitegrid', palette='muted', font_scale=1.5)
    feat_case = case
    set = int(feat_case.split('_')[2][0])
    directory = 'svm_' + case.replace('.', '')
    LABELS = ['Inactive', 'Active', 'Walking', 'Driving']
    if not os.path.exists(directory):
        os.makedirs(directory)
    case = "svm_" + case

    if set == 2:
        columns = ['id', 'user', 'timestamp', 'acc_xs_mean', 'acc_ys_mean', 'acc_zs_mean', 'acc_xs_var',
                   'acc_ys_var', 'acc_zs_var', 'acc_xs_mad', 'acc_ys_mad', 'acc_zs_mad', 'acc_xs_max',
                   'acc_ys_max', 'acc_zs_max', 'acc_xs_min', 'acc_ys_min', 'acc_zs_min', 'acc_xs_iqr',
                   'acc_ys_iqr', 'acc_zs_iqr', 'gyro_xs_mean', 'gyro_ys_mean', 'gyro_zs_mean', 'gyro_xs_var',
                   'gyro_ys_var', 'gyro_zs_var', 'gyro_xs_mad', 'gyro_ys_mad', 'gyro_zs_mad', 'gyro_xs_max',
                   'gyro_ys_max', 'gyro_zs_max', 'gyro_xs_min', 'gyro_ys_min', 'gyro_zs_min', 'gyro_xs_iqr',
                   'gyro_ys_iqr', 'gyro_zs_iqr', 'magn_xs_mean', 'magn_ys_mean', 'magn_zs_mean', 'magn_xs_var',
                   'magn_ys_var', 'magn_zs_var', 'magn_xs_mad', 'magn_ys_mad', 'magn_zs_mad', 'magn_xs_max',
                   'magn_ys_max', 'magn_zs_max', 'magn_xs_min', 'magn_ys_min', 'magn_zs_min', 'magn_xs_iqr',
                   'magn_ys_iqr', 'magn_zs_iqr', 'gps_lat_mean', 'gps_long_mean', 'gps_alt_mean',
                   'gps_speed_mean', 'gps_bearing_mean', 'gps_accuracy_mean', 'gps_lat_var', 'gps_long_var',
                   'gps_alt_var', 'gps_speed_var', 'gps_bearing_var', 'gps_accuracy_var', 'gps_lat_mad',
                   'gps_long_mad', 'gps_alt_mad', 'gps_speed_mad', 'gps_bearing_mad', 'gps_accuracy_mad',
                   'gps_lat_max', 'gps_long_max', 'gps_alt_max', 'gps_speed_max', 'gps_bearing_max',
                   'gps_accuracy_max', 'gps_lat_min', 'gps_long_min', 'gps_alt_min', 'gps_speed_min',
                   'gps_bearing_min', 'gps_accuracy_min', 'gps_lat_iqr', 'gps_long_iqr', 'gps_alt_iqr',
                   'gps_speed_iqr', 'gps_bearing_iqr', 'gps_accuracy_iqr', 'activity_id', 'activity']
    else:
        if set == 1:
            columns = ['id', 'user', 'timestamp', 'acc_xs_mean', 'acc_ys_mean', 'acc_zs_mean', 'acc_xs_var',
                       'acc_ys_var', 'acc_zs_var', 'acc_xs_mad', 'acc_ys_mad', 'acc_zs_mad', 'acc_xs_max',
                       'acc_ys_max', 'acc_zs_max', 'acc_xs_min', 'acc_ys_min', 'acc_zs_min', 'acc_xs_iqr',
                       'acc_ys_iqr', 'acc_zs_iqr', 'magn_xs_mean', 'magn_ys_mean', 'magn_zs_mean',
                       'magn_xs_var', 'magn_ys_var', 'magn_zs_var', 'magn_xs_mad', 'magn_ys_mad', 'magn_zs_mad',
                       'magn_xs_max', 'magn_ys_max', 'magn_zs_max', 'magn_xs_min', 'magn_ys_min', 'magn_zs_min',
                       'magn_xs_iqr', 'magn_ys_iqr', 'magn_zs_iqr', 'gps_lat_mean', 'gps_long_mean',
                       'gps_alt_mean', 'gps_speed_mean', 'gps_bearing_mean', 'gps_accuracy_mean', 'gps_lat_var',
                       'gps_long_var', 'gps_alt_var', 'gps_speed_var', 'gps_bearing_var', 'gps_accuracy_var',
                       'gps_lat_mad', 'gps_long_mad', 'gps_alt_mad', 'gps_speed_mad', 'gps_bearing_mad',
                       'gps_accuracy_mad', 'gps_lat_max', 'gps_long_max', 'gps_alt_max', 'gps_speed_max',
                       'gps_bearing_max', 'gps_accuracy_max', 'gps_lat_min', 'gps_long_min', 'gps_alt_min',
                       'gps_speed_min', 'gps_bearing_min', 'gps_accuracy_min', 'gps_lat_iqr', 'gps_long_iqr',
                       'gps_alt_iqr', 'gps_speed_iqr', 'gps_bearing_iqr', 'gps_accuracy_iqr', 'activity_id',
                       'activity']
        else:
            if set == 0:
                columns = ['id', 'user', 'timestamp', 'acc_xs_mean', 'acc_ys_mean', 'acc_zs_mean', 'acc_xs_var',
                           'acc_ys_var', 'acc_zs_var', 'acc_xs_mad', 'acc_ys_mad', 'acc_zs_mad', 'acc_xs_max',
                           'acc_ys_max', 'acc_zs_max', 'acc_xs_min', 'acc_ys_min', 'acc_zs_min', 'acc_xs_iqr',
                           'acc_ys_iqr', 'acc_zs_iqr', 'gps_lat_mean', 'gps_long_mean', 'gps_alt_mean',
                           'gps_speed_mean', 'gps_bearing_mean', 'gps_accuracy_mean', 'gps_lat_var',
                           'gps_long_var', 'gps_alt_var', 'gps_speed_var', 'gps_bearing_var',
                           'gps_accuracy_var', 'gps_lat_mad', 'gps_long_mad', 'gps_alt_mad', 'gps_speed_mad',
                           'gps_bearing_mad', 'gps_accuracy_mad', 'gps_lat_max', 'gps_long_max', 'gps_alt_max',
                           'gps_speed_max', 'gps_bearing_max', 'gps_accuracy_max', 'gps_lat_min',
                           'gps_long_min', 'gps_alt_min', 'gps_speed_min', 'gps_bearing_min',
                           'gps_accuracy_min', 'gps_lat_iqr', 'gps_long_iqr', 'gps_alt_iqr', 'gps_speed_iqr',
                           'gps_bearing_iqr', 'gps_accuracy_iqr', 'activity_id', 'activity']

    df = pd.read_csv('./sensoringData_feature_prepared_' + feat_case + '.csv', header=0, names=columns)
    df.head()

    # Data gathering
    segments = []
    labels = []
    for i in range(0, len(df), 1):
        label = stats.mode(df['activity'][i])[0][0]

        acc_xs_mean = df['acc_xs_mean'].values[i]
        acc_ys_mean = df['acc_ys_mean'].values[i]
        acc_zs_mean = df['acc_zs_mean'].values[i]
        acc_xs_var = df['acc_xs_var'].values[i]
        acc_ys_var = df['acc_ys_var'].values[i]
        acc_zs_var = df['acc_zs_var'].values[i]
        acc_xs_mad = df['acc_xs_mad'].values[i]
        acc_ys_mad = df['acc_ys_mad'].values[i]
        acc_zs_mad = df['acc_zs_mad'].values[i]
        acc_xs_max = df['acc_xs_max'].values[i]
        acc_ys_max = df['acc_ys_max'].values[i]
        acc_zs_max = df['acc_zs_max'].values[i]
        acc_xs_min = df['acc_xs_min'].values[i]
        acc_ys_min = df['acc_ys_min'].values[i]
        acc_zs_min = df['acc_zs_min'].values[i]
        acc_xs_iqr = df['acc_xs_iqr'].values[i]
        acc_ys_iqr = df['acc_ys_iqr'].values[i]
        acc_zs_iqr = df['acc_zs_iqr'].values[i]

        if set != 0 and set != 1:
            gyro_xs_mean = df['gyro_xs_mean'].values[i]
            gyro_ys_mean = df['gyro_ys_mean'].values[i]
            gyro_zs_mean = df['gyro_zs_mean'].values[i]
            gyro_xs_var = df['gyro_xs_var'].values[i]
            gyro_ys_var = df['gyro_ys_var'].values[i]
            gyro_zs_var = df['gyro_zs_var'].values[i]
            gyro_xs_mad = df['gyro_xs_mad'].values[i]
            gyro_ys_mad = df['gyro_ys_mad'].values[i]
            gyro_zs_mad = df['gyro_zs_mad'].values[i]
            gyro_xs_max = df['gyro_xs_max'].values[i]
            gyro_ys_max = df['gyro_ys_max'].values[i]
            gyro_zs_max = df['gyro_zs_max'].values[i]
            gyro_xs_min = df['gyro_xs_min'].values[i]
            gyro_ys_min = df['gyro_ys_min'].values[i]
            gyro_zs_min = df['gyro_zs_min'].values[i]
            gyro_xs_iqr = df['gyro_xs_iqr'].values[i]
            gyro_ys_iqr = df['gyro_ys_iqr'].values[i]
            gyro_zs_iqr = df['gyro_zs_iqr'].values[i]

        if set != 0:
            magn_xs_mean = df['magn_xs_mean'].values[i]
            magn_ys_mean = df['magn_ys_mean'].values[i]
            magn_zs_mean = df['magn_zs_mean'].values[i]
            magn_xs_var = df['magn_xs_var'].values[i]
            magn_ys_var = df['magn_ys_var'].values[i]
            magn_zs_var = df['magn_zs_var'].values[i]
            magn_xs_mad = df['magn_xs_mad'].values[i]
            magn_ys_mad = df['magn_ys_mad'].values[i]
            magn_zs_mad = df['magn_zs_mad'].values[i]
            magn_xs_max = df['magn_xs_max'].values[i]
            magn_ys_max = df['magn_ys_max'].values[i]
            magn_zs_max = df['magn_zs_max'].values[i]
            magn_xs_min = df['magn_xs_min'].values[i]
            magn_ys_min = df['magn_ys_min'].values[i]
            magn_zs_min = df['magn_zs_min'].values[i]
            magn_xs_iqr = df['magn_xs_iqr'].values[i]
            magn_ys_iqr = df['magn_ys_iqr'].values[i]
            magn_zs_iqr = df['magn_zs_iqr'].values[i]

        gps_lat_mean = df['gps_lat_mean'].values[i]
        gps_long_mean = df['gps_long_mean'].values[i]
        gps_alt_mean = df['gps_alt_mean'].values[i]
        gps_speed_mean = df['gps_speed_mean'].values[i]
        gps_bearing_mean = df['gps_bearing_mean'].values[i]
        gps_accuracy_mean = df['gps_accuracy_mean'].values[i]
        gps_lat_var = df['gps_lat_var'].values[i]
        gps_long_var = df['gps_long_var'].values[i]
        gps_alt_var = df['gps_alt_var'].values[i]
        gps_speed_var = df['gps_speed_var'].values[i]
        gps_bearing_var = df['gps_bearing_var'].values[i]
        gps_accuracy_var = df['gps_accuracy_var'].values[i]
        gps_lat_mad = df['gps_lat_mad'].values[i]
        gps_long_mad = df['gps_long_mad'].values[i]
        gps_alt_mad = df['gps_alt_mad'].values[i]
        gps_speed_mad = df['gps_speed_mad'].values[i]
        gps_bearing_mad = df['gps_bearing_mad'].values[i]
        gps_accuracy_mad = df['gps_accuracy_mad'].values[i]
        gps_lat_max = df['gps_lat_max'].values[i]
        gps_long_max = df['gps_long_max'].values[i]
        gps_alt_max = df['gps_alt_max'].values[i]
        gps_speed_max = df['gps_speed_max'].values[i]
        gps_bearing_max = df['gps_bearing_max'].values[i]
        gps_accuracy_max = df['gps_accuracy_max'].values[i]
        gps_lat_min = df['gps_lat_min'].values[i]
        gps_long_min = df['gps_long_min'].values[i]
        gps_alt_min = df['gps_alt_min'].values[i]
        gps_speed_min = df['gps_speed_min'].values[i]
        gps_bearing_min = df['gps_bearing_min'].values[i]
        gps_accuracy_min = df['gps_accuracy_min'].values[i]
        gps_lat_iqr = df['gps_lat_iqr'].values[i]
        gps_long_iqr = df['gps_long_iqr'].values[i]
        gps_alt_iqr = df['gps_alt_iqr'].values[i]
        gps_speed_iqr = df['gps_speed_iqr'].values[i]
        gps_bearing_iqr = df['gps_bearing_iqr'].values[i]
        gps_accuracy_iqr = df['gps_accuracy_iqr'].values[i]

        if set == 2:
            segments.append(
                [acc_xs_mean, acc_ys_mean, acc_zs_mean, acc_xs_var, acc_ys_var, acc_zs_var, acc_xs_mad,
                 acc_ys_mad, acc_zs_mad, acc_xs_max, acc_ys_max, acc_zs_max, acc_xs_min, acc_ys_min, acc_zs_min,
                 acc_xs_iqr, acc_ys_iqr, acc_zs_iqr, gyro_xs_mean, gyro_ys_mean, gyro_zs_mean, gyro_xs_var,
                 gyro_ys_var, gyro_zs_var, gyro_xs_mad, gyro_ys_mad, gyro_zs_mad, gyro_xs_max, gyro_ys_max,
                 gyro_zs_max, gyro_xs_min, gyro_ys_min, gyro_zs_min, gyro_xs_iqr, gyro_ys_iqr, gyro_zs_iqr,
                 magn_xs_mean, magn_ys_mean, magn_zs_mean, magn_xs_var, magn_ys_var, magn_zs_var, magn_xs_mad,
                 magn_ys_mad, magn_zs_mad, magn_xs_max, magn_ys_max, magn_zs_max, magn_xs_min, magn_ys_min,
                 magn_zs_min, magn_xs_iqr, magn_ys_iqr, magn_zs_iqr, gps_lat_mean, gps_long_mean, gps_alt_mean,
                 gps_speed_mean, gps_bearing_mean, gps_accuracy_mean, gps_lat_var, gps_long_var, gps_alt_var,
                 gps_speed_var, gps_bearing_var, gps_accuracy_var, gps_lat_mad, gps_long_mad, gps_alt_mad,
                 gps_speed_mad, gps_bearing_mad, gps_accuracy_mad, gps_lat_max, gps_long_max, gps_alt_max,
                 gps_speed_max, gps_bearing_max, gps_accuracy_max, gps_lat_min, gps_long_min, gps_alt_min,
                 gps_speed_min, gps_bearing_min, gps_accuracy_min, gps_lat_iqr, gps_long_iqr, gps_alt_iqr,
                 gps_speed_iqr, gps_bearing_iqr, gps_accuracy_iqr])
        else:
            if set == 1:
                segments.append(
                    [acc_xs_mean, acc_ys_mean, acc_zs_mean, acc_xs_var, acc_ys_var, acc_zs_var, acc_xs_mad,
                     acc_ys_mad, acc_zs_mad, acc_xs_max, acc_ys_max, acc_zs_max, acc_xs_min, acc_ys_min,
                     acc_zs_min, acc_xs_iqr, acc_ys_iqr, acc_zs_iqr, magn_xs_mean, magn_ys_mean, magn_zs_mean,
                     magn_xs_var, magn_ys_var, magn_zs_var, magn_xs_mad, magn_ys_mad, magn_zs_mad, magn_xs_max,
                     magn_ys_max, magn_zs_max, magn_xs_min, magn_ys_min, magn_zs_min, magn_xs_iqr, magn_ys_iqr,
                     magn_zs_iqr, gps_lat_mean, gps_long_mean, gps_alt_mean, gps_speed_mean, gps_bearing_mean,
                     gps_accuracy_mean, gps_lat_var, gps_long_var, gps_alt_var, gps_speed_var, gps_bearing_var,
                     gps_accuracy_var, gps_lat_mad, gps_long_mad, gps_alt_mad, gps_speed_mad, gps_bearing_mad,
                     gps_accuracy_mad, gps_lat_max, gps_long_max, gps_alt_max, gps_speed_max, gps_bearing_max,
                     gps_accuracy_max, gps_lat_min, gps_long_min, gps_alt_min, gps_speed_min, gps_bearing_min,
                     gps_accuracy_min, gps_lat_iqr, gps_long_iqr, gps_alt_iqr, gps_speed_iqr, gps_bearing_iqr,
                     gps_accuracy_iqr])
            else:
                if set == 0:
                    segments.append(
                        [acc_xs_mean, acc_ys_mean, acc_zs_mean, acc_xs_var, acc_ys_var, acc_zs_var, acc_xs_mad,
                         acc_ys_mad, acc_zs_mad, acc_xs_max, acc_ys_max, acc_zs_max, acc_xs_min, acc_ys_min,
                         acc_zs_min, acc_xs_iqr, acc_ys_iqr, acc_zs_iqr, gps_lat_mean, gps_long_mean,
                         gps_alt_mean, gps_speed_mean, gps_bearing_mean, gps_accuracy_mean, gps_lat_var,
                         gps_long_var, gps_alt_var, gps_speed_var, gps_bearing_var, gps_accuracy_var,
                         gps_lat_mad, gps_long_mad, gps_alt_mad, gps_speed_mad, gps_bearing_mad,
                         gps_accuracy_mad, gps_lat_max, gps_long_max, gps_alt_max, gps_speed_max,
                         gps_bearing_max, gps_accuracy_max, gps_lat_min, gps_long_min, gps_alt_min,
                         gps_speed_min, gps_bearing_min, gps_accuracy_min, gps_lat_iqr, gps_long_iqr,
                         gps_alt_iqr, gps_speed_iqr, gps_bearing_iqr, gps_accuracy_iqr])
        labels.append(label)

    segments = np.asarray(segments, dtype=np.float32)
    for i in range(0, len(labels), 1):
        if labels[i] == "Inactive":
            labels[i] = 0
        else:
            if labels[i] == "Active":
                labels[i] = 1
            else:
                if labels[i] == "Walking":
                    labels[i] = 2
                else:
                    if labels[i] == "Driving":
                        labels[i] = 3
    labels = np.asarray(labels, dtype=np.int)

    n_splits = 10

    if index > -1:
        split_fold = index - 1

        kf = StratifiedKFold(n_splits=n_splits)
        train_idx, test_idx = list(kf.split(segments, labels))[split_fold]

        train_x = segments[train_idx]
        train_y = labels[train_idx]
        test_x = segments[test_idx]
        test_y = labels[test_idx]

        scaler = StandardScaler()
        train_x = scaler.fit_transform(train_x)
        test_x = scaler.transform(test_x)

        kf_2 = StratifiedKFold(n_splits=n_splits)
        train_idx_2, test_idx_2 = list(kf_2.split(train_x, train_y))[split_fold]

        train_split_x = train_x[train_idx_2]
        train_split_y = train_y[train_idx_2]
        test_split_x = train_x[test_idx_2]
        test_split_y = train_y[test_idx_2]

        cs = [1, 10, 100, 1000, 10000]
        gammas = [0.0001, 0.001, 0.01, 0.1, 1]
        degrees = [1, 2, 3, 4]
        lin_comb = list(itertools.product(['linear'], cs))
        rbf_comb = list(itertools.product(['rbf'], cs, gammas))
        poly_comb = list(itertools.product(['poly'], cs, gammas, degrees))
        models = []
        f1_scores = []
        combinations = []
        for element in lin_comb:
            combinations.append(element)
        for element in rbf_comb:
            combinations.append(element)
        for element in poly_comb:
            combinations.append(element)

        print("# Tuning hyper-parameters for: " + str(split_fold))
        print()

        for (kernel, C) in lin_comb:
            model = OneVsRestClassifier(SVC(max_iter=1000, kernel=kernel, C=C))
            model.fit(train_split_x, train_split_y)

            models.append(model)
            y_true, y_pred = test_split_y, model.predict(test_split_x)

            f1 = f1_score(y_true, y_pred, average='weighted')
            f1_scores.append(f1)
            print(str(f1) + ' | (' + str(kernel) + ', ' + str(C) + ')')

        for (kernel, C, gamma) in rbf_comb:
            model = OneVsRestClassifier(SVC(max_iter=1000, kernel=kernel, C=C, gamma=gamma))
            model.fit(train_split_x, train_split_y)

            models.append(model)
            y_true, y_pred = test_split_y, model.predict(test_split_x)

            f1 = f1_score(y_true, y_pred, average='weighted')
            f1_scores.append(f1)
            print(str(f1) + ' | (' + str(kernel) + ', ' + str(C) + ', ' + str(gamma) + ')')

        for (kernel, C, gamma, degree) in poly_comb:
            model = OneVsRestClassifier(SVC(max_iter=1000, kernel=kernel, C=C, gamma=gamma, degree=degree))
            model.fit(train_split_x, train_split_y)

            models.append(model)
            y_true, y_pred = test_split_y, model.predict(test_split_x)

            f1 = f1_score(y_true, y_pred, average='weighted')
            f1_scores.append(f1)
            print(str(f1) + ' | (' + str(kernel) + ', ' + str(C) + ', ' + str(gamma) + ', ' + str(degree) + ')')

        mf_index = f1_scores.index(max(f1_scores))
        print()
        print("# Best hyper-parameter combination: " + str(combinations[mf_index]))
        print()

        print("Detailed classification report:")
        print()
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print()
        y_true, y_pred = test_y, models[mf_index].predict(test_x)
        print(classification_report(y_true, y_pred))
        print()

        confusion_matrix = metrics.confusion_matrix(y_true, y_pred)
        df_cm = pd.DataFrame(data=confusion_matrix.astype(float))
        df_cm.to_csv(directory + '/confusion_matrix_' + str(split_fold) + "_" + case.replace('.', '') + '.csv',
                     sep=',', header=True, float_format='%.2f', index=False)
        plt.figure(figsize=(16, 14))
        sns.heatmap(confusion_matrix, xticklabels=LABELS, yticklabels=LABELS, annot=True, fmt="d")
        plt.title("Confusion matrix")
        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.savefig(directory + '/confusion_matrix_' + str(split_fold) + "_" + case.replace('.', ''))

        linear_scores = open(directory + '/linear_scores_' + str(split_fold) + '.csv', 'w', newline="")
        linear_scores_writer = csv.writer(linear_scores, delimiter=",")
        linear_scores_writer.writerow(f1_scores[0:5])
        rbf_scores = open(directory + '/rbf_scores_' + str(split_fold) + '.csv', 'w', newline="")
        rbf_scores_writer = csv.writer(rbf_scores, delimiter=",")
        rbf_scores_writer.writerow(f1_scores[5:30])
        poly_scores = open(directory + '/poly_scores_' + str(split_fold) + '.csv', 'w', newline="")
        poly_scores_writer = csv.writer(poly_scores, delimiter=",")
        poly_scores_writer.writerow(f1_scores[30:130])

        linear_scores.close()
        rbf_scores.close()
        poly_scores.close()


def set_random_seed(seed_arg):
    seed = int(seed_arg)
    random.seed(seed)
    np.random.seed(seed)
    # tf.set_random_seed(seed)
    return seed


if __name__ == '__main__':
    set_random_seed(6)  # Favourite number
    if sys.argv[1]:
        index = int(sys.argv[1])
    else:
        index = -1
    startTime = time.time()
    svm_model(sys.argv[1], index)
    elapsedTime = time.time() - startTime
    print(elapsedTime)

No display found. Using non-interactive Agg backend


ValueError: invalid literal for int() with base 10: '-f'