# Resample data

This notebook contains functionality for resampling sensor data collected in car experiments. The resampling is done by identifying the number of samples per second and either up- or downsampling, depending on the sampling strategy.

In [None]:
# Reload changed modules automatically
%load_ext autoreload
%autoreload 2

Import necessary modules, functions, and constants:

In [None]:
import os
from common.loaddata import load_and_align_data
from resample.resampledata import process_samples, save_resampled_data
from resample.samplingstrat import *
from resample.splitstrat import *
from const.globconst import *

### General HowTo: 1) provide required INPUT  2) execute cells one after another 3) repeat

Provide *INPUT*: **experiment**, **sensor modality**, **car number**, and **path to save the results**.

Correspondence between experiments and experiment folders:

- **'exp-3'**: similar cars (all scenarios: *city*, *highway*, *country*, and *parking*)

- **'exp-4'**: different cars (*parking* scenario)

- **'exp-5'**: different cars (*city*, *highway*, and *country* scenarios)

In [None]:
# INPUT: these values are read by the cells below; re-run the following cells after changing them
data_path = '/home/seemoo/car_zip/exp-5/raw' # path to raw data, must contain one of the default experiment folders: 'exp-3', 'exp-4', or 'exp-5'
sensor_type = 'bar'                          # provide one of the following sensor types: 'acc', 'gyrW', 'unmag', or 'bar'
sensors = CAR2                               # provide car: either CAR1 (sensors 01–05) or CAR2 (sensors 06–10)
save_res_path = '/home/seemoo/car_zip'       # root folder for the resampled sensor data ('<experiment folder>/<subfolder>' is created inside)
debug = True                                 # turn debugging output on/off

Perform sanity checks on *sensor_type*, load sensor data, and create output folder structure.

In [None]:
# Figure out sampling rate based on the sensor type
if sensor_type == 'acc':
    fs = ACC_FS
elif sensor_type == 'gyrW':
    fs = GYR_FS
elif sensor_type == 'unmag':
    fs = MAG_FS
elif sensor_type == 'bar':
    fs = BAR_FS
else:
    print('Unknown sensor type: "%s", can only be %s' % (sensor_type, SENSOR_TYPES))
    # fs == 0 marks an invalid sensor type, nothing below is executed in this case
    fs = 0

# Proceed further only if the sensor type is correct
if fs > 0:
    # Load sensor data
    sensor_data = load_and_align_data(data_path, sensor_type)

    # Show what kind of data has been loaded
    print('Sensor type: "%s", sampling rate:"%d" Hz, sensor numbers: %s' % (sensor_type, fs, sensors))

    # Define which experiment folder has been provided as input: the first folder name found in
    # data_path wins (same precedence as before: 'exp-3', then 'exp-4', then 'exp-5').
    # exp_folder is always (re)assigned, so a value from a previous run can never leak through
    exp_folder = next((name for name in ('exp-3', 'exp-4', 'exp-5') if name in data_path), None)
    exp_flag = exp_folder is not None

    if not exp_flag:
        print('Error: sensor path must contain one of the experiment folders: "exp-3", "exp-4", or "exp-5", ' +
              'otherwise further processing will fail, please use default data paths as in the dateset repository')

    # Create output folder structure
    if exp_flag:
        # Different cars parking ('exp-4') is stored as a whole in 'aligned',
        # all other experiments are stored separately in 'adv' and 'non-adv'
        if 'exp-4' in data_path:
            subfolders = ['aligned']
        else:
            subfolders = ['adv', 'non-adv']

        for subfolder in subfolders:
            # makedirs also creates save_res_path and the experiment folder if they are missing;
            # exist_ok=True replaces the racy "check if exists, then create" sequence
            os.makedirs(os.path.join(save_res_path, exp_folder, subfolder), exist_ok=True)

Set up parameters for *alignment_rounds* (defines up- or downsampling strategy) and *split* (defines split between adversarial and non-adversarial cases).

In [None]:
def _pick_strategies(path, modality, car):
    """Select sampling and split strategies for the given input.

    The strategy constants (star-imported from resample.samplingstrat and resample.splitstrat)
    follow a regular naming scheme, so they are looked up by name in the notebook namespace:

    - '<CASE>_<CAR>_R1_<SENSOR>' / '<CASE>_<CAR>_R2_<SENSOR>': 1st / 2nd resampling round
    - '<CASE>_<CAR>_SPLIT_<SENSOR>': split between non-adversarial and adversarial data

    path     -- data path, must contain 'exp-3', 'exp-4', or 'exp-5' (checked in this order)
    modality -- sensor type: 'acc', 'gyrW', 'unmag', or 'bar'
    car      -- sensor list of the car, compared against CAR1 (anything else is treated as car 2)

    Returns (alignment_rounds, split); both are empty lists if the experiment folder or the
    sensor type is unknown, split is also empty for 'exp-4' (no adv / non-adv split there).
    """
    constants = globals()

    # Sensor type -> suffix used in the strategy constant names
    suffix = {'acc': 'ACC', 'gyrW': 'GYR', 'unmag': 'MAG', 'bar': 'BAR'}.get(modality)

    # Experiment folder -> prefixes of the (non-adversarial, adversarial) cases
    if 'exp-3' in path:
        # Similar cars
        cases = ('SNA', 'SADV')
    elif 'exp-4' in path:
        # Different cars parking
        cases = ('DPARK',)
    elif 'exp-5' in path:
        # Different cars everything else except parking
        cases = ('DNA', 'DADV')
    else:
        cases = ()

    # Unknown experiment folder or sensor type: nothing is selected
    if not cases or suffix is None:
        return [], []

    # Parking: one strategy pair for both cars, no split
    if cases == ('DPARK',):
        rounds = [(constants['DPARK_CAR12_R1_' + suffix], constants['DPARK_CAR12_R2_' + suffix])]
        return rounds, []

    # Car 1 or 2
    car_tag = 'CAR1' if car == CAR1 else 'CAR2'

    rounds = [(constants['%s_%s_R1_%s' % (case, car_tag, suffix)],
               constants['%s_%s_R2_%s' % (case, car_tag, suffix)]) for case in cases]
    splits = [constants['%s_%s_SPLIT_%s' % (case, car_tag, suffix)] for case in cases]

    return rounds, splits


alignment_rounds, split = _pick_strategies(data_path, sensor_type, sensors)

Perform resampling and save the resampled data into respective adversarial and non-adversarial folders.

In [None]:
# The sampling strategies are selected in the previous cell; an empty list means that the
# experiment folder or the sensor type was not recognized, so fail early with a readable message
if not alignment_rounds:
    raise RuntimeError('No sampling strategy is set for sensor type "%s" and data path "%s", '
                       'check the INPUT and re-run the cells above' % (sensor_type, data_path))

# No split to adv and non-adv data for different cars parking ('exp-4')
is_parking = 'exp-4' in data_path

# We resample sensor data for the specified car and split it to adversarial and non-adversarial
# subsets: i == 0 -> non-adversarial, i == 1 -> adversarial ('exp-4' stops after the first pass)
for i in range(2):
    # Display number of samples for provided sensor_type (only once, before the first pass)
    if debug and i == 0:
        print('sensor, %s: ' % sensor_type)
        print()

        # Iterate over sensors
        for s in sensors:
            print('%s,%d' % (s, len(sensor_data[s])))
        print()

    # Create data subset corresponding to adversarial or non-adversarial cases
    subset = {}

    if not is_parking:
        # Store rows matching the split timestamps (they are provided in resample.splitstrat and
        # are based on the ground truth from experiments + 2min guard interval, see dataset repository for ground truth)
        split_ts = []

        # Iterate over split: the j-th timestamp belongs to the j-th sensor
        for j, ts in enumerate(split[i]):
            frame = sensor_data[sensors[j]]

            # Intermediate timestamps to divide dataset into adversarial and non-adversarial
            match = frame.loc[frame['TS'] == ts]
            if debug:
                print(match)

            # Without a matching row there is no cut position, report it instead of failing on .index below
            if len(match) == 0:
                raise ValueError('Split timestamp "%s" not found in the data of sensor "%s"' % (ts, sensors[j]))

            split_ts.append(match)

        # Empty line
        if debug:
            print()

        for j, match in enumerate(split_ts):
            frame = sensor_data[sensors[j]]

            # NOTE(review): index labels are used as row positions below, which assumes the default
            # 0..n-1 index — confirm against load_and_align_data
            if i == 0:
                # Non-adversarial case: keep everything up to and including the last matching row
                cut = match.index[-1]
                selected = frame.iloc[:cut + 1]
            else:
                # Adversarial case: keep everything starting from the first matching row
                cut = match.index[0]
                selected = frame.iloc[cut:]

            # Check that we correctly capture line indices of the timestamps specified in split
            if debug:
                print(cut)

            subset[sensors[j]] = selected

    else:
        # Just re-packing the data, no split is done
        for s in sensors:
            subset[s] = sensor_data[s]

    # Check the length of data subset
    if debug:
        print()
        for k, v in sorted(subset.items()):
            print('%s,%d' % (k, len(v)))

    # Sampling strategies for the 1st and the (optional) 2nd round of alignment
    rounds = alignment_rounds[i]

    # 1st round of alignment: resample sensor data based on the 1st sampling strategy
    subset = process_samples(subset, fs, rounds[0])

    # See the number of samples after the 1st resampling round
    if debug:
        print()
        for k, v in sorted(subset.items()):
            print(k, len(v))

    if rounds[1] is not None:
        # 2nd round of alignment: resample sensor data based on the 2nd sampling strategy
        subset = process_samples(subset, fs, rounds[1])

        # See the number of samples after the 2nd resampling round
        if debug:
            print()
            for k, v in sorted(subset.items()):
                print(k, len(v))

    # Save resampled data
    if is_parking:
        save_resampled_data(subset, sensor_type, save_res_path + '/' + exp_folder + '/aligned')

        # No need for another iteration in different cars parking
        break

    if i == 0:
        save_resampled_data(subset, sensor_type, save_res_path + '/' + exp_folder + '/non-adv')
    else:
        save_resampled_data(subset, sensor_type, save_res_path + '/' + exp_folder + '/adv')

    # Separate the output of the two passes
    print()