# FastZIP results

This notebook contains functionality for computing and plotting error rates and pairing time, as well as for preparing binary fingerprints for randomness evaluation with the NIST SP 800-90B test suite.

IPython magic commands to automatically reload changed modules:

In [None]:
%load_ext autoreload
%autoreload 2

Import necessary modules:

In [None]:
import os
import re
import shutil
from glob import glob
from bitarray import bitarray
from distutils.dir_util import copy_tree
from common.visualizedata import plot_error_rates, plot_fusion_effect
from common.randomness import plot_rand_walk
from common.util import get_fused_error_rates, get_indiv_error_rates, replay_with_compensation, extract_fingerptins, powerful_adv, compute_pairing_time, fuse_fps
from const.globconst import *

### Compute True Acceptance Rates (TARs) of colocated devices (i.e., in the same car)

#### TARs of individual sensor modalities

In [None]:
# Location of the log files; the path MUST contain 'logs/benign'.
# 'sim-non-adv' selects the *similar cars* experiment: TARs are already computed for both the
# adversarial and non-adversarial cases, so launching this cell with 'sim-adv' is unnecessary
# (it will produce the same result).
# For the *different cars* experiment replace 'sim-non-adv' in log_path with 'diff-non-adv',
# 'diff-park', or 'diff-adv'.
log_path = '/home/seemoo/car_zip/logs/benign/sim-non-adv'

# Extra positional arguments of each run: the empty tuple evaluates the full experiment,
# every other entry restricts the evaluation to one scenario
scenario_args = [(), ('city',), ('country',), ('highway',), ('parking',)]

# Individual error rates for every sensor type, one blank line after each run
for st in ('acc_v', 'acc_h', 'gyrW', 'bar'):
    for extra in scenario_args:
        get_indiv_error_rates(log_path, st, 'benign', *extra)
        print()

#### TARs of fused sensor modalities

In [None]:
# Location of the log files; the path MUST contain 'logs/benign'.
# The path below selects the *different cars* experiment; 'diff-non-adv' can be replaced with
# 'diff-park' or 'diff-adv'.
# For the *similar cars* experiment use 'sim-non-adv': TARs are already computed for both the
# adversarial and non-adversarial cases, so launching this cell with 'sim-adv' is unnecessary
# (it will produce the same result).
log_path = '/home/seemoo/car_zip/logs/benign/diff-non-adv'

# Extra positional arguments of each run: the empty tuple evaluates the full experiment,
# every other entry restricts the evaluation to one scenario
scenario_args = [(), ('city',), ('country',), ('highway',), ('parking',)]

# Fused error rates for every fusion combination
for fc in FUSION_CONFIG:
    for extra in scenario_args:
        get_fused_error_rates(log_path, fc, 'benign', *extra)

### Compute False Acceptance Rates (FARs) of non-colocated devices (i.e., in different cars)

#### FARs of individual sensor modalities (baseline adversary: *injection attack* in the paper). At the time of cell execution only ONE *log_path* should be uncommented; 'silent' refers to *static injection*, while 'moving'—to *moving injection*

In [None]:
# Location of the log files; the path MUST contain 'logs/baseline'.
# Keep exactly ONE log_path assignment uncommented when running this cell.
# NOTE(review): the *different cars* paths below also end in 'sim-non-adv' -- confirm that this
# matches the actual layout of the log folders.

# FARs for the *similar cars* experiment
# log_path = '/home/seemoo/car_zip/logs/baseline/sim-non-adv/silent/sim-non-adv'     # silent
# log_path = '/home/seemoo/car_zip/logs/baseline/sim-non-adv/moving/sim-non-adv'     # moving

# FARs for the *different cars* experiment
# log_path = '/home/seemoo/car_zip/logs/baseline/diff-non-adv/silent/sim-non-adv'     # silent
log_path = '/home/seemoo/car_zip/logs/baseline/diff-non-adv/moving/sim-non-adv'     # moving

# Extra positional arguments of each run: the empty tuple evaluates the full experiment,
# every other entry restricts the evaluation to one scenario
scenario_args = [(), ('city',), ('country',), ('highway',), ('parking',)]

# Individual error rates for every sensor type, one blank line after each run
for st in ('acc_v', 'acc_h', 'gyrW', 'bar'):
    for extra in scenario_args:
        get_indiv_error_rates(log_path, st, 'baseline', *extra)
        print()

#### FARs of fused sensor modalities (baseline adversary: *injection attack* in the paper). At the time of cell execution only ONE *log_path* should be uncommented; 'silent' refers to *static injection*, while 'moving'—to *moving injection*

In [None]:
# Location of the log files; the path MUST contain 'logs/baseline'.
# Keep exactly ONE log_path assignment uncommented when running this cell.
# NOTE(review): the *different cars* paths below also end in 'sim-non-adv' -- confirm that this
# matches the actual layout of the log folders.

# FARs for the *similar cars* experiment
log_path = '/home/seemoo/car_zip/logs/baseline/sim-non-adv/silent/sim-non-adv'     # silent
# log_path = '/home/seemoo/car_zip/logs/baseline/sim-non-adv/moving/sim-non-adv'     # moving

# FARs for the *different cars* experiment
# log_path = '/home/seemoo/car_zip/logs/baseline/diff-non-adv/silent/sim-non-adv'     # silent
# log_path = '/home/seemoo/car_zip/logs/baseline/diff-non-adv/moving/sim-non-adv'     # moving

# Extra positional arguments of each run: the empty tuple evaluates the full experiment,
# every other entry restricts the evaluation to one scenario
scenario_args = [(), ('city',), ('country',), ('highway',), ('parking',)]

# Fused error rates for every fusion combination
for fc in FUSION_CONFIG:
    for extra in scenario_args:
        get_fused_error_rates(log_path, fc, 'baseline', *extra)

#### FARs of individual sensor modalities (replaying adversary: *basic replay* in the paper). At the time of cell execution only ONE *log_path* should be uncommented

In [None]:
# Location of the log files; the path MUST contain 'logs/replay'.
# Keep exactly ONE log_path assignment uncommented when running this cell.

# FARs for the *similar cars* experiment: use either 'sim-non-adv' or 'sim-adv'
log_path = '/home/seemoo/car_zip/logs/replay/sim-non-adv'

# FARs for the *different cars* experiment: use either 'diff-non-adv', 'diff-park', or 'diff-adv'
# log_path = '/home/seemoo/car_zip/logs/replay/diff-adv'

# For 'diff-non-adv' and 'diff-adv' the 'parking' scenario is stored in the separate
# 'diff-park' folder; remember which of the two names has to be swapped for 'diff-park'
if 'diff-non-adv' in log_path:
    own_exp = 'diff-non-adv'
elif 'diff-adv' in log_path:
    own_exp = 'diff-adv'
else:
    own_exp = None

# Iterate over sensor types
for st in ('acc_v', 'acc_h', 'gyrW', 'bar'):

    if own_exp is not None:
        # Full experiment plus the scenarios that live in this folder
        for extra in [(), ('city',), ('country',), ('highway',)]:
            get_indiv_error_rates(log_path, st, 'replay', *extra)
            print()

        # Workaround: the 'parking' scenario is taken from the 'diff-park' logs
        get_indiv_error_rates(log_path.replace(own_exp, 'diff-park'), st, 'replay', 'parking')

        # Copy the 'diff-park' cache folder of this sensor (it holds parking.json) into the
        # cache folder of the experiment being evaluated
        copy_tree(CACHE_PATH + '/replay/far/diff-park/indiv/' + st,
                  CACHE_PATH + '/replay/far/' + log_path.rsplit('/', 1)[-1] + '/indiv/' + st)
        print()

    elif 'diff-park' in log_path:
        # The 'diff-park' logs are evaluated for the 'parking' scenario only
        get_indiv_error_rates(log_path, st, 'replay', 'parking')

    else:
        # Full experiment followed by every scenario
        for extra in [(), ('city',), ('country',), ('highway',), ('parking',)]:
            get_indiv_error_rates(log_path, st, 'replay', *extra)
            print()

#### FARs of fused sensor modalities (replaying adversary: *basic replay* in the paper). At the time of cell execution only ONE *log_path* should be uncommented

In [None]:
# Location of the log files; the path MUST contain 'logs/replay'.
# Keep exactly ONE log_path assignment uncommented when running this cell.

# FARs for the *similar cars* experiment: use either 'sim-non-adv' or 'sim-adv'
# log_path = '/home/seemoo/car_zip/logs/replay/sim-non-adv'

# FARs for the *different cars* experiment: use either 'diff-non-adv', 'diff-park', or 'diff-adv'
log_path = '/home/seemoo/car_zip/logs/replay/diff-non-adv'

# For 'diff-non-adv' and 'diff-adv' the 'parking' scenario is stored in the separate
# 'diff-park' folder; remember which of the two names has to be swapped for 'diff-park'
if 'diff-non-adv' in log_path:
    own_exp = 'diff-non-adv'
elif 'diff-adv' in log_path:
    own_exp = 'diff-adv'
else:
    own_exp = None

# Iterate over different fusion combinations
for fc in FUSION_CONFIG:
    if own_exp is not None:
        # Full experiment plus the scenarios that live in this folder
        for extra in [(), ('city',), ('country',), ('highway',)]:
            get_fused_error_rates(log_path, fc, 'replay', *extra)

        # Workaround: the 'parking' scenario is taken from the 'diff-park' logs
        get_fused_error_rates(log_path.replace(own_exp, 'diff-park'), fc, 'replay', 'parking')

        # Part of the cache path naming the fused sensors, e.g., 'acc_v-acc_h'
        st_path = '-'.join(fc)

        # Copy the 'diff-park' cache folder of this combination (it holds parking.json) into
        # the cache folder of the experiment being evaluated
        copy_tree(CACHE_PATH + '/replay/far/diff-park/fused/' + st_path,
                  CACHE_PATH + '/replay/far/' + log_path.split('/')[-1] + '/fused/' + st_path)

    elif 'diff-park' in log_path:
        # The 'diff-park' logs are evaluated for the 'parking' scenario only
        get_fused_error_rates(log_path, fc, 'replay', 'parking')

    else:
        # Full experiment followed by every scenario
        for extra in [(), ('city',), ('country',), ('highway',), ('parking',)]:
            get_fused_error_rates(log_path, fc, 'replay', *extra)

#### FARs of individual sensor modalities (replaying adversary: *targeted replay* in the paper). At the time of cell execution only ONE *log_path* should be uncommented

In [None]:
# Location of the log files; the path MUST contain 'keys'.
# Keep exactly ONE log_path assignment uncommented when running this cell.

# FARs for the *similar cars* experiment: use either 'sim-non-adv' or 'sim-adv'
# log_path = '/home/seemoo/car_zip/logs/keys/sim-adv'

# FARs for the *different cars* experiment: use either 'diff-non-adv', 'diff-park', or 'diff-adv'
log_path = '/home/seemoo/car_zip/logs/keys/diff-adv'

# Only the 'diff-park' logs are replayed with the 'parking' scenario; every other experiment
# uses 'full' ('diff-non-adv' / 'diff-adv' take precedence if both names occur in the path)
is_diff_drive = 'diff-non-adv' in log_path or 'diff-adv' in log_path
scenario = 'parking' if (not is_diff_drive and 'diff-park' in log_path) else 'full'

# Run the replay for both cars with every sensor type
for st in ('acc_v', 'acc_h', 'gyrW', 'bar'):
    for car in (1, 2):
        replay_with_compensation(log_path, [st], scenario, car=car)

#### FARs of fused sensor modalities (replaying adversary: *targeted replay* in the paper). At the time of cell execution only ONE *log_path* should be uncommented

In [None]:
# Location of the log files; the path MUST contain 'keys'.
# Keep exactly ONE log_path assignment uncommented when running this cell.

# FARs for the *similar cars* experiment: use either 'sim-non-adv' or 'sim-adv'
log_path = '/home/seemoo/car_zip/logs/keys/sim-non-adv'

# FARs for the *different cars* experiment: use either 'diff-non-adv', 'diff-park', or 'diff-adv'
# log_path = '/home/seemoo/car_zip/logs/keys/diff-adv'

# Only the 'diff-park' logs are replayed with the 'parking' scenario; every other experiment
# uses 'full' ('diff-non-adv' / 'diff-adv' take precedence if both names occur in the path)
is_diff_drive = 'diff-non-adv' in log_path or 'diff-adv' in log_path
scenario = 'parking' if (not is_diff_drive and 'diff-park' in log_path) else 'full'

# Run the replay for both cars with every fusion combination
for fc in FUSION_CONFIG:
    for car in (1, 2):
        replay_with_compensation(log_path, fc, scenario, car=car)

#### FARs with powerful adversary: *similar-context attack* in the paper

In [None]:
# Location of the log files; the path MUST contain 'keys/sim-adv' or 'key/diff-adv'.
# Only the 'sim-adv' and 'diff-adv' options are possible here.

# log_path = '/home/seemoo/car_zip/logs/keys/sim-adv'
log_path = '/home/seemoo/car_zip/logs/keys/diff-adv'

# Extra positional arguments of each run: the empty tuple leaves the scenario argument at its
# default, every other entry selects one scenario
scenario_args = [(), ('city',), ('country',), ('highway',)]

# For every fusion combination evaluate car 1 first, then car 2
for fc in FUSION_CONFIG:
    for car in (1, 2):
        for extra in scenario_args:
            powerful_adv(log_path, fc, *extra, car=car)

### Compute pairing time

#### Pairing time with individual sensor modalities

In [None]:
# Location of the log files; the path MUST contain 'keys' or 'key'.
# Data is combined inside the experiment, so use 'sim-adv' or 'sim-non-adv' for the
# *similar cars* experiment and 'diff-adv' or 'diff-non-adv' for the *different cars* experiment.

log_path = '/home/seemoo/car_zip/logs/keys/sim-adv'
# log_path = '/home/seemoo/car_zip/logs/keys/diff-adv'

# Protocol can be either FPAKE (prot='fpake') or fuzzy commitment (prot='fcom')
prot = 'fpake'

# Number of chunks passed as req_chunks, per protocol and sensor type
chunks_by_prot = {
    'fpake': {'acc_v': 6, 'acc_h': 5, 'gyrW': 4, 'bar': 5},
    'fcom': {'acc_v': 9, 'acc_h': 8, 'gyrW': 10, 'bar': 13},
}

if prot not in chunks_by_prot:
    print('Error: unknown protocol type "%s", use either "fpake" or "fcom"!' % (prot,))
else:
    # Iterate over sensor types
    for st in ('acc_v', 'acc_h', 'gyrW', 'bar'):
        rc = chunks_by_prot[prot][st]

        # Full experiment first, then the individual scenarios
        compute_pairing_time(log_path, [st], req_chunks=rc, prot=prot)
        for scenario in ('city', 'highway', 'country', 'parking'):
            compute_pairing_time(log_path, [st], scenario, req_chunks=rc, prot=prot)

#### Pairing time with fused sensor modalities

In [None]:
# Location of the log files; the path MUST contain 'keys' or 'key'.
# Data is combined inside the experiment, so use 'sim-adv' or 'sim-non-adv' for the
# *similar cars* experiment and 'diff-adv' or 'diff-non-adv' for the *different cars* experiment.

# log_path = '/home/seemoo/car_zip/logs/keys/sim-adv'
log_path = '/home/seemoo/car_zip/logs/keys/diff-adv'

# Protocol can be either FPAKE (prot='fpake') or fuzzy commitment (prot='fcom')
prot = 'fcom'

# Extra positional arguments of each run: the empty tuple evaluates the full experiment,
# every other entry restricts the evaluation to one scenario
scenario_args = [(), ('city',), ('highway',), ('country',), ('parking',)]

# Iterate over different fusion combinations
for fc in FUSION_CONFIG:
    for extra in scenario_args:
        compute_pairing_time(log_path, fc, *extra, prot=prot)
        print()
    # One more blank line separates the output of consecutive fusion combinations
    print()

### Plot results

Set up plotting parameters so that figures look plausible in the notebook:

In [None]:
import matplotlib.pylab as pylab

# One font size shared by the legend, axis labels, titles, and tick labels
font_size = 16

# Params to control plotting
params = {
    'legend.fontsize': font_size,
    'figure.figsize': (20, 15),
    'axes.labelsize': font_size,
    'axes.titlesize': font_size,
    'xtick.labelsize': font_size,
    'ytick.labelsize': font_size,
}

# Apply the customized plotting params
pylab.rcParams.update(params)

#### Plot error rates of individual sensor modalities

In [None]:
# cache_path must point to the cache folder of the desired action, e.g., 'benign', 'baseline',
# 'replay', ...
# !!! It must be an 'indiv' folder, NOT a 'fused' one: fused error rates are plotted in a
# separate cell !!!
cache_path = '/home/seemoo/car_zip/cache/replay-compensation/far/sim-adv/car1-2/indiv'

# Examples of valid paths
# cache_path = '/home/seemoo/car_zip/cache/benign/tar/diff/indiv'
# cache_path = '/home/seemoo/car_zip/cache/baseline/far/silent/diff/indiv'
# cache_path = '/home/seemoo/car_zip/cache/baseline/far/moving/sim/indiv'
# cache_path = '/home/seemoo/car_zip/cache/replay/far/diff-non-adv/indiv'

# Action name: it must correspond to the provided cache_path!!!
action = 'replay-compensation'

# Save flag
save = False

if save:
    # Display error rate plots and SAVE them on disk; NO error rates in text are displayed
    plot_error_rates(cache_path, action, ['1', '2'])
else:
    # Display error rates in text and plot results in the notebook; plots are NOT saved
    plot_error_rates(cache_path, action)

#### Plot error rates of fused sensor modalities

In [None]:
# cache_path must point to the cache folder of the desired action, e.g., 'benign', 'baseline',
# 'replay', ...
# !!! This cell plots *fused* results: point cache_path to a fused cache (see the valid
# examples below), NOT to an 'indiv' folder !!!
cache_path = '/home/seemoo/car_zip/cache/benign/tar/sim/fused'

# Examples of valid paths
# cache_path = '/home/seemoo/car_zip/cache/baseline/far/moving/diff/fused'
# cache_path = '/home/seemoo/car_zip/cache/replay/far/diff-non-adv/fused'
# cache_path = '/home/seemoo/car_zip/cache/replay-compensation/far/diff-non-adv/car1-2/fused'
# cache_path = '/home/seemoo/car_zip/cache/powerful/far/diff-adv/car1-2'

# Scenario name assigned to each sensor type for this plot
scen_by_sensor = {'acc_v': 'city', 'acc_h':'country', 'gyrW':'highway', 'bar': 'parking'}

# Display sensor fusion plots and save them to disk
plot_fusion_effect(cache_path, 'benign', scen_by_sensor)

# A few more examples how the function is called for different actions, e.g., 'baseline', 'replay', ...

# plot_fusion_effect(cache_path, 'baseline', {'acc_v': 'parking', 'acc_h':'country', 'gyrW': 'highway', 'bar': 'city'})
# plot_fusion_effect(cache_path, 'baseline', {'acc_v': 'parking', 'acc_h':'city', 'gyrW': 'highway', 'bar': 'country'})
# plot_fusion_effect(cache_path, 'replay', {'acc_v': 'highway', 'acc_h':'parking', 'gyrW': 'city', 'bar': 'country'}) # for sim
# plot_fusion_effect(cache_path, 'replay', {'acc_v': 'country', 'acc_h':'parking', 'gyrW': 'highway', 'bar': 'city'}) # for diff
# plot_fusion_effect(cache_path, 'replay-compensation', {'acc_v': 'highway', 'acc_h':'country', 'gyrW': 'parking', 'bar': 'city'})
# plot_fusion_effect(cache_path, 'powerful', {'acc_v': 'highway', 'acc_h':'country', 'gyrW': 'parking', 'bar': 'city'})
# plot_fusion_effect(cache_path, 'powerful', {'acc_v': 'full', 'acc_h': 'country', 'gyrW': 'city', 'bar': 'highway'})

#### Display and plot pairing times

In [None]:
# cache_path must point to a pairing-time cache folder, e.g., 'pairing-time-fpake' or
# 'pairing-time-fcom'
# !!! Both 'indiv' and 'fused' folders can be provided here !!!

# cache_path = '/home/seemoo/car_zip/cache/pairing-time-fpake/sim/indiv'
cache_path = '/home/seemoo/car_zip/cache/pairing-time-fpake/diff/fused'

# Save flag
save = False

if save:
    # Display plots and SAVE them on disk; NO results in text are displayed
    # To suppress huge parking pairing time in the plot uncomment lines 198--199 in visualizedata.py
    plot_error_rates(cache_path, 'pairing-time', ['1', '2'])
else:
    # Display results in text and plot them in the notebook; plots are NOT saved
    plot_error_rates(cache_path, 'pairing-time')

### Prepare fingerprints for randomness evaluation

#### Collect binary fingerprints of individual sensor modalities and store them in *.txt files (both 'full' and 'reduced' fingerprint length)

In [None]:
# log_path must point to the key logs; two options are possible: 'keys' or 'keys-reduced'
# log_path = '/home/seemoo/car_zip/logs/keys'
log_path = '/home/seemoo/car_zip/logs/keys-reduced'

# Fingerprint length follows from the log_path: 'reduced' for 'keys-reduced', 'full' otherwise
fp_len = 'reduced' if 'keys-reduced' in log_path else 'full'

# extract_fingerptins is expected to append to existing files (per the original note):
# delete any old data of this fingerprint length first
fp_dir = FP_PATH + '/' + fp_len
if os.path.exists(fp_dir):
    shutil.rmtree(fp_dir)

# Fourth argument of the two additional extract_fingerptins runs, per fingerprint length and
# sensor type (the original comments label these runs '5x' and '10x'; the exact meaning of the
# number is defined by extract_fingerptins -- TODO confirm)
extra_runs = {
    'full': {'acc_v': (20, 10), 'acc_h': (20, 10), 'gyrW': (8, 4), 'bar': (8, 4)},
    'reduced': {'acc_v': (4, 2), 'acc_h': (4, 2), 'gyrW': (2, 1), 'bar': (2, 1)},
}

# fp_len can only be 'full' or 'reduced' at this point, so no error branch is needed
# Iterate over sensor types
for st in ['acc_v', 'acc_h', 'gyrW', 'bar']:
    # Iterate over scenarios
    for scen in [SIM_NON_ADV, SIM_ADV, DIFF_NON_ADV, DIFF_ADV, DIFF_PARK]:
        scen_path = log_path + '/' + scen

        # Generate keys 1x
        extract_fingerptins(scen_path, st, fp_len)

        # Generate keys for the two additional runs
        for run_arg in extra_runs[fp_len][st]:
            extract_fingerptins(scen_path, st, fp_len, run_arg)

#### Collect binary fingerprints of fused sensor modalities and store them in *.txt files (both 'full' and 'reduced' fingerprint length)

In [None]:
# log_path must point to the key logs; two options are possible: 'keys' or 'keys-reduced'

# Paths for *similar cars* experiment (no need to provide any extra paths, e.g., with 'sim-adv')
# log_path = '/home/seemoo/car_zip/logs/keys/sim-non-adv'
# log_path = '/home/seemoo/car_zip/logs/keys-reduced/sim-non-adv'

# Paths for *different cars* experiment (no need to provide any extra paths, e.g., with 'diff-adv')
# log_path = '/home/seemoo/car_zip/logs/keys/diff-non-adv'
log_path = '/home/seemoo/car_zip/logs/keys-reduced/diff-non-adv'

# Fingerprint length follows from the log_path: 'reduced' for 'keys-reduced', 'full' otherwise
fp_len = 'reduced' if 'keys-reduced' in log_path else 'full'

# Fuse fingerprints for every sensor combination
for fc in FUSION_CONFIG:
    fuse_fps(log_path, fc, fp_len)

#### Store fingerprints in binary files (this format is suitable to be input to NIST randomness tests); for fused fingerprints merge fingerprints from 'sim' and 'diff' experiments

In [None]:
# Collect all *.txt files under FP_PATH (recursively) in a deterministic, sorted order
txt_files = sorted(glob(FP_PATH + '/**/' + '*.txt', recursive=True))

# Iterate over txt files
for tf in txt_files:
    # Read fingerprints, one per line
    with open(tf, 'r') as f:
        fps = f.read().splitlines()

    # Print some stat
    print(tf)

    # An empty file has no first/last fingerprint to report and nothing to convert
    if not fps:
        print('Warning: no fingerprints found, skipping!')
        print()
        continue

    print(len(fps), fps[0], fps[-1], len(fps[0]))
    print()

    # Concatenate all fingerprints into a single bitarray
    fps_bits = bitarray(''.join(fps))

    # Save back binary file (needed to run NIST randomness tests): same path without the
    # '.txt' extension. splitext strips only the final extension, so dots elsewhere in the
    # path (e.g., './fps' or a dotted folder name) no longer truncate the output path
    with open(os.path.splitext(tf)[0], 'wb') as f:
        fps_bits.tofile(f)
        
# This part is to combine fused fingerprints from sim and diff cars
# Iterate over full and reduced fingerprint sets
break_flag = False
for fpl in ['full', 'reduced']:
    # Folder holding the fused fingerprints of this length; merged binaries are saved here
    fused_dir = FP_PATH + '/' + fpl + '/fused'

    # Iterate over sensor combinations
    for fc in FUSION_CONFIG:
        # Part of the path naming the fused sensors, e.g., 'acc_v-acc_h'
        st_path = '-'.join(fc)

        # Get list of the *.txt result files for this fusion combination, one per experiment folder
        res_txt = glob(fused_dir + '/*/' + st_path + '/' + '*.txt', recursive=True)

        # We combine files in the following order: first 'sim', then 'diff'
        # (reverse lexicographic order puts a 'sim...' folder before a 'diff...' one)
        res_txt.sort(reverse=True)

        # Check if res_txt list is valid: exactly two files are required
        if len(res_txt) != 2:
            print('Error: resulting list "%s" contains %d file(s), must be exactly two!'
                  % (res_txt, len(res_txt)))
            break_flag = True
            break

        if 'sim' not in res_txt[0] or 'diff' not in res_txt[1]:
            print('Error: 1st file "%s" must contain "sim", 2nd file "%s" must contain "diff"!'
                  % (res_txt[0], res_txt[1]))
            break_flag = True
            break

        # Read files
        with open(res_txt[0], 'r') as f:
            fps_sim = f.read().splitlines()

        with open(res_txt[1], 'r') as f:
            fps_diff = f.read().splitlines()

        # Construct binary content
        fps = bitarray(''.join(fps_sim + fps_diff))

        # Save fps to a binary file named after the fusion combination, directly under the
        # 'fused' folder (three levels above the result files); building the path from
        # fused_dir also works when FP_PATH is a relative path
        with open(fused_dir + '/' + st_path, 'wb') as f:
            fps.tofile(f)

    # Leave the loop if error
    if break_flag:
        break

#### Plot random walks and Markov property

In [None]:
# Path to fingerprints of individual sensor modalities, e.g., 'acc_v'
fps_path = '/home/seemoo/car_zip/fps/full/5x/acc_h.txt'

# Read fingerprints, one per line
with open(fps_path, 'r') as f:
    fps = f.read().splitlines()

# Fail with a clear message instead of a bare IndexError on an empty file
if not fps:
    raise ValueError('No fingerprints found in "%s"!' % (fps_path,))

# Get number of bits (length of the first fingerprint)
n_bits = len(fps[0])

# Get sensor type: the file name without its extension
sensor_type = os.path.splitext(os.path.basename(fps_path))[0]

# Create randomness folder if it does not exist
rand_dir = PLOT_PATH + '/randomness'
os.makedirs(rand_dir, exist_ok=True)

# Paths to save random walks and markov property
rwalk_path = rand_dir + '/' + 'rwalk-' + sensor_type + '.pdf'
markp_path = rand_dir + '/' + 'markp-' + sensor_type + '.pdf'

# Plot random walks and Markov property
plot_rand_walk(fps, n_bits, save_distribution_to=rwalk_path, save_markov_to=markp_path)