Miettinen et al.
===========

This notebook is used to generate the results for the evaluation of the scheme by Miettinen et al. (Miettinen, M., Asokan, N., Nguyen, T.D., Sadeghi, A.-R., Sobhani, M.: Context-Based Zero-Interaction Pairing and Key Evolution for Advanced Personal Devices. In: Proceedings of the 2014 ACM SIGSAC Conference on Computer and Communications Security - CCS ’14. pp. 880–891. ACM Press, New York, New York, USA (2014)).

The paper proposes two different fingerprinting approaches, one based on ambient audio ('noiseFingerprint' in the code) and one based on luminosity ('lux_miettinen'). In the paper, the scheme is evaluated in section 4.3.

First, we load a couple of libraries

In [None]:
# %matplotlib notebook

from glob import glob
import gzip
import re
import json
from dateutil import parser
import datetime
import math
from pprint import pprint

from os import makedirs
from os.path import isfile

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

We need to set up a number of constants that allow us to find the relevant files. Change this to point to the correct paths on your system.

In [None]:
# Scenario sub-directories. Each one is appended directly to BASE_DIR when the
# import functions build their data paths, hence the trailing slash.
CAR_EXP = 'CarExp/'
OFF_EXP = 'OfficeExp/'
MOBILE_EXP = 'MobileExp/'

# Root directory of the result files read by the import functions.
BASE_DIR = "/media/seemoo/data/zia-data/results/"

# Root directory of the JSON cache files written by save_result_json.
PREFIX_JSON='/home/seemoo/plots/json'

Some more internal constants. Leave these alone unless you understand what you are doing. They define the pairings of sensors for the different scenarios.

In [None]:
def _sensor_ids(first, last):
    """Return the zero-padded two-digit sensor IDs from `first` to `last`, inclusive."""
    return ['%02d' % number for number in range(first, last + 1)]


def _block_diagonal(sizes, dtype):
    """Return a square 0/1 matrix with one all-ones diagonal block per entry of `sizes`."""
    side = sum(sizes)
    matrix = np.zeros((side, side), dtype=dtype)
    start = 0
    for size in sizes:
        matrix[start:start + size, start:start + size] = 1
        start += size
    return matrix


# Sensor mapping: car experiment (two groups of six sensors)
SENSORS_CAR1 = _sensor_ids(1, 6)
SENSORS_CAR2 = _sensor_ids(7, 12)
SENSORS_CAR = SENSORS_CAR1 + SENSORS_CAR2

# Sensor mapping: office experiment (three groups of eight sensors)
SENSORS_OFFICE1 = _sensor_ids(1, 8)
SENSORS_OFFICE2 = _sensor_ids(9, 16)
SENSORS_OFFICE3 = _sensor_ids(17, 24)
SENSORS_OFFICE = SENSORS_OFFICE1 + SENSORS_OFFICE2 + SENSORS_OFFICE3

# Sensor mapping: mobile experiment (IDs 02 through 25) and a reduced subset
SENSORS_MOBILE = _sensor_ids(2, 25)
SENSORS_MOBILE_LIMITED = ["05", "06", "08", "09", "15", "16", "22", "23"]

# Colocation matrices: entry [i, c] == 1 marks the sensors at positions i and c
# of the corresponding sensor list as colocated (that is how the error-rate
# functions index them).
# Car: sensors 1-6 and 7-12 form two separate groups.
COLO_CAR = _block_diagonal([6, 6], int)

# Reduced mobile set: one group of four followed by two groups of two.
COLO_MOBILE_LIMITED = _block_diagonal([4, 2, 2], int)

# Full mobile matrix: a 10-sensor group, two 7-sensor groups, and index 24,
# which is paired with every member of the first group (but not with itself).
COLO_MOBILE = np.zeros((25, 25))
COLO_MOBILE[0:10, 0:10] = 1
COLO_MOBILE[0:10, 24] = 1
COLO_MOBILE[24, 0:10] = 1
COLO_MOBILE[10:17, 10:17] = 1
COLO_MOBILE[17:24, 17:24] = 1


# Also prepare colocation matrix for office: three groups of eight sensors.
COLO_OFFICE = _block_diagonal([8, 8, 8], float)

# Interval identifiers; they double as path components of the result files.
INT_5s = '5sec/'
INT_10s = '10sec/'
INT_15s = '15sec/'
INT_30s = '30sec/'
INT_1min = '1min/'
INT_2min = '2min/'

Now we will define a number of utility functions. First of all, we will need to be able to load data from the files on the file system.

In [None]:
def get_val(sensor1, sensor2, feature, jv, flip='01'):
    """Extract a fingerprint bit string from a loaded result file.

    For 'noiseFingerprint', the entry ``jv['results'][sensor1]`` is read; it
    holds two fingerprints, 'fingerprint_noise_lev1' and
    'fingerprint_noise_lev2'. The first is returned if ``sensor2 == flip``,
    otherwise the second. A fingerprint stored as a dict is concatenated in
    sorted-key order.

    For 'lux_miettinen', ``sensor1``, ``sensor2`` and ``flip`` are ignored.
    The values of every 128th entry of ``jv['results']`` (in sorted-key
    order) and of the last entry are concatenated.

    :param sensor1: Key into ``jv['results']`` (noise fingerprints only)
    :param sensor2: Sensor ID that is compared against ``flip``
    :param feature: 'noiseFingerprint' or 'lux_miettinen'
    :param jv: The decoded JSON document
    :param flip: Sensor ID for which the level-1 fingerprint is selected
    :return: The fingerprint string, or None for any other feature
    :raises IndexError: For 'lux_miettinen' if ``jv['results']`` is empty
    """
    if feature == 'noiseFingerprint':
        entry = jv['results'][sensor1]
        level = 'fingerprint_noise_lev1' if sensor2 == flip else 'fingerprint_noise_lev2'
        # The storage format is detected from the level-1 fingerprint only
        if isinstance(entry['fingerprint_noise_lev1'], str):
            return entry[level]
        selected = entry[level]
        return ''.join([selected[idx] for idx in sorted(selected.keys())])
    if feature == 'lux_miettinen':
        results = jv["results"]
        # Sort once instead of once per loop iteration
        keys = sorted(results.keys())
        # Entries at positions 128, 256, ... (1-based) strictly before the end
        parts = [results[keys[pos - 1]] for pos in range(128, len(keys), 128)]
        # The final entry is always appended (IndexError on empty results)
        parts.append(results[keys[-1]])
        return ''.join(parts)
    return None
            

def import_fps(sensors, scenario, feature, interval):
    """Load the fingerprints of a set of sensors and return them as a dict.

    The files are read from below ``BASE_DIR + scenario``. For the audio
    feature, the results computed relative to the scenario's reference sensor
    are used (sensor 02 for the mobile scenario, sensor 01 otherwise). There
    is no file pairing the reference sensor with itself, so for the reference
    sensor the file of its neighbour is read and ``get_val`` selects the
    other fingerprint from it. The office scenario stores audio results in
    one directory per day; they are appended in day order.

    :param sensors: Iterable of sensor IDs ('01', '02', ...)
    :param scenario: CAR_EXP, MOBILE_EXP or OFF_EXP
    :param feature: 'lux_miettinen' or the name of the audio feature directory
    :param interval: Interval path component (INT_5s, ...)
    :return: Dict mapping each sensor ID to a list of fingerprint strings;
        empty for an unknown scenario
    """
    def load_json(path):
        with gzip.open(path, 'rt') as fo:
            return json.loads(fo.read())

    rv = {}
    if scenario not in (CAR_EXP, MOBILE_EXP, OFF_EXP):
        return rv

    # (reference sensor, sensor whose file stands in for the reference sensor)
    reference, substitute = ('02', '03') if scenario == MOBILE_EXP else ('01', '02')

    # Compare by value: the original identity test (`is not`) on a string
    # literal depends on interning and is flagged by Python >= 3.8.
    if feature == 'lux_miettinen':
        pattern = BASE_DIR + scenario + 'Sensor-{}/lux/lux_miettinen/' + interval + 'delta_abs-10.0/delta_rel-0.1/fp_len-128/result.json.gz'
        for sen in sensors:
            j = load_json(pattern.format(sen))
            rv[sen] = [get_val(None, sen, feature, j, flip=reference)]
        return rv

    if scenario == OFF_EXP:
        day_dirs = ['audio/' + day + '/' for day in
                    ('1_0-24h', '2_24-48h', '3_48-72h', '4_72-96h', '5_96-120h', '6_120-144h', '7_144-168h')]
    else:
        day_dirs = ['']

    for day_dir in day_dirs:
        pattern = BASE_DIR + scenario + day_dir + 'Sensor-' + reference + '/audio/' + feature + "/" + interval + 'Sensor-{}.json.gz'
        for sen in sensors:
            j = load_json(pattern.format(substitute if sen == reference else sen))
            # Office results accumulate across days; the other scenarios
            # start from a fresh list for every file.
            if scenario != OFF_EXP or sen not in rv:
                rv[sen] = []
            for res in j['results']:
                rv[sen].append(get_val(res, sen, feature, j, flip=reference))
    return rv


def import_fps_timeslotted(sensors, scenario, feature, interval, bits=128):
    """Load fingerprints and return them per sensor, keyed by time slot.

    The stored bit string of each sensor is cut into sliding windows: the
    window for slot ``i`` (i = 1, 2, ...) contains the up to ``bits`` bits
    preceding position ``i``, in reversed order, and is keyed by the
    interval-aligned time of the first result plus ``i`` intervals.

    Audio results are read relative to the scenario's reference sensor
    (sensor 02 for the mobile scenario, sensor 01 otherwise); for the
    reference sensor itself the file of its neighbour is read. The office
    scenario concatenates the audio fingerprints of its seven day
    directories.

    For 'lux_miettinen' with ``bits == 128`` the stored results are returned
    unchanged, i.e. with the keys used in the file rather than datetime
    objects. NOTE(review): confirm that consumers expect this difference in
    key type.

    :param sensors: Iterable of sensor IDs ('01', '02', ...)
    :param scenario: CAR_EXP, MOBILE_EXP or OFF_EXP
    :param feature: 'lux_miettinen' or the name of the audio feature directory
    :param interval: One of the INT_* interval constants
    :param bits: Length of the generated fingerprints
    :return: Dict mapping each sensor ID to a dict of fingerprints; empty for
        an unknown scenario
    :raises Exception: If windows must be generated for an unknown interval
    """
    interval_seconds = {INT_5s: 5, INT_10s: 10, INT_15s: 15,
                        INT_30s: 30, INT_1min: 60, INT_2min: 120}

    def slot_seconds():
        """Length of one time slot in seconds."""
        if interval not in interval_seconds:
            raise Exception("Wtf?!")
        return interval_seconds[interval]

    def load_json(path):
        with gzip.open(path, 'rt') as fo:
            return json.loads(fo.read())

    def windows(fp, first_tstamp):
        """Cut `fp` into time-slotted, bit-reversed windows of up to `bits` bits."""
        period = slot_seconds()
        ts = parser.parse(first_tstamp)
        # Align to the interval grid. Every supported period divides 60 s or
        # is 120 s, so the position within the hour is sufficient.
        start = ts - datetime.timedelta(seconds=(ts.minute * 60 + ts.second) % period)
        # NOTE(review): range() stops at len(fp) - 1, so no window ends on the
        # very last bit. Kept as in the original; confirm this is intended.
        return {start + datetime.timedelta(seconds=i * period): fp[max(i - bits, 0):i][::-1]
                for i in range(1, len(fp))}

    rv = {}
    if scenario not in (CAR_EXP, MOBILE_EXP, OFF_EXP):
        return rv

    # (reference sensor, sensor whose file stands in for the reference sensor)
    reference, substitute = ('02', '03') if scenario == MOBILE_EXP else ('01', '02')

    # Compare by value: the original identity test (`is not`) on a string
    # literal depends on interning and is flagged by Python >= 3.8.
    if feature == 'lux_miettinen':
        pattern = BASE_DIR + scenario + 'Sensor-{}/lux/lux_miettinen/' + interval + 'delta_abs-10.0/delta_rel-0.1/fp_len-128/result.json.gz'
        for sen in sensors:
            results = load_json(pattern.format(sen))["results"]
            if bits == 128:
                # Fingerprints are already 128 bits.
                rv[sen] = results
                continue
            # A different length was requested: merge the stored 128-bit
            # fingerprints into one long string and split it again below.
            keys = list(results.keys())
            merged = ''.join(results[keys[i]][::-1] for i in range(128, len(keys), 128))
            # Grab leftover bits from the last entry
            offset = len(keys) % 128
            if offset > 0:
                merged += results[keys[-1]][:offset][::-1]
            rv[sen] = windows(merged, sorted(keys)[0])
        return rv

    if scenario == OFF_EXP:
        day_dirs = ['audio/' + day + '/' for day in
                    ('1_0-24h', '2_24-48h', '3_48-72h', '4_72-96h', '5_96-120h', '6_120-144h', '7_144-168h')]
    else:
        day_dirs = ['']

    for sen in sensors:
        fp = ''
        tstamp = None
        for day_dir in day_dirs:
            pattern = BASE_DIR + scenario + day_dir + 'Sensor-' + reference + '/audio/' + feature + "/" + interval + 'Sensor-{}.json.gz'
            j = load_json(pattern.format(substitute if sen == reference else sen))
            first_key = list(j["results"].keys())[0]
            if tstamp is None:
                # The time base is taken from the first file only
                tstamp = first_key
            fp += get_val(first_key, sen, feature, j, flip=reference)
        rv[sen] = windows(fp, tstamp)
    return rv

# Save the resulting JSON data from the get_far_frr function to a file
def save_result_json(data, paper, interval, scenario, modality, subscenario=None, suffix=None):
    """Write a dictionary to a JSON file below PREFIX_JSON (used for caching).

    The file is placed in ``PREFIX_JSON/scenario/paper/modality`` and named
    ``interval[-subscenario][_suffix].json``. Missing directories are created.

    :param data: The data to save.
    :param paper: The paper to save it under (SOUNDPROOF, ...)
    :param interval: The interval to save it under
    :param scenario: The scenario (S_CAR, S_OFFICE)
    :param modality: The modality (max_xcorr, ...)
    :param subscenario: The subscenario, or None if global.
    :param suffix: A suffix to place before the .json"""
    directory = '/'.join([PREFIX_JSON, scenario, paper, modality])
    pieces = [directory, '/', interval]
    if subscenario is not None:
        pieces.extend(['-', subscenario])
    if suffix is not None:
        pieces.extend(['_', suffix])
    pieces.append('.json')
    target = ''.join(pieces)

    makedirs(directory, exist_ok=True)
    with open(target, 'w') as fo:
        json.dump(data, fo, separators=(',', ': '), indent=4)

# Check if a result cache file (generated by save_result_json) exists
def result_exists(paper, interval, scenario, modality, subscenario=None, suffix=None):
    """Tell whether the cache file for a set of parameters is present.

    The file name is built as ``PREFIX_JSON/scenario/paper/modality/`` followed
    by ``interval[-subscenario][_suffix].json``.

    :param paper: The paper (SOUNDPROOF, ...)
    :param interval: The interval
    :param scenario: The scenario (S_CAR, S_OFFICE)
    :param modality: The modality (max_xcorr, ...)
    :param subscenario: The subscenario, or None if global.
    :param suffix: The suffix placed before the .json, or None
    :return: True if a file exists, otherwise False"""
    pieces = ['/'.join([PREFIX_JSON, scenario, paper, modality]), '/', interval]
    if subscenario is not None:
        pieces.extend(['-', subscenario])
    if suffix is not None:
        pieces.extend(['_', suffix])
    pieces.append('.json')
    return isfile(''.join(pieces))

# Load result cache file (generated by save_result_json)
def load_result(paper, interval, scenario, modality, subscenario=None, suffix=None):
    """Load a cache file written by save_result_json.

    JSON object keys are always strings. When no suffix is given, the
    top-level keys are converted to float on loading; files with a suffix
    are returned exactly as decoded.

    :param paper: The paper (SOUNDPROOF, ...)
    :param interval: The interval
    :param scenario: The scenario (S_CAR, S_OFFICE)
    :param modality: The modality (max_xcorr, ...)
    :param subscenario: The subscenario, or None if global.
    :param suffix: The suffix placed before the .json, or None
    :return: The loaded cache as a dictionary
    :raises AssertionError: If result_exists reports no file for the parameters"""
    assert result_exists(paper, interval, scenario, modality, subscenario, suffix), \
        "No cached result found for the given parameters"

    # Construct file name
    path = '/'.join([PREFIX_JSON, scenario, paper, modality])
    filename = path + '/' + interval
    if subscenario is not None:
        filename += '-' + subscenario
    if suffix is not None:
        filename += '_' + suffix
    filename += '.json'

    # Load the data
    with open(filename, 'r') as fo:
        r = json.load(fo)

    if suffix is not None:
        return r
    # Cast the top-level keys back to float
    return {float(threshold): value for threshold, value in r.items()}

A couple of functions to calculate error rates.

In [None]:
def far_frr(colo, ncolo, maxv=100.0, minv=0.0, increments=1000):
    """Compute error rates for a sweep of acceptance thresholds.

    A value is accepted if it is greater than or equal to the threshold. The
    thresholds are ``increments + 1`` evenly spaced values from ``minv`` to
    ``maxv``.

    :param colo: Similarity values of colocated pairs
    :param ncolo: Similarity values of non-colocated pairs
    :param maxv: Highest threshold
    :param minv: Lowest threshold
    :param increments: Number of steps between minv and maxv
    :return: Dict mapping each threshold to a dict with the keys "fpr",
        "fnr", "tpr" and "tnr"
    :raises ZeroDivisionError: If colo or ncolo is empty
    """
    def tally(values, threshold):
        """Count the accepted and rejected values for one threshold."""
        accepted = 0.0
        rejected = 0.0
        for value in values:
            if value >= threshold:
                accepted += 1
            else:
                rejected += 1
        return accepted, rejected

    res = {}
    for step in range(increments + 1):
        threshold = minv + step * ((maxv - minv) / float(increments))
        true_acc, false_rej = tally(colo, threshold)
        false_acc, true_rej = tally(ncolo, threshold)
        res[threshold] = {
            "fpr": false_acc / (false_acc + true_rej),
            "fnr": false_rej / (false_rej + true_acc),
            "tpr": true_acc / (true_acc + false_rej),
            "tnr": true_rej / (true_rej + false_acc),
        }
    return res


def far_frr_plain(fingerprints, colo, maxv=100.0, minv=0.0, increments=1000, bits=128):
    """Count accept/reject decisions per sensor pair for a sweep of thresholds.

    Every pair of sensors is compared at all time slots for which both have a
    fingerprint and the first sensor's fingerprint is exactly ``bits`` long.
    A pair is accepted at a threshold if its similarity (``sim_percent``) is
    greater than or equal to it. As a side effect, the density of the
    similarities of colocated and non-colocated pairs is plotted and their
    histogram intersection is printed.

    The sensor list is taken from the module-level variable ``sensors``; its
    order must match the rows and columns of ``colo``.

    The original body ignored ``fingerprints`` and read the global ``nfp``
    instead; the parameter is now used.

    :param fingerprints: Dict sensor ID -> {time slot: fingerprint}, as
        returned by import_fps_timeslotted
    :param colo: Colocation matrix; ``colo[i, c] == 1`` marks the sensors at
        positions i and c as colocated
    :param maxv: Highest threshold
    :param minv: Lowest threshold
    :param increments: Number of steps between minv and maxv
    :param bits: Required fingerprint length
    :return: Nested dict ``res[threshold][s1][s2]`` with the counts "ta",
        "fa", "tr" and "fr" for every pair where s2 follows s1 in ``sensors``
    """
    def histogram_intersection(h1, h2, bins):
        widths = np.diff(bins)
        sm = 0
        for width, v1, v2 in zip(widths, h1, h2):
            sm += min(width * v1, width * v2)
        return sm

    # Similarities per sensor pair, keyed by the pair's positions in `sensors`
    pair_scores = {}
    colo_perc = []
    ncolo_perc = []
    for i, s1 in enumerate(sensors):
        for c in range(i + 1, len(sensors)):
            s2 = sensors[c]
            scores = []
            for date, fp1 in fingerprints[s1].items():
                if date not in fingerprints[s2]:
                    continue
                # NOTE(review): only the first fingerprint's length is checked
                if len(fp1) != bits:
                    continue
                sim_perc = sim_percent(fp1, fingerprints[s2][date])
                if colo[i, c] == 1:
                    colo_perc.append(sim_perc)
                else:
                    ncolo_perc.append(sim_perc)
                scores.append(sim_perc)
            pair_scores[(i, c)] = scores

    sns.kdeplot(colo_perc, label="Colocated")
    ax = sns.kdeplot(ncolo_perc, label="Non-Colocated")
    ax.set_xlim(0.0, 100.0)

    hc, bins = np.histogram(colo_perc, np.arange(0.0, 100.0, 1.0), density=True)
    hn, _ = np.histogram(ncolo_perc, np.arange(0.0, 100.0, 1.0), density=True)
    print("Intersection:", histogram_intersection(hc, hn, bins))

    res = {}
    for incr in range(increments + 1):
        threshold = minv + incr * ((maxv - minv) / float(increments))
        res[threshold] = {}

        for i, s1 in enumerate(sensors):
            res[threshold][s1] = {}
            for c in range(i + 1, len(sensors)):
                counts = {"ta": 0.0, "fa": 0.0, "tr": 0.0, "fr": 0.0}
                # Colocated pairs yield true accepts / false rejects,
                # non-colocated pairs false accepts / true rejects.
                if colo[i, c] == 1:
                    accept_key, reject_key = "ta", "fr"
                else:
                    accept_key, reject_key = "fa", "tr"
                for score in pair_scores[(i, c)]:
                    counts[accept_key if score >= threshold else reject_key] += 1
                res[threshold][s1][sensors[c]] = counts

    return res

def far_frr_surprisal(fingerprints, colo, maxv=100.0, minv=0.0, increments=1000, bits=128, surprisal_margin=0.0):
    """Count accept/reject decisions per sensor pair, ignoring low-surprisal fingerprints.

    Works like far_frr_plain, but for each threshold a comparison is only
    counted if the surprisal of both fingerprints reaches
    ``bits * (100 - threshold) / 100 + surprisal_margin``. After the last
    threshold, the number of comparisons counted for it and the total number
    of fingerprints are printed.

    The sensor list is taken from the module-level variable ``sensors``; its
    order must match the rows and columns of ``colo``.

    The original body ignored ``fingerprints`` and read the global ``nfp``
    instead; the parameter is now used.

    :param fingerprints: Dict sensor ID -> {time slot: fingerprint}, as
        returned by import_fps_timeslotted
    :param colo: Colocation matrix; ``colo[i, c] == 1`` marks the sensors at
        positions i and c as colocated
    :param maxv: Highest threshold
    :param minv: Lowest threshold
    :param increments: Number of steps between minv and maxv
    :param bits: Required fingerprint length
    :param surprisal_margin: Constant added to the surprisal threshold
    :return: Nested dict ``res[threshold][s1][s2]`` with the counts "ta",
        "fa", "tr" and "fr" for every pair where s2 follows s1 in ``sensors``
    """
    probs = {}
    for sensor in sensors:
        probs[sensor] = generate_probabilities(fingerprints[sensor], bits)

    # Comparable samples per sensor pair: (time slot, fp1, fp2, similarity)
    pair_samples = {}
    for i, s1 in enumerate(sensors):
        for c in range(i + 1, len(sensors)):
            s2 = sensors[c]
            samples = []
            for date, fp1 in fingerprints[s1].items():
                if date not in fingerprints[s2]:
                    continue
                fp2 = fingerprints[s2][date]
                # NOTE(review): only the first fingerprint's length is checked
                if len(fp1) != bits:
                    continue
                samples.append((date, fp1, fp2, sim_percent(fp1, fp2)))
            pair_samples[(i, c)] = samples

    res = {}
    for incr in range(increments + 1):
        incl_count = 0
        threshold = minv + incr * ((maxv - minv) / float(increments))
        res[threshold] = {}

        surprisal_threshold = bits * ((100.0 - threshold) / 100.0) + surprisal_margin
        for i, s1 in enumerate(sensors):
            res[threshold][s1] = {}
            for c in range(i + 1, len(sensors)):
                s2 = sensors[c]
                counts = {"ta": 0.0, "fa": 0.0, "tr": 0.0, "fr": 0.0}
                if colo[i, c] == 1:
                    accept_key, reject_key = "ta", "fr"
                else:
                    accept_key, reject_key = "fa", "tr"
                for date, fp1, fp2, score in pair_samples[(i, c)]:
                    # NOTE(review): the surprisal values do not depend on the
                    # threshold and could be computed once per sample if
                    # surprisal() is a pure function — confirm before hoisting.
                    if surprisal(date, fp1, probs[s1]) < surprisal_threshold:
                        continue
                    if surprisal(date, fp2, probs[s2]) < surprisal_threshold:
                        continue
                    incl_count += 1
                    counts[accept_key if score >= threshold else reject_key] += 1
                res[threshold][s1][s2] = counts
        if incr == increments:
            total = sum(len(fingerprints[x]) for x in fingerprints)
            print(incl_count, "/", total)

    return res


def frr_for_far(data, target_far):
    """Calculate the False Reject Rate (FRR) implied by a given target False Accept Rate (FAR).

    The thresholds are visited in ascending numeric order, summing the counts
    of all sensor pairs. The search stops at the first threshold whose FAR is
    at or below the target and returns either that threshold or the one
    before it, whichever has the FAR closer to the target.

    :param data: Nested dict ``data[threshold][s1][s2]`` holding the counts
        'fa', 'fr', 'ta' and 'tr' (the structure returned by far_frr_plain
        and far_frr_surprisal).
    :param target_far: The false accept rate to aim for. Note that 1.0 implies 100%, so 0.1% should
        be written as 0.001.
    :return: A 3-tuple of observed FAR, FRR, and the used threshold, or
        (None, None, None) if a rate cannot be computed because there are no
        non-colocated or no colocated samples.
    :raises AssertionError: If the FAR is above the target for all thresholds.
    """

    # Initialize previous values with bogus values to ensure they are never used in the first iteration.
    prev_far = -500.0
    prev_frr = -500.0
    prev_threshold = -500.0
    # Sort explicitly instead of relying on the dict's iteration order;
    # key=float also handles thresholds that were loaded as JSON strings.
    for threshold in sorted(data, key=float):
        false_acc = 0.0
        false_rej = 0.0
        true_acc = 0.0
        true_rej = 0.0
        for pairs in data[threshold].values():
            for counts in pairs.values():
                # Include the numbers in the overall count
                false_acc += counts['fa']
                false_rej += counts['fr']
                true_acc += counts['ta']
                true_rej += counts['tr']

        # Calculate error rates
        if false_acc + true_rej == 0 or false_rej + true_acc == 0:
            # No valid solution exists
            return None, None, None
        far = false_acc / (false_acc + true_rej)
        frr = false_rej / (false_rej + true_acc)

        if far > target_far:
            # The computed FAR is above the target FAR. Save current values and carry on
            prev_far = far
            prev_frr = frr
            prev_threshold = threshold
            continue

        # We have reached or passed the target FAR. Determine if the previous value was a better fit
        if abs(target_far - far) < abs(target_far - prev_far):
            # We are closer to the target FAR than the previous FAR, use our values
            return (far, frr, threshold)
        # The previous values were closer, use them
        return (prev_far, prev_frr, prev_threshold)

    # Raised explicitly so that the check also survives `python -O`
    raise AssertionError("This statement should never be reached. Last error rates: FAR " + str(prev_far) + ", FRR " + str(prev_frr))

Helper functions for visualization.

In [None]:
# Visualize FAR and FRR
def plot_far_frr(results, xlow=0.0, xhigh=1.0):
    """Plot FAR and FRR over the threshold and mark the crossover point.

    The thresholds are scanned in ascending order for the first one whose
    FAR ("fpr") is at or below the FRR ("fnr"). Of that threshold and the one
    before it, the one with the smaller gap between both rates is printed,
    marked in the plot and returned.

    :param results: Dict mapping each threshold to a dict with at least the
        keys "fpr" and "fnr"
    :param xlow: Lower limit of the x axis
    :param xhigh: Upper limit of the x axis
    :return: A 3-tuple of FAR, FRR and threshold at the crossover, or None
        if no crossover was found
    """
    ordered = sorted(results.keys())
    fig, ax = plt.subplots()
    ax.plot(ordered, [results[t]["fpr"] for t in ordered], label='FAR')
    ax.plot(ordered, [results[t]["fnr"] for t in ordered], label='FRR')

    def finish():
        """Label the axes, add the legend and display the figure."""
        ax.set(xlabel='threshold', ylabel='rate', xlim=(xlow, xhigh))
        ax.legend()
        plt.show()

    # Sentinel gap for the first iteration. The "last" threshold is only bound
    # after the first pass; the sentinel gap keeps it from being read earlier
    # as long as the rates are proper fractions.
    last_fpr = 0
    last_fnr = 100000
    for current in ordered:
        cur_fpr = results[current]["fpr"]
        cur_fnr = results[current]["fnr"]
        if cur_fpr > cur_fnr:
            # FAR is still above FRR: remember the values and move on
            last_fpr, last_fnr, last_threshold = cur_fpr, cur_fnr, current
            continue

        fpr, fnr, threshold = cur_fpr, cur_fnr, current
        if abs(last_fpr - last_fnr) < abs(cur_fpr - cur_fnr):
            # The previous threshold was closer to the crossover
            fpr, fnr, threshold = last_fpr, last_fnr, last_threshold
        print("Thresh", threshold, "FAR", fpr, "FRR", fnr)
        ax.plot([threshold, threshold], [0.0, fnr], 'k-')
        ax.plot([0.0, threshold], [fnr, fnr], 'k-')
        finish()
        return fpr, fnr, threshold

    # There doesn't seem to be a crossover point
    print("No crossover detected.")
    finish()
    

def plot_far_frr_surprisal(result_data, xlow=0.0, xhigh=1.0):
    """Aggregate accept/reject counts into error rates, plot them and find the crossover.

    :param result_data: Mapping threshold -> sensor -> sensor -> dict with the
                        counts "ta", "tr", "fa" and "fr", or None
    :param xlow: Lower limit of the x axis
    :param xhigh: Upper limit of the x axis
    :return: A 3-tuple (far, frr, threshold) for the crossover point. None if
             result_data is None or if the FAR never drops to or below the FRR.
    :raises ZeroDivisionError: if a rate has a zero denominator for any threshold
    """
    if result_data is None:
        return

    results = {}
    for threshold, per_sensor in result_data.items():
        # Sum up the counts of all sensor pairs for this threshold
        totals = {"ta": 0.0, "tr": 0.0, "fa": 0.0, "fr": 0.0}
        for partners in per_sensor.values():
            for counts in partners.values():
                for name in totals:
                    totals[name] += counts[name]

        # Turn the counts into rates, in a structure the plotting code understands
        far = totals["fa"] / (totals["fa"] + totals["tr"])  # False Accept Rate
        frr = totals["fr"] / (totals["fr"] + totals["ta"])  # False Reject Rate
        tar = totals["ta"] / (totals["ta"] + totals["fr"])  # True Accept Rate
        trr = totals["tr"] / (totals["tr"] + totals["fa"])  # True Reject Rate
        results[threshold] = {"fpr": far, "fnr": frr, "tpr": tar, "tnr": trr}

    fig, ax = plt.subplots()
    ordered = sorted(results.keys())
    ax.plot(ordered, [results[t]["fpr"] for t in ordered], label='FAR')
    ax.plot(ordered, [results[t]["fnr"] for t in ordered], label='FRR')

    def finish_plot():
        # Shared decoration for both the "crossover found" and "no crossover" case
        ax.set(xlabel='threshold', ylabel='rate', xlim=(xlow,xhigh))
        ax.legend()
        plt.show()

    # State of the previously visited threshold (FAR still above FRR there)
    prev_fpr, prev_fnr = 0, 100000
    for threshold in ordered:
        fpr = results[threshold]["fpr"]
        fnr = results[threshold]["fnr"]
        if not (fpr <= fnr):
            # Not crossed yet, remember this point and move on
            prev_fpr, prev_fnr, prev_thres = fpr, fnr, threshold
            continue

        # The curves crossed between the previous and the current threshold.
        # Report whichever of the two has the smaller gap between FAR and FRR.
        if abs(prev_fpr - prev_fnr) < abs(fpr - fnr):
            fpr, fnr, threshold = prev_fpr, prev_fnr, prev_thres

        print("Thresh", threshold, "FAR", fpr, "FRR", fnr, "EER*", (fpr + fnr) / 2.0)
        # Mark the selected point with a vertical and a horizontal line
        ax.plot([threshold, threshold], [0.0, fnr], 'k-')
        ax.plot([0.0, threshold], [fnr, fnr], 'k-')
        finish_plot()
        return fpr, fnr, threshold

    # There doesn't seem to be a crossover point
    print("No crossover detected.")
    finish_plot()


# Hamming Distance
def hamming(str1, str2):
    """Count the positions at which two strings differ.

    Only the overlapping prefix is compared, i.e. the comparison stops at the
    end of the shorter string.
    Based on: https://code.activestate.com/recipes/499304-hamming-distance/#c2
    """
    return sum(str.__ne__(left, right) for left, right in zip(str1, str2))

def sim_percent(str1, str2):
    """Return the similarity of two strings in percent (100 = no differing position).

    The Hamming distance is normalized by the length of the shorter string.
    """
    mismatches = hamming(str1, str2)
    compared = float(min(len(str1), len(str2)))
    return (1.0 - mismatches / compared) * 100

def histogram_intersection(h1, h2, bins):
    """Compute the intersection of two histograms that share the same bin edges.

    :param h1: Values of the first histogram, one per bin
    :param h2: Values of the second histogram, one per bin
    :param bins: Bin edges (one more than there are bins)
    :return: Sum over all bins of the smaller of the two width-weighted values
    """
    widths = np.diff(bins)
    overlap = 0
    for idx, width in enumerate(widths):
        overlap += min(width * h1[idx], width * h2[idx])
    return overlap

# Error rates
At this point, we have defined all functions that we need for data analysis.

To work with the FAR and FRR data, we need to first compute a series of accept and reject results for specific thresholds on the data. This is an expensive operation (on the order of up to a few days for large datasets like the office experiment), but it only needs to be run once - the results are cached on disk for future operations.

If you have obtained this code together with the dataset, it should already contain these caches, so there is no need to regenerate them unless you want to reproduce our results. The code below only generates them if they aren't found in the expected location.

This code only works for the Car and Office scenarios. The Mobile scenario is computed using a separate piece of code (further below), as it requires limiting processing to a specific timeframe in the data.

In [None]:
# Compute (or load from the on-disk cache) the accept/reject counts for the Car
# and Office scenarios, then derive the EER and the FRR for a set of target FARs.
FEATURE = "similarity_percent"
for PAPER in ["noiseFingerprint", "lux_miettinen"]:
    for sensors, scenario, colo in zip([SENSORS_CAR, SENSORS_OFFICE], [CAR_EXP, OFF_EXP], [COLO_CAR, COLO_OFFICE]):
        for interval in [INT_2min, INT_1min, INT_30s, INT_15s, INT_10s, INT_5s]:
            for bits in [64, 128, 256, 512, 1024]:
                subscenario = "bit-" + str(bits)

                print(bits, scenario, interval)
                # Only run the expensive computation if there is no cached result
                if not result_exists(PAPER, interval[:-1], scenario, FEATURE, subscenario=subscenario):
                    nfp = import_fps_timeslotted(sensors, scenario, PAPER, interval, bits=bits)

                    # Pass the colocation information of the scenario, exactly like
                    # the call for the Mobile scenario does. It was missing here.
                    res = far_frr_plain(nfp, colo, maxv=101.0, minv=60.0, increments=1000, bits=bits)

                    save_result_json(res, PAPER, interval[:-1], scenario, FEATURE, subscenario=subscenario)

                data = load_result(PAPER, interval[:-1], scenario, FEATURE, subscenario=subscenario)

                try:
                    eer = plot_far_frr_surprisal(data, xlow=60.0, xhigh=100.0)
                except ZeroDivisionError:
                    print("Does not exist")
                    continue

                # plot_far_frr_surprisal returns None if there is no data or no
                # crossover point. Skip instead of failing on the unpacking below.
                if eer is None:
                    print("No EER found, skipping")
                    continue
                far, frr, threshold = eer

                res = {"base": {"eer": {"far": far, "frr": frr, "threshold": threshold}}}

                for target_far in [0.001, 0.005, 0.01, 0.015, 0.02, 0.025, 0.03, 0.035, 0.04, 0.045, 0.05]:
                    far, frr, threshold = frr_for_far(data, target_far)
                    res["base"]["far_%s" % target_far] = {"far": far, "frr": frr, "threshold": threshold}

                # Save the whole thing
                save_result_json(res, PAPER, interval[:-1], scenario, FEATURE, subscenario=subscenario, suffix="rates")

The same process for the mobile scenario, which requires slightly adapted processing:

In [None]:
# Compute (or load from the on-disk cache) the accept/reject counts for the
# Mobile scenario, then derive the EER and the FRR for a set of target FARs.
FEATURE = "similarity_percent"
for PAPER in ["noiseFingerprint", "lux_miettinen"]:
    for bits in [1024, 512, 256, 128, 64]:
        for sensors, scenario, colo in zip([SENSORS_MOBILE], [MOBILE_EXP], [COLO_MOBILE]):
            for interval in [INT_2min, INT_1min, INT_30s, INT_15s, INT_10s, INT_5s]:
                subscenario = "bit-" + str(bits)

                print(bits, scenario, interval)
                # Only run the expensive computation if there is no cached result
                if not result_exists(PAPER, interval[:-1], scenario, FEATURE, subscenario=subscenario):
                    nfp = import_fps_timeslotted(sensors, scenario, PAPER, interval, bits=bits)

                    # After loading the data, we need to exclude all data points that are recorded after
                    # the 21st of October 2018, 12:06:00. This is because after that point, the colocation
                    # of devices changes, and long-term fingerprint-based systems break down.
                    # We thus decided to only evaluate the first few hours of the recording.
                    cutoff = datetime.datetime(2018, 10, 21, 12, 6, 0)
                    for sensor in nfp:
                        # Collect first, delete afterwards: the dict must not change while iterating
                        to_del = []
                        for ts in nfp[sensor]:
                            # For some reason, we sometimes get a string instead of a datetime. Convert here.
                            x = ts if isinstance(ts, datetime.datetime) else parser.parse(ts)
                            if x > cutoff:
                                to_del.append(ts)
                        for ts in to_del:
                            del nfp[sensor][ts]

                    res = far_frr_plain(nfp, colo, maxv=101.0, minv=30.0, increments=1000, bits=bits)

                    save_result_json(res, PAPER, interval[:-1], scenario, FEATURE, subscenario=subscenario)

                data = load_result(PAPER, interval[:-1], scenario, FEATURE, subscenario=subscenario)

                try:
                    eer = plot_far_frr_surprisal(data, xlow=30.0, xhigh=100.0)
                except ZeroDivisionError:
                    print("Does not exist")
                    continue

                # plot_far_frr_surprisal returns None if there is no data or no
                # crossover point. Skip instead of failing on the unpacking below.
                if eer is None:
                    print("No EER found, skipping")
                    continue
                far, frr, threshold = eer

                res = {"base": {"eer": {"far": far, "frr": frr, "threshold": threshold}}}

                for target_far in [0.001, 0.005, 0.01, 0.015, 0.02, 0.025, 0.03, 0.035, 0.04, 0.045, 0.05]:
                    far, frr, threshold = frr_for_far(data, target_far)
                    res["base"]["far_%s" % target_far] = {"far": far, "frr": frr, "threshold": threshold}

                # Save the whole thing
                save_result_json(res, PAPER, interval[:-1], scenario, FEATURE, subscenario=subscenario, suffix="rates")

# Surprisal
Miettinen et al. also propose an extension of their system that computes the "surprisal" of a fingerprint (i.e., how (un)predictable the fingerprint is), and discards those not meeting a predefined surprisal threshold. Due to the significant computational load of evaluating many different thresholds, we limit ourselves to a specific subset of thresholds.

Once again, the code for the mobile scenario is kept separate.

The following functions are used to generate the baseline probabilities and calculate the surprisal of specific fingerprints.

In [None]:
def generate_probabilities(fingerprints, bits=128):
    """Estimate, per hour, how often each fingerprint bit is set to "1".

    :param fingerprints: Mapping of timestamp to fingerprint (string of "0" and "1")
    :param bits: Number of bit positions to track per fingerprint
    :return: Dict mapping each hour (datetime rounded down) to a dict that holds
             the fraction of ones for every bit position 0..bits-1, plus the
             number of fingerprints seen in that hour under the key "count"
    """
    prob = {}
    for tstamp, fp in fingerprints.items():
        # Round the timestamp down to the full hour.
        # Note: the date is kept, so the distribution is day-specific (e.g. weekends
        # are tracked separately from weekdays). Change this here if that is not desired.
        hour = parser.parse(str(tstamp)).replace(microsecond=0,second=0,minute=0)

        # First fingerprint for this hour: start all counters at zero
        if hour not in prob:
            fresh = dict.fromkeys(range(bits), 0.0)
            fresh["count"] = 0.0
            prob[hour] = fresh

        slot = prob[hour]
        slot["count"] += 1

        # Count the ones per bit position
        for position, bit in enumerate(fp):
            if bit == "1":
                slot[position] += 1

    # Turn the absolute counts into relative frequencies
    for slot in prob.values():
        for position in range(bits):
            slot[position] /= slot["count"]

    return prob
        

def surprisal(tstamp, fp, probabilities):
    """Compute the surprisal (in bits) of a fingerprint given per-hour bit probabilities.

    :param tstamp: Timestamp of the fingerprint, rounded down to the hour for the lookup
    :param fp: The fingerprint as a string of "0" and "1"
    :param probabilities: Per-hour probabilities of a "1" at each position, as
                          returned by generate_probabilities
    :return: Sum of -log2(p) over all positions, p being the probability of the observed bit
    """
    # Same rounding as in generate_probabilities (date is kept, so this is day-specific)
    hour = parser.parse(str(tstamp)).replace(microsecond=0,second=0,minute=0)

    total = 0
    for position, bit in enumerate(fp):
        p_one = probabilities[hour][position]
        # Probability of the bit value that was actually observed
        likelihood = p_one if bit == "1" else 1 - p_one
        total += -math.log(likelihood, 2)
    return total

Now calculate the error rates when only considering fingerprints with a sufficient surprisal.

In [None]:
# Error rates for the Car and Office scenarios when only fingerprints with a
# sufficient surprisal are considered.
FEATURE = "similarity_percent"
for PAPER in ["lux_miettinen", "noiseFingerprint"]:
    for bits in [1024, 256, 64]:
        for sensors, scenario, colo in zip([SENSORS_CAR, SENSORS_OFFICE], [CAR_EXP, OFF_EXP], [COLO_CAR, COLO_OFFICE]):
            for interval in [INT_2min, INT_30s, INT_5s]:
                # Define a number of different surprisal thresholds
                for threshold in [0.2, 0.4, 0.6, 0.8]:
                    # Compute actual threshold
                    surprisal_threshold = bits * threshold
                    # All files of this run are named after the surprisal threshold
                    subscenario = "bit-" + str(bits) + "-surpr-" + str(threshold)
                    print(bits, scenario, interval, threshold)

                    # If no cache exists, load and process the data
                    if not result_exists(PAPER, interval[:-1], scenario, FEATURE, subscenario=subscenario):
                        nfp = import_fps_timeslotted(sensors, scenario, PAPER, interval, bits=bits)

                        res = far_frr_surprisal(nfp, colo, maxv=101.0, minv=30.0, increments=100, bits=bits, surprisal_margin=surprisal_threshold)

                        save_result_json(res, PAPER, interval[:-1], scenario, FEATURE, subscenario=subscenario)

                    data = load_result(PAPER, interval[:-1], scenario, FEATURE, subscenario=subscenario)

                    try:
                        eer = plot_far_frr_surprisal(data, xlow=30.0, xhigh=100.0)
                    except ZeroDivisionError:
                        # Higher surprisal thresholds would filter even more, stop here
                        print("All data consumed, skipping rest.")
                        break

                    # plot_far_frr_surprisal returns None if there is no data or no
                    # crossover point. Skip instead of failing on the unpacking below.
                    if eer is None:
                        print("No EER found, skipping")
                        continue
                    # Keep the similarity threshold of the EER separate from the
                    # surprisal threshold of the loop, which names the output file
                    far, frr, eer_threshold = eer

                    res = {"base": {"eer": {"far": far, "frr": frr, "threshold": eer_threshold}}}

                    #for target_far in [0.001, 0.005, 0.01, 0.015, 0.02, 0.025, 0.03, 0.035, 0.04, 0.045, 0.05]:
                    #    far, frr, eer_threshold = frr_for_far(data, target_far)
                    #    res["base"]["far_%s" % target_far] = {"far": far, "frr": frr, "threshold": eer_threshold}

                    # Save the whole thing
                    save_result_json(res, PAPER, interval[:-1], scenario, FEATURE, subscenario=subscenario, suffix="rates")

Once again, the separate code for the Mobile experiment.

In [None]:
# Error rates for the Mobile scenario when only fingerprints with a sufficient
# surprisal are considered.
FEATURE = "similarity_percent"
for PAPER in ["lux_miettinen", "noiseFingerprint"]:
    for bits in [1024, 256, 64]:
        for sensors, scenario, colo in zip([SENSORS_MOBILE], [MOBILE_EXP], [COLO_MOBILE]):
            for interval in [INT_2min, INT_30s, INT_5s]:
                for threshold in [0.0, 0.2, 0.5, 0.75, 1.0]:
                    surprisal_threshold = bits * threshold
                    # All files of this run are named after the surprisal threshold
                    subscenario = "bit-" + str(bits) + "-surpr-" + str(threshold)
                    print(bits, scenario, interval, threshold)
                    # If no cache exists, load and process the data
                    if not result_exists(PAPER, interval[:-1], scenario, FEATURE, subscenario=subscenario):
                        nfp = import_fps_timeslotted(sensors, scenario, PAPER, interval, bits=bits)

                        # Drop everything recorded after 2018-10-21 12:06:00
                        cutoff = datetime.datetime(2018, 10, 21, 12, 6, 0)
                        for sensor in nfp:
                            # Collect first, delete afterwards: the dict must not change while iterating
                            to_del = []
                            for ts in nfp[sensor]:
                                # For some reason, we sometimes get a string instead of a datetime. Convert here.
                                x = ts if isinstance(ts, datetime.datetime) else parser.parse(ts)
                                if x > cutoff:
                                    to_del.append(ts)
                            for ts in to_del:
                                del nfp[sensor][ts]

                        res = far_frr_surprisal(nfp, colo, maxv=101.0, minv=30.0, increments=50, bits=bits, surprisal_margin=surprisal_threshold)

                        save_result_json(res, PAPER, interval[:-1], scenario, FEATURE, subscenario=subscenario)

                    data = load_result(PAPER, interval[:-1], scenario, FEATURE, subscenario=subscenario)

                    try:
                        eer = plot_far_frr_surprisal(data, xlow=30.0, xhigh=100.0)
                    except ZeroDivisionError:
                        # Higher surprisal thresholds would filter even more, stop here
                        print("All data consumed, skipping")
                        break

                    # plot_far_frr_surprisal returns None if there is no data or no
                    # crossover point. Skip instead of failing on the unpacking below.
                    if eer is None:
                        print("No EER found, skipping")
                        continue
                    # Keep the similarity threshold of the EER separate from the
                    # surprisal threshold of the loop, which names the output file
                    far, frr, eer_threshold = eer

                    res = {"base": {"eer": {"far": far, "frr": frr, "threshold": eer_threshold}}}

                    # Save the whole thing
                    save_result_json(res, PAPER, interval[:-1], scenario, FEATURE, subscenario=subscenario, suffix="rates")

# Robustness

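To compute the robustness of the scheme, we load the results of each scenario and apply the EER thresholds obtained in the other scenarios to its data. We then compare the resulting error rates with the scenario's own EER.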

In [None]:
# Schemes, scenarios, fingerprint intervals and fingerprint lengths that are
# covered by the robustness evaluation
PAPERS = ['lux_miettinen', 'noiseFingerprint']
SCENARIOS = {CAR_EXP, OFF_EXP, MOBILE_EXP}
INTERVALS = [INT_2min, INT_1min, INT_30s, INT_15s, INT_10s, INT_5s]
BITS = [64, 128, 256, 512, 1024]

def error_rate_for_threshold(data, threshold):
    """Calculate the error rates when using a specific threshold.

    If the exact threshold is not part of the data, the closest available
    threshold is used instead (the larger one if two are equally close).

    :param data: Mapping threshold -> sensor -> sensor -> dict with the counts
                 'fa', 'fr', 'ta' and 'tr'
    :param threshold: The threshold to generate the error rates for
    :return: A 3-tuple of (threshold that was actually used, far, frr)
    :raises ZeroDivisionError: if there are no counts to compute a rate from
    """
    if threshold not in data:
        target = threshold
        # Pick the nearest key. The second sort criterion prefers the larger
        # threshold if two keys are equally far away from the target.
        threshold = min(data, key=lambda thr: (abs(thr - target), -thr))

    false_acc = 0.0
    false_rej = 0.0
    true_acc = 0.0
    true_rej = 0.0
    for s1 in data[threshold]:
        for s2 in data[threshold][s1]:
            # Include the numbers in the overall count
            counts = data[threshold][s1][s2]
            false_acc += counts['fa']
            false_rej += counts['fr']
            true_acc += counts['ta']
            true_rej += counts['tr']

    # Calculate error rates
    far = false_acc / (false_acc + true_rej)
    frr = false_rej / (false_rej + true_acc)

    return (threshold, far, frr)

# For every scheme: apply the EER threshold of each other scenario to the data
# of a scenario, store the resulting error rates and print a comparison table.
for paper in PAPERS:
    robustness_output = {}
    for interval in INTERVALS:

        for scenario in SCENARIOS:
            print(paper, interval, scenario)
            for bit in BITS:
                robustness_output.setdefault(bit, {})
                subscenario = "bit-" + str(bit)
                if not result_exists(paper, interval[:-1], scenario, "similarity_percent", subscenario, "rates"):
                    # print(paper, interval, scenario, bit, "does not exist")
                    continue

                # Start with a fresh result for every bit length, otherwise entries
                # of a previous bit length would end up in the file of this one
                result = {scenario[:-1]: {}}

                # The EER of the scenario itself serves as the baseline
                rates = load_result(paper, interval[:-1], scenario, "similarity_percent", subscenario=subscenario, suffix="rates")
                orig_far = rates["base"]["eer"]["far"]
                orig_frr = rates["base"]["eer"]["frr"]
                data = load_result(paper, interval[:-1], scenario, "similarity_percent", subscenario=subscenario)
                for target_scen in SCENARIOS - set([scenario]):
                    if not result_exists(paper, interval[:-1], target_scen, "similarity_percent", subscenario, suffix="rates"):
                        continue

                    # Apply the EER threshold of the other scenario to our data
                    target_data = load_result(paper, interval[:-1], target_scen, "similarity_percent", subscenario=subscenario, suffix="rates")
                    threshold = target_data["base"]["eer"]["threshold"]
                    threshold, far, frr = error_rate_for_threshold(data, threshold)
                    result[scenario[:-1]][target_scen[:-1]] = {
                        "threshold": threshold,
                        "far": far,
                        "frr": frr,
                        "orig_far": orig_far,
                        "orig_frr": orig_frr,
                    }

                    try:
                        far_change_rel = (orig_far / far) * 100
                    except ZeroDivisionError:
                        far_change_rel = np.nan

                    try:
                        frr_change_rel = (orig_frr / frr) * 100
                    except ZeroDivisionError:
                        frr_change_rel = np.nan

                    scenpair = robustness_output[bit].setdefault(scenario, {}).setdefault(target_scen, {})
                    scenpair[interval] = {
                        "far": far,
                        "frr": frr,
                        "orig_far": orig_far,
                        "orig_frr": orig_frr,
                        "far_change_abs": orig_far - far,
                        "frr_change_abs": orig_frr - frr,
                        "total_change_abs": abs(orig_far - far) + abs(orig_frr - frr),
                        "far_change_rel": far_change_rel,
                        "frr_change_rel": frr_change_rel
                    }
                # Save the result
                save_result_json(result, paper, interval[:-1], scenario, "similarity_percent", 'bit-' + str(bit), "robustness")

    for scenario in SCENARIOS:
        for scenario2 in SCENARIOS - set([scenario]):
            for bit in robustness_output:
                # Ensure that we actually have data for that bit-scenario-scenario2 set
                scenpair = robustness_output[bit].get(scenario, {}).get(scenario2)
                if not scenpair:
                    continue

                # Print out table header
                # "scenario" is the used dataset, scenario2 is the one whose threshold was used
                print("Robustness", bit, scenario, scenario2)
                # What do these abbreviations mean?
                # - Int = Interval that was used
                # - FAR and FRR = Obtained false accept / reject rates
                # - ofar and ofrr = Original FAR and FRR from the base scenario, for comparison
                # - sprd = spread between FAR and FRR, i.e. abs(FAR - FRR)
                # - osprd = Spread between original FAR and FRR, i.e. abs(ofar - ofrr)
                # - aca and rca = Absolute change in false accept / reject rate, i.e. ofar - far
                # - tca = Absolute changes summed up, i.e. abs(aca) + abs(rca)
                # - acr and rcr = Relative changes in FAR and FRR, i.e. (ofar / far) * 100
                # - oeer = original EER
                # - eer = new EER, i.e. (far + frr) / 2.0
                # - eerabs = absolute change in EER
                print("Int", "FAR", "FRR", "ofar", "ofrr", "sprd", "osprd", "aca", "rca", "tca", "acr", "rcr", "oeer", "eer", "eerabs", sep='\t|')
                print("-" + "-------+"*14 + "-------")
                for interval in INTERVALS:
                    row = scenpair.get(interval)
                    if row is None:
                        continue
                    oeer = (row['orig_far'] + row['orig_frr']) / 2.0
                    eer = (row['far'] + row['frr']) / 2.0
                    ospread = abs(row['orig_far'] - row['orig_frr'])
                    spread = abs(row['far'] - row['frr'])
                    print(interval,
                          round(row['far'], 3),
                          round(row['frr'], 3),
                          round(row['orig_far'], 3),
                          round(row['orig_frr'], 3),
                          round(spread, 3),
                          round(ospread, 3),
                          round(row['far_change_abs'], 3),
                          round(row['frr_change_abs'], 3),
                          round(row['total_change_abs'], 3),
                          round(row['far_change_rel'], 1),
                          round(row['frr_change_rel'], 1),
                          round(oeer, 3),
                          round(eer, 3),
                          round(oeer - eer, 3),
                          sep='\t|')
                print("")

# Randomness
To evaluate the randomness of the generated fingerprints, we use techniques developed by Brüsch et al. in their paper ["On the Secrecy of Publicly Observable Biometric Features: Security Properties of Gait for Mobile Device Pairing" (CoRR abs/1804.03997)](https://arxiv.org/abs/1804.03997).

In [None]:
# This code is based on code by Arne Brüsch, cf. https://github.com/abruesch/randomness-figures
# It has been adapted to fit our use case. For the details on the original concept, see:
# Arne Brüsch, Ngu Nguyen, Dominik Schürmann, Stephan Sigg, and Lars Wolf. 2018. 
# On the Secrecy of Publicly Observable Biometric Features: Security Properties of Gait for 
# Mobile Device Pairing. CoRR abs/1804.03997 (2018). https://arxiv.org/abs/1804.03997
# The original code is licensed under the GPLv3.

import sys
import random
import os

import matplotlib.pyplot as plt
import matplotlib
import numpy as np
from scipy.misc import comb
from scipy.special import binom
from cycler import cycler


def random_walk(key, sum_distribution, transition_count, transition_probs, bitlength=128):
    ''' Walks through a string of '0' and '1' like through a galton board: a '1' is a step
    up, everything else a step down. The final position is appended to sum_distribution,
    the number of processed keys in transition_count[0] is incremented and the number of
    ones per bit position is tracked in transition_probs. All three are modified in place
    and returned as a tuple. '''

    transition_count[0] += 1
    position = 0
    for index, bit in enumerate(key[:bitlength]):
        if bit == '1':
            transition_probs[index] += 1
            position += 1
        else:
            position -= 1

    sum_distribution.append(position)
    return (sum_distribution, transition_count, transition_probs)


def markov_transitions(transition_probs, transition_count, bits=128):
    ''' Plots, for every bit position, the fraction of keys that have a '1' at that
    position and prints the median of these fractions. '''
    fraction_of_ones = []
    for position in range(bits):
        fraction_of_ones.append(transition_probs[position] / transition_count[0])

    plt.clf()
    plt.xlabel('nth bit')
    plt.ylabel('Probability for 1')
    plt.ylim([0, 1])
    plt.xlim([0, bits])
    plt.rcParams.update({'font.size': 18})
    plt.plot(fraction_of_ones, color='b')
    # Dotted reference line at the ideal probability of 0.5
    plt.plot([0.0, bits], [0.5, 0.5], 'k:')
    plt.show()
    print("Markov:", np.median(fraction_of_ones))

from scipy.misc import comb

def binomial_plot(n=256):
    ''' Plots the theoretical binomial distribution B(n, 0.5) for k = 0..n-1 as a red
    line into the current figure, shifted so that it is centered around zero.'''
    k = np.arange(n)
    pmf = binom(n,k)*0.5**k*(0.5)**(n-k)
    # Shift by n/2 to center the curve around zero
    centered = [value - n/2 for value in k]
    plt.plot(centered, list(pmf), color='r')

#def binomial_plot(bins,n):
#    variance =  n * 0.5 * 0.5
#    sigma = np.sqrt(variance)
#    y = mlab.normpdf(np.asarray(list(range(-128,128))), 0, sigma)
#    plt.plot(np.asarray(list(range(-128,128))),y,color='r')

def distribution(sum_distribution, bits=128, dist_xlim=None, save_to=None):
    ''' Plots the normalized histogram of the cumulative sums together with the
    theoretical binomial distribution, optionally saves the figure as EPS, shows
    it and prints the median of the sums.

    :param sum_distribution: Final positions of the random walks
    :param bits: Length of the fingerprints, determines value range and bin count
    :param dist_xlim: Optional x axis limits, defaults to [-bits, bits]
    :param save_to: Optional path to save the figure to (EPS format)
    '''
    plt.clf()
    plt.xlim(dist_xlim if dist_xlim is not None else [-bits, bits])
    # 'density' replaces the 'normed' argument, which no longer exists in current matplotlib
    plt.hist(sum_distribution, color='#007a9b', range=(-bits,bits), density=True, rwidth=0.5, bins=bits)
    binomial_plot(bits*2)
    # Get the current axes
    ax = plt.gca()
    # Get limits
    start, end = ax.get_xlim()
    if abs(start) > 150:
        # We are apparently plotting a very long fingerprint, let's make the ticks more sparse
        ax.xaxis.set_ticks(np.arange(start, end+1, 100))

    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)

    if save_to is not None:
        plt.savefig(save_to, format='eps', dpi=1000)
    plt.show()
    print("Median of distribution:", np.median(sum_distribution))
    


def plot_rand_walk(keys, bits=128, dist_xlim=None, save_distribution_to=None):
    ''' Turns every key (string consisting of '0' and '1') into a random walk. The walks
    themselves are currently not plotted; they are used to compute and plot the distribution
    of the cumulative sums and the per-bit probabilities.'''
    # Accumulators that random_walk fills: final sums, number of keys, ones per position
    state = ([], [0], dict.fromkeys(range(0, bits), 0))

    matplotlib.rcParams['axes.prop_cycle'] = cycler('color',
                                                    ['#e78a33', '#eda766', '#8d4959', '#aa7782', '#bdcd61', '#cdda89',
                                                     '#8a9c33', '#a7b566'])
    plt.rcParams.update({'font.size': 18})

    plt.ylabel('Sum')
    plt.xlabel('Keylength')

    plt.ylim([-bits, bits])
    for key in keys:
        state = random_walk(key, *state, bits)
    sum_distribution, transition_count, transition_probs = state
    plt.tight_layout()
    plt.rcParams.update({'font.size': 18})
    distribution(sum_distribution, bits, dist_xlim=dist_xlim, save_to=save_distribution_to)
    markov_transitions(transition_probs, transition_count, bits)


def plot_heat_map(keys, pt=plt):
    ''' Creates a heatmap of the random walks of the first 128 bits of each key.

    :param keys: Iterable of keys (strings consisting of '0' and '1')
    :param pt: Object providing the pyplot plotting interface, defaults to pyplot itself
    '''
    intensity = 1
    heatmap_array = []
    for key in keys:
        # The walk starts at the value of the first bit, the remaining bits move it up or down
        val = int(key[0])
        for i, bit in enumerate(key[1:128]):
            if bit == '1':
                val = val + 1
            elif bit == '0':
                val = val - 1
            heatmap_array.append([i, val])

    np_srt_heat = np.asarray(heatmap_array)
    X = np_srt_heat[:, 0]
    Y = np_srt_heat[:, 1]
    bins = (range(128), range(min(Y), max(Y)))
    # The former 'normed=False' argument is omitted: it no longer exists in current
    # NumPy versions and unnormalized counts are the default anyway
    H, xedges, yedges = np.histogram2d(X, Y, bins=bins)
    H = intensity * H
    pt.xlabel('Sum')
    pt.ylabel('Keylength')
    pt.imshow(H, cmap='viridis', norm=matplotlib.colors.LogNorm(), interpolation='nearest', origin='upper',
              extent=[yedges[0], yedges[-1], xedges[-1], xedges[0]])
    plt.rcParams.update({'font.size': 18})
    # Plot triangle
    plus_y = list(range(-1, 127))
    plus_x = list(range(1, 129))
    minus_y = [126 - i for i in range(0, 128)]
    minus_x = [-127 + i for i in range(1, 129)]
    plt.tight_layout()
    pt.plot(plus_x, plus_y, color='r')
    pt.plot(minus_x, minus_y, color='r')


def apply_plot_heat_map(keys):
    ''' Calls plot_heat_map() for the given keys and shows the resulting figure.'''
    plot_heat_map(keys)
    plt.show()

Use the functions defined above to evaluate and plot the bit distributions for the schemes in question.

In [None]:
# Plot the bit distributions of every sensor for both schemes and all scenarios
for paper, modality in [('noiseFingerprint', 'audio'), ('lux_miettinen', 'lux')]:
    for scenario, sensors in [(CAR_EXP, SENSORS_CAR), (OFF_EXP, SENSORS_OFFICE), (MOBILE_EXP, SENSORS_MOBILE)]:
        for interval in [INT_5s, INT_10s, INT_15s, INT_30s, INT_1min, INT_2min]:
            for bits in [128]:
                # Load the fingerprints of the scheme that is currently evaluated.
                # (This used to always load 'noiseFingerprint', even for the lux scheme.)
                a = import_fps_timeslotted(sensors, scenario, paper, interval, bits=bits)
                for i in a.keys():
                    keys = a[i].values()
                    print(paper, scenario, interval, bits, i)
                    plot_rand_walk(keys, bits=bits, save_distribution_to='/home/seemoo/plots/img/%s%s/%s/sensor-%s-%s-%s.eps' % (scenario, paper, modality, i, bits, interval[:-1]))