In [2]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

# Default (width, height) of every matplotlib figure, in inches.
width = 12
height = 7
plt.rcParams["figure.figsize"] = (width, height)


import pandas as pd
import numpy as np
import tflscripts
import json
from sklearn.metrics import accuracy_score

# Project configuration; the cells below index it with the 'activities' and
# 'device_roles' keys.
configuration = tflscripts.read_configuration()

In [3]:
class TestResultStatistics:
    """Accuracy figures and descriptive metadata derived from one test result.

    The wrapped ``result`` is read through these attributes: ``label``,
    ``predicted``, ``actual_with_all_labels``, ``classifier``, ``features``,
    ``source_dataset``, ``target_dataset``, ``source_device`` and
    ``target_device``.  Device and activity names are looked up in the
    module-level ``configuration`` mapping; accuracies are computed with
    ``accuracy_score``.

    Device names are split on single spaces: tokens 0-1 are reported as the
    device and token 2 as its placement.
    """

    # Feature pattern as stored on a result -> human-readable name.
    _FEATURE_TYPE_NAMES = {
        '.*': 'All',
        'MICROPHONE_|microphone': 'Microphone',
        'ACCEL_|accel_': 'Accelerometer',
        'ACCEL_|accel_|mag_': 'Accelerometer & magnetometer',
        'temperature|pressure|humidity': 'Environmental',
        'EMI|IRMOTION': 'EMI & motion',
        'MICROPHONE|microphone|ACCEL_|accel_': 'Microphone & accelerometer',
    }

    def __init__(self, result):
        """Keep a reference to the result all other methods read from."""
        self.result = result

    # ------------------------------------------------------------------
    # Accuracy
    # ------------------------------------------------------------------

    def actual_labels(self):
        """Return the ground truth as a one-vs-rest array.

        Entries equal to ``result.label`` are kept; every other entry of
        ``result.actual_with_all_labels`` is replaced by -1.
        """
        return np.array([
            value if value == self.result.label else -1
            for value in self.result.actual_with_all_labels
        ])

    def accuracy(self):
        """Return the accuracy of the predictions over all samples."""
        y_pred = self.result.predicted
        y_true = self.actual_labels()
        return accuracy_score(y_true, y_pred)

    def _masked_accuracy(self, wanted):
        """Return the accuracy over the samples whose true label is ``wanted``.

        ``result.predicted`` must support boolean-mask indexing.
        """
        y_pred = self.result.predicted
        y_true = self.actual_labels()
        keep = y_true == wanted
        return accuracy_score(y_true[keep], y_pred[keep])

    def accuracy_negative(self):
        """Return the accuracy over samples whose true label is -1."""
        return self._masked_accuracy(-1)

    def accuracy_positive(self):
        """Return the accuracy over samples whose true label is ``result.label``."""
        return self._masked_accuracy(self.result.label)

    # ------------------------------------------------------------------
    # Names looked up in the configuration
    # ------------------------------------------------------------------

    def all_activities(self):
        """Return the configured activity for each distinct actual label."""
        distinct_labels = np.unique(self.result.actual_with_all_labels)
        return [configuration['activities'][label] for label in distinct_labels]

    @staticmethod
    def _role_name(dataset, device):
        """Look up the configured role name of ``device`` within ``dataset``."""
        return configuration['device_roles'][dataset][device]

    def source_device_name(self):
        """Return the configured role name of the source device."""
        return self._role_name(self.result.source_dataset, self.result.source_device)

    def target_device_name(self):
        """Return the configured role name of the target device."""
        return self._role_name(self.result.target_dataset, self.result.target_device)

    # ------------------------------------------------------------------
    # Pieces of the dataset / device names
    # ------------------------------------------------------------------

    def source_room(self):
        """Return the source dataset name up to its first '-'."""
        return self.result.source_dataset.partition('-')[0]

    def target_room(self):
        """Return the target dataset name up to its first '-'."""
        return self.result.target_dataset.partition('-')[0]

    def source_placement(self):
        """Return the third space-separated token of the source device name."""
        tokens = self.source_device_name().split(' ')
        return tokens[2]

    def target_placement(self):
        """Return the third space-separated token of the target device name."""
        tokens = self.target_device_name().split(' ')
        return tokens[2]

    def source_device(self):
        """Return the first two tokens of the source device name, space-joined."""
        tokens = self.source_device_name().split(' ')
        return ' '.join(tokens[:2])

    def target_device(self):
        """Return the first two tokens of the target device name, space-joined."""
        tokens = self.target_device_name().split(' ')
        return ' '.join(tokens[:2])

    def classifier_name(self):
        """Return the classifier recorded on the result."""
        return self.result.classifier

    def features_name(self):
        """Return the display name for ``result.features``.

        Raises ``KeyError`` when the pattern is not one of the known ones.
        """
        return self._FEATURE_TYPE_NAMES[self.result.features]

    # ------------------------------------------------------------------
    # Transfer category
    # ------------------------------------------------------------------

    def type_of_transfer(self):
        """Classify how the source and target of this result relate.

        Rooms are compared first, then full device names, then the individual
        tokens of the device names (0, 1 and 2).
        """
        if self.source_room() != self.target_room():
            return 'Across spaces'

        source_name = self.source_device_name()
        target_name = self.target_device_name()

        if source_name == target_name:
            # Identical role name: same dataset means nothing was transferred,
            # otherwise it is another dataset from the same room.
            if self.result.source_dataset == self.result.target_dataset:
                return 'No transfer'
            return 'Replica within spaces'

        source_tokens = source_name.split(' ')
        target_tokens = target_name.split(' ')

        # Token 2 is compared first, so a name with fewer than three tokens
        # fails here with IndexError.
        same_place = source_tokens[2] == target_tokens[2]
        same_type = source_tokens[0] == target_tokens[0]
        same_unit = source_tokens[1] == target_tokens[1]

        if same_place:
            if same_type:
                return 'Same device type in same place'
            return 'Different device in same place'

        if same_type:
            if same_unit:
                return 'Same device in different place'
            return 'Same device type in different place'

        return 'Different device in different place'

In [4]:
def accuracy_for(result):
    """Return ``(total, positive, negative)`` accuracy for one result.

    The true labels are first reduced to one-vs-rest form: entries of
    ``result.actual_with_all_labels`` equal to ``result.label`` are kept and
    all others become -1.  The positive and negative figures are the accuracy
    restricted to samples whose true label is ``result.label`` and -1
    respectively.  ``result.predicted`` must support boolean-mask indexing.
    """
    y_pred = result.predicted
    y_true = np.array([
        value if value == result.label else -1
        for value in result.actual_with_all_labels
    ])

    # Overall score first, then one score per true-label subset.
    scores = [accuracy_score(y_true, y_pred)]
    for wanted in (result.label, -1):
        keep = y_true == wanted
        scores.append(accuracy_score(y_true[keep], y_pred[keep]))

    return tuple(scores)

def get_results_row(dataset, device):
    """Return one list of column values per result of a test set.

    The test set is the one named ``'<dataset>_<device>'``.  An empty list is
    returned when that test set does not exist.  Values in each row are in
    the order: source/target dataset, source/target device, source/target
    device name, source/target room, source/target placement, type of
    transfer, classifier, label name, feature pattern, feature display name,
    accuracy, positive accuracy, negative accuracy.
    """
    test_set = tflscripts.TestSet(name='_'.join((dataset, device)))
    if not test_set.exists():
        return []

    def _as_row(result):
        # Flatten one result and its derived statistics into a plain list.
        stats = TestResultStatistics(result)
        return [
            result.source_dataset,
            result.target_dataset,
            result.source_device,
            result.target_device,
            stats.source_device_name(),
            stats.target_device_name(),
            stats.source_room(),
            stats.target_room(),
            stats.source_placement(),
            stats.target_placement(),
            stats.type_of_transfer(),
            stats.classifier_name(),
            result.label_name(),
            result.features,
            stats.features_name(),
            stats.accuracy(),
            stats.accuracy_positive(),
            stats.accuracy_negative(),
        ]

    return [_as_row(result) for result in test_set.get_results()]

# Collect the result rows of every (dataset, device) pair listed under
# configuration['device_roles'].
all_rows = []
for dataset in configuration['device_roles']:
    for device in configuration['device_roles'][dataset]:
        rows = get_results_row(dataset, device)
        all_rows.extend(rows)

# Column names, in the same order as the values of each row built by
# get_results_row.
columns = [
    'source_dataset',
    'target_dataset',
    'source_device',
    'target_device',
    'source_device_name',
    'target_device_name',
    'source_room',
    'target_room',
    'source_placement',
    'target_placement',
    'type_of_transfer',
    'classifier',
    'label',
    'features',
    'features_name',
    'accuracy',
    'accuracy_positive',
    'accuracy_negative'
]

# Name the columns in the constructor: assigning df.columns afterwards raises
# a length-mismatch ValueError when no rows were collected, whereas this
# yields an empty frame that still has the expected columns.
df = pd.DataFrame(all_rows, columns=columns)

In [5]:
# Sanity check: list every column with its non-null count and dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32842 entries, 0 to 32841
Data columns (total 18 columns):
source_dataset        32842 non-null object
target_dataset        32842 non-null object
source_device         32842 non-null object
target_device         32842 non-null object
source_device_name    32842 non-null object
target_device_name    32842 non-null object
source_room           32842 non-null object
target_room           32842 non-null object
source_placement      32842 non-null object
target_placement      32842 non-null object
type_of_transfer      32842 non-null object
classifier            32842 non-null object
label                 32842 non-null object
features              32842 non-null object
features_name         32842 non-null object
accuracy              32842 non-null float64
accuracy_positive     32842 non-null float64
accuracy_negative     32842 non-null float64
dtypes: float64(3), object(15)
memory usage: 4.5+ MB


In [6]:
# Save the aggregated results table as a pickle file named 'results.p'.
df.to_pickle('results.p')