In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

# Default size (matplotlib measures figsize in inches) for every figure in this notebook.
width, height = 12, 7
plt.rcParams["figure.figsize"] = (width, height)


import pandas as pd
import numpy as np
import tflscripts
import json
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score

# Point tflscripts at the dataset folder (path is relative to this notebook's
# working directory), then load the shared configuration. The cells below index
# it by key: 'activities', 'analysed_activities', 'compared_activities' and
# 'device_roles'.
tflscripts.set_dataset_folder('../../datasets/')
configuration = tflscripts.read_configuration()

In [None]:
class TestResultStatistics:
    """Metrics and descriptive metadata derived from a single test result.

    The wrapped ``result`` must expose the attributes read below: ``label``,
    ``predicted``, ``actual_with_all_labels``, ``classifier``, ``features``
    and the ``source_*`` / ``target_*`` dataset and device identifiers.

    Device names come from ``configuration['device_roles']`` and are split on
    single spaces: token 0 is reported as the device type, tokens 0-1 as the
    device, and token 2 as the placement.
    """

    # Feature-selection pattern (as stored on the result) -> readable name.
    _FEATURE_LABELS = {
        '.*': 'All',
        'MICROPHONE_|microphone': 'Microphone',
        'ACCEL_|accel_': 'Accelerometer',
        'ACCEL_|accel_|mag_': 'Accelerometer & magnetometer',
        'temperature|pressure|humidity': 'Environmental',
        'EMI|IRMOTION': 'EMI & motion',
        'MICROPHONE|microphone|ACCEL_|accel_': 'Microphone & accelerometer'
    }

    # (how closely the devices match, how the placements relate) -> category.
    _TRANSFER_LABELS = {
        ('device', 'same'): 'Same device in same place',
        ('type', 'same'): 'Same device type in same place',
        ('different', 'same'): 'Different device in same place',
        ('device', 'different'): 'Same device in different place',
        ('type', 'different'): 'Same device type in different place',
        ('different', 'different'): 'Different device in different place',
        ('device', 'across'): 'Same device across spaces',
        ('type', 'across'): 'Same device type across spaces',
        ('different', 'across'): 'Different device across spaces',
    }

    def __init__(self, result):
        self.result = result

    # ------------------------------------------------------------------
    # Metrics
    # ------------------------------------------------------------------

    def actual_labels(self):
        """Ground truth reduced to one-vs-rest: the result's label, or -1."""
        observed = self.result.actual_with_all_labels
        return np.array([
            value if value == self.result.label else -1
            for value in observed
        ])

    def _score_binary(self, scorer):
        """Run a binary scorer with the result's label as the positive class."""
        predicted = self.result.predicted
        truth = self.actual_labels()
        return scorer(truth, predicted,
                      pos_label=self.result.label, average='binary')

    def accuracy(self):
        """Fraction of samples whose prediction equals the one-vs-rest truth."""
        predicted = self.result.predicted
        return accuracy_score(self.actual_labels(), predicted)

    def specificity(self):
        """Accuracy restricted to the samples whose true label is -1."""
        predicted = self.result.predicted
        truth = self.actual_labels()
        negatives = truth == -1
        return accuracy_score(truth[negatives], predicted[negatives])

    def recall(self):
        """Recall of the result's label."""
        return self._score_binary(recall_score)

    def precision(self):
        """Precision of the result's label."""
        return self._score_binary(precision_score)

    def f1(self):
        """F1 score of the result's label."""
        return self._score_binary(f1_score)

    # ------------------------------------------------------------------
    # Descriptive metadata
    # ------------------------------------------------------------------

    def all_activities(self):
        """Names of every distinct activity present in the ground truth."""
        present = np.unique(self.result.actual_with_all_labels)
        return [configuration['activities'][index] for index in present]

    @staticmethod
    def _role_of(dataset, device):
        """Configured role name of ``device`` within ``dataset``."""
        return configuration['device_roles'][dataset][device]

    def source_device_name(self):
        """Full configured name of the device the model was trained on."""
        return self._role_of(self.result.source_dataset,
                             self.result.source_device)

    def target_device_name(self):
        """Full configured name of the device the model was tested on."""
        return self._role_of(self.result.target_dataset,
                             self.result.target_device)

    def source_room(self):
        """Part of the source dataset name before the first '-'."""
        return self.result.source_dataset.partition('-')[0]

    def target_room(self):
        """Part of the target dataset name before the first '-'."""
        return self.result.target_dataset.partition('-')[0]

    def source_placement(self):
        """Third space-separated token of the source device name."""
        tokens = self.source_device_name().split(' ')
        return tokens[2]

    def target_placement(self):
        """Third space-separated token of the target device name."""
        tokens = self.target_device_name().split(' ')
        return tokens[2]

    def source_device_type(self):
        """First space-separated token of the source device name."""
        return self.source_device_name().partition(' ')[0]

    def target_device_type(self):
        """First space-separated token of the target device name."""
        return self.target_device_name().partition(' ')[0]

    def source_device(self):
        """First two tokens of the source device name, joined by a space."""
        tokens = self.source_device_name().split(' ')
        return ' '.join(tokens[:2])

    def target_device(self):
        """First two tokens of the target device name, joined by a space."""
        tokens = self.target_device_name().split(' ')
        return ' '.join(tokens[:2])

    def classifier_name(self):
        """Classifier identifier stored on the result."""
        return self.result.classifier

    def features_name(self):
        """Readable name of the result's feature pattern.

        Raises ``KeyError`` for a pattern that is not in the table.
        """
        return self._FEATURE_LABELS[self.result.features]

    def type_of_transfer(self):
        """Categorise the source -> target relation of this result.

        Returns 'No transfer' when source and target are the same dataset and
        the same device; otherwise one of the "... in same place",
        "... in different place" (same room) or "... across spaces"
        (different rooms) categories.
        """
        outcome = self.result
        shares_room = self.source_room() == self.target_room()

        src = self.source_device_name().split(' ')
        dst = self.target_device_name().split(' ')

        # The second token is only compared when the device types already
        # match, so shorter names with differing types never index past
        # their end.
        if src[0] != dst[0]:
            closeness = 'different'
        elif src[1] == dst[1]:
            closeness = 'device'
        else:
            closeness = 'type'

        if not shares_room:
            return self._TRANSFER_LABELS[closeness, 'across']

        # Placement is read before the "No transfer" check on purpose: a
        # same-room result always needs a third name token.
        shares_place = src[2] == dst[2]

        if closeness == 'device' and \
                outcome.source_dataset == outcome.target_dataset:
            return 'No transfer'

        place = 'same' if shares_place else 'different'
        return self._TRANSFER_LABELS[closeness, place]

In [None]:
def get_distribution_of_samples(dataset, device):
    """Count samples per label for one device of the 1-second dataset.

    Reads ``<dataset>-1s`` for ``device`` through tflscripts, restricted to
    the activities listed in ``configuration['analysed_activities']``, and
    returns ``value_counts()`` of the ``label`` column for the label rows
    whose index also appears in the feature frame.
    """
    known_activities = configuration['activities']
    wanted_ids = [
        known_activities.index(name)
        for name in configuration['analysed_activities']
    ]

    features, labels = tflscripts.read_and_filter_dataset(
        dataset + '-1s',
        device,
        use_features='.*',
        use_activities=wanted_ids,
        check_all_activities=False,
        scale=True,
        with_feature_selection=False)

    # Only label rows that have a matching feature row are counted.
    has_features = labels.index.isin(features.index)
    return labels.loc[has_features].label.value_counts()


def get_results_row(dataset, device):
    """Build one flat record per stored result of a test set.

    The test set is named ``<dataset>_<device>``. Returns an empty list when
    it does not exist; otherwise a list of 25-element lists whose order
    matches the ``columns`` list used to build the results frame.
    """
    test_set = tflscripts.TestSet(name='_'.join((dataset, device)))
    if not test_set.exists():
        return []

    sample_counts = get_distribution_of_samples(dataset, device)

    def build_record(result):
        stats = TestResultStatistics(result)
        transfer = stats.type_of_transfer()

        samples = sample_counts[result.label]
        if transfer == 'No transfer':
            # NOTE(review): 0.33 presumably mirrors the share of samples held
            # out for testing when source == target; confirm against the
            # training code.
            samples = int(samples - (samples * 0.33))

        return [
            result.source_dataset,
            result.target_dataset,
            result.source_device,
            result.target_device,
            stats.source_device(),
            stats.target_device(),
            stats.source_device_name(),
            stats.target_device_name(),
            stats.source_device_type(),
            stats.target_device_type(),
            stats.source_room(),
            stats.target_room(),
            stats.source_placement(),
            stats.target_placement(),
            transfer,
            stats.classifier_name(),
            result.label_name(),
            samples,
            result.features,
            stats.features_name(),
            stats.accuracy(),
            stats.recall(),
            stats.specificity(),
            stats.precision(),
            stats.f1()
        ]

    return [build_record(result) for result in test_set.get_results()]

# Gather the result records of every (dataset, device) pair in the configuration.
all_rows = []
for dataset, devices in configuration['device_roles'].items():
    for device in devices:
        all_rows.extend(get_results_row(dataset, device))

# Column names, in the same order as the records built by get_results_row.
columns = [
    'source_dataset',
    'target_dataset',
    'source_device_id',
    'target_device_id',
    'source_device',
    'target_device',
    'source_device_name',
    'target_device_name',
    'source_device_type',
    'target_device_type',
    'source_room',
    'target_room',
    'source_placement',
    'target_placement',
    'type_of_transfer',
    'classifier',
    'label',
    'samples',
    'features',
    'features_name',
    'accuracy',
    'recall',
    'specificity',
    'precision',
    'f1'
]

# Pass the names to the constructor instead of assigning df.columns afterwards:
# with no result rows the frame would otherwise have zero columns and the
# assignment would raise a length-mismatch ValueError.
df = pd.DataFrame(all_rows, columns=columns)

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [None]:
# For every dataset listed in configuration['compared_activities'], drop the
# results that involve that dataset (as source or as target) but whose label is
# not one of the activities listed for it.
activities_to_keep = configuration['compared_activities']

filtered_df = df.copy()
for dataset in activities_to_keep:
    activities = activities_to_keep[dataset]

    # Every mask is built from filtered_df itself so it always shares the index
    # of the frame being filtered. (The target mask used to be built from the
    # unfiltered `df`, whose index no longer matches after the first pass.)
    label_allowed = filtered_df['label'].isin(activities)
    source_ok = (filtered_df['source_dataset'] != dataset) | label_allowed
    target_ok = (filtered_df['target_dataset'] != dataset) | label_allowed
    filtered_df = filtered_df.loc[source_ok & target_ok]

print('Removed', len(df) - len(filtered_df), 'kept', len(filtered_df))
df = filtered_df

In [None]:
# Baseline results: the model evaluated on its own source device and dataset.
without_transfer = df.loc[df.type_of_transfer == 'No transfer']

# Columns that identify which baseline a result is compared against, and the
# metrics that get a baseline ("<metric>_wt") and a "<metric>_change" column.
baseline_keys = ['source_device_name', 'source_dataset', 'features', 'label', 'classifier']
baseline_metrics = ['accuracy', 'recall', 'specificity', 'precision', 'f1']

# Mean of every metric per baseline key, computed once instead of re-querying
# the baseline frame for each row. Comparing values directly also avoids
# building a query string, which breaks on values that contain quotes.
baseline_means = without_transfer.groupby(baseline_keys)[baseline_metrics].mean()
baseline_lookup = {
    key: tuple(values)
    for key, values in zip(baseline_means.index, baseline_means.values)
}


def apply_accuracy_without_transfer(x):
    """Return the no-transfer baseline metrics that match row ``x``.

    ``x`` is a row of the results frame. The result is a tuple holding the
    mean baseline value of each entry in ``baseline_metrics`` (accuracy,
    recall, specificity, precision, f1), taken over the 'No transfer' rows
    with the same source device name, source dataset, features, label and
    classifier.

    When no baseline exists, every entry is NaN so that the derived
    "*_change" columns are NaN too. A numeric placeholder would produce
    misleading differences, and a bare scalar cannot be unpacked into
    columns.
    """
    key = tuple(x[name] for name in baseline_keys)
    return baseline_lookup.get(key, (np.nan,) * len(baseline_metrics))


baseline = pd.DataFrame(
    [apply_accuracy_without_transfer(row) for _, row in df.iterrows()],
    index=df.index,
    columns=[metric + '_wt' for metric in baseline_metrics])

# Baseline columns first, then the change columns, so that every "_change"
# column has the "_wt" column it is derived from (precision_wt and f1_wt were
# previously read without ever being created).
for metric in baseline_metrics:
    df[metric + '_wt'] = baseline[metric + '_wt']

for metric in baseline_metrics:
    df[metric + '_change'] = df[metric] - df[metric + '_wt']

In [None]:
# Sanity check: print the column names, dtypes and non-null counts of the final frame.
df.info()

In [None]:
# Persist the final results frame as a pickle named 'results.p' in the current working directory.
df.to_pickle('results.p')