In [71]:
import numpy as np
import pandas as pd
import pickle
import tflscripts
from sklearn.metrics import accuracy_score

# Project configuration loaded once; device_name_to_device() below reads
# configuration['device_roles'][<dataset>] from it.
configuration = tflscripts.read_configuration()

In [41]:
class TransferableModelsForCase:
    """Container pairing one target (device name + dataset) with candidate models.

    Attributes:
        target_device_name: name of the target device, as given by the caller.
        target_dataset: name of the target dataset, as given by the caller.
        transferable_models: starts out as an empty list; later cells read its
            entries as mappings with the keys 'source_dataset',
            'source_device_name', 'label', 'features' and 'classifier'.
    """

    def __init__(self, target_device_name, target_dataset):
        # Every instance gets its own fresh list (no shared mutable default).
        self.target_device_name = target_device_name
        self.target_dataset = target_dataset
        self.transferable_models = list()

In [45]:
# Load the pickled list of target cases.
# NOTE: pickle.load can execute arbitrary code while unpickling; only load a
# 'transferable_models.p' that was produced by a trusted run of this project.
# The context manager closes the file handle, and the handle is no longer bound
# to the name `input`, which used to shadow the builtin for the whole kernel.
with open('transferable_models.p', 'rb') as models_file:
    transferable_models = pickle.load(models_file)

print('Cases')
# Last expression of the cell: (index, target dataset, target device name) per case.
[(i, m.target_dataset, m.target_device_name) for i, m in enumerate(transferable_models)]

Cases


[(0, 'synergy-final-iter1', 'Mite 3 Sink'),
 (1, 'synergy-final-iter2', 'Mite 3 Coffee'),
 (2, 'synergy-final-iter5', 'Mite 4 Sink'),
 (3, 'synergy-final-iter1', 'Mite 2 Table'),
 (4, 'synergy-final-iter4', 'Mite 1 Table'),
 (5, 'synergy-final-iter4', 'Mite 4 Coffee'),
 (6, 'synergy-final-iter5', 'Mite 2 Coffee'),
 (7, 'synergy-final-iter2', 'Mite 2 Sink'),
 (8, 'robotics-final', 'Mite 1 Coffee'),
 (9, 'robotics-final', 'Mite 4 Sink'),
 (10, 'scott-final-iter1', 'Mite 4 Right'),
 (11, 'scott-final-iter1', 'Mite 2 Left')]

In [126]:
def test_result_for(source_dataset,
                    source_device,
                    target_dataset,
                    target_device,
                    label,
                    features,
                    classifier):
    test_set = tflscripts.TestSet(name='_'.join([source_dataset, source_device]))
    
    results = test_set.get_results()
    results = [r for r in results if r.target_dataset == target_dataset and
        r.target_device == target_device and
        r.features == features and
        r.label_name() == label and
        r.classifier == classifier]
    
    assert(len(results) == 1)
    return results[0]

def features_to_key(features):
    """Map a display name of a feature group to its stored pattern string.

    The returned string is what test results carry in their ``features``
    attribute (it is compared for equality in test_result_for).

    Raises:
        KeyError: if ``features`` is not one of the known display names.
    """
    pattern_by_name = dict([
        ('All', '.*'),
        ('Microphone', 'MICROPHONE_|microphone'),
        ('Accelerometer', 'ACCEL_|accel_'),
        ('Environmental', 'temperature|pressure|humidity'),
        ('EMI & motion', 'EMI|IRMOTION'),
        ('Microphone & accelerometer', 'MICROPHONE|microphone|ACCEL_|accel_'),
    ])
    return pattern_by_name[features]

def classifier_to_key(classifier):
    """Map a short classifier name ('SVM', 'LR', 'RF') to its stored key.

    Raises:
        KeyError: if ``classifier`` is not one of the known short names.
    """
    key_by_short_name = dict(
        SVM='SVM',
        LR='LogisticRegression',
        RF='RandomForestClassifier',
    )
    return key_by_short_name[classifier]

def device_name_to_device(dataset, device_name):
    """Return the device whose role in ``dataset`` equals ``device_name``.

    Looks the dataset up in ``configuration['device_roles']`` and returns the
    first device (in iteration order) mapped to ``device_name``; returns None
    when no device has that role.

    Raises:
        KeyError: if ``dataset`` has no entry under 'device_roles'.
    """
    role_by_device = configuration['device_roles'][dataset]
    return next(
        (device for device, role in role_by_device.items() if role == device_name),
        None,
    )

# Target case under study: index 4 of the loaded cases.
case = transferable_models[4]

# Fetch the stored test result for every candidate model of this case,
# translating display names (device, features, classifier) to stored keys.
results = []
for model_info in case.transferable_models:
    result = test_result_for(**{
        'source_dataset': model_info['source_dataset'],
        'source_device': device_name_to_device(model_info['source_dataset'],
                                               model_info['source_device_name']),
        'target_dataset': case.target_dataset,
        'target_device': device_name_to_device(case.target_dataset,
                                               case.target_device_name),
        'label': model_info['label'],
        'features': features_to_key(model_info['features']),
        'classifier': classifier_to_key(model_info['classifier']),
    })
    results.append(result)

In [128]:
def accuracy_of_test_result(result):
    """Score ``result.predicted`` against a one-vs-rest view of the ground truth.

    Every entry of ``result.actual_with_all_labels`` equal to ``result.label``
    keeps that label; every other entry becomes -1. The accuracy of
    ``result.predicted`` against that sequence is returned.
    """
    is_own_label = result.actual_with_all_labels == result.label
    one_vs_rest = np.where(is_own_label, result.label, -1)
    return accuracy_score(one_vs_rest, result.predicted)

# Keep, for each label, the result with the highest accuracy; on a tie the
# result seen first stays (the comparison is strict).
label_models = {}
for result in results:
    incumbent = label_models.get(result.label)
    if incumbent is None or accuracy_of_test_result(incumbent) < accuracy_of_test_result(result):
        label_models[result.label] = result
# Live dict view over the best result per label.
results_by_accuracy = label_models.values()

In [123]:
# Build a frame from every result's `predicted` sequence and transpose it, so
# each column holds one result's predictions and each row one position.
df = pd.DataFrame([r.predicted for r in results]).T

def majority_vote(row):
    """Return the most frequent value in ``row``, ignoring -1 votes.

    Returns -1 when the row holds no value other than -1.
    """
    counts = row.value_counts()
    real_votes = counts.loc[counts.index != -1]
    if real_votes.empty:
        return -1
    return real_votes.idxmax()

# One fused prediction per row: the most common vote that is not -1.
majority_votes = [majority_vote(row) for _, row in df.iterrows()]

# `actual` was never assigned by any cell of this notebook, so this cell failed
# with NameError on a fresh kernel (Restart & Run All).
# NOTE(review): assumes every entry of `results` targets the same dataset and
# device and therefore shares one ground-truth sequence, taken here from the
# first result's `actual_with_all_labels` -- confirm this matches the `actual`
# that was originally in the kernel.
# np.asarray makes the assignment positional (no index alignment), matching
# how accuracy_of_test_result pairs ground truth with predictions.
actual = np.asarray(results[0].actual_with_all_labels)

df['actual'] = actual
df['majority_votes'] = majority_votes

In [124]:
# Accuracy of the majority vote, scored only on rows where a vote was cast
# (rows whose majority vote is -1 are excluded).
voted_rows = df.loc[df.majority_votes != -1]
accuracy_score(voted_rows['actual'], voted_rows['majority_votes'])

0.8302001212856277