In [38]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

# Default figure size (width, height) applied to every plot in this notebook.
width, height = 12, 7
plt.rcParams.update({"figure.figsize": (width, height)})


import pandas as pd
import numpy as np
import tflscripts
import json
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer


# Project configuration; its 'activities' entry is used further down to turn
# activity names into numeric indices.
configuration = tflscripts.read_configuration()
# Previously computed transfer results, one row per transfer experiment.
# NOTE(review): unpickling can execute arbitrary code — only load 'results.p'
# from a trusted source.
results = pd.read_pickle('results.p')
# Presumably tells tflscripts where the raw datasets live (path is relative to
# the notebook's working directory) — confirm against tflscripts.
tflscripts.set_dataset_folder('../../datasets/')

In [30]:
# Keep only rows that (a) are an actual transfer, (b) score above 0.95 on the
# negative class and (c) score above 0.1 on the positive class.
is_transfer = results.type_of_transfer != 'No transfer'
negatives_ok = results.accuracy_negative > 0.95
positives_ok = results.accuracy_positive > 0.1
results = results.loc[is_transfer & negatives_ok & positives_ok]

In [95]:
def key_for_test(source_dataset,
                    source_device,
                    target_dataset,
                    target_device,
                    label,
                    features,
                    classifier):
    """Build a string key identifying one transfer test configuration.

    All seven arguments must be strings. They are joined in signature order
    with the ASCII unit separator so that field boundaries stay unambiguous:
    plain concatenation would give ('a', 'bc', ...) and ('ab', 'c', ...) the
    same key.

    Returns:
        str: a key that is equal for two calls only if every field is equal
        (as long as no field itself contains the separator character).
    """
    separator = '\x1f'  # ASCII unit separator; not expected in any field
    return separator.join([
        source_dataset,
        source_device,
        target_dataset,
        target_device,
        label,
        features,
        classifier,
    ])

def find_test_result(transfer):
    """Look up the stored test result that corresponds to a transfer row.

    Args:
        transfer: mapping-like row providing 'source_dataset',
            'source_device', 'target_dataset', 'target_device', 'label',
            'features' and 'classifier'.

    Returns:
        The first result in the test set named
        '<source_dataset>_<source_device>' whose key equals the key of
        `transfer`, or None when the test set does not exist or contains no
        matching result.
    """
    wanted_key = key_for_test(
        source_dataset=transfer['source_dataset'],
        source_device=transfer['source_device'],
        target_dataset=transfer['target_dataset'],
        target_device=transfer['target_device'],
        label=transfer['label'],
        features=transfer['features'],
        classifier=transfer['classifier'],
    )

    set_name = '_'.join([transfer['source_dataset'], transfer['source_device']])
    test_set = tflscripts.TestSet(name=set_name)
    if not test_set.exists():
        return None

    for candidate in test_set.get_results():
        candidate_key = key_for_test(
            source_dataset=candidate.source_dataset,
            source_device=candidate.source_device,
            target_dataset=candidate.target_dataset,
            target_device=candidate.target_device,
            label=candidate.label_name(),
            features=candidate.features,
            classifier=candidate.classifier,
        )
        if candidate_key == wanted_key:
            return candidate
    return None
    
# Position (within the filtered `results`) of the transfer examined below.
TRANSFER_INDEX = 3

transfer = results.iloc[TRANSFER_INDEX]
test_result = find_test_result(transfer)

# find_test_result returns None when nothing matches; fail here with a clear
# message instead of an AttributeError on None in a later cell.
if test_result is None:
    raise LookupError(
        'No stored test result found for transfer at position {} '
        '(test set {!r})'.format(
            TRANSFER_INDEX,
            '_'.join([transfer['source_dataset'], transfer['source_device']])))

In [96]:
# Activities to load from the target dataset. Entries that are commented out
# are deliberately excluded.
activities = [
    "Dishes",
    "Null",
    "Microwave",
    "Coffee",
    "Kettle",
    "Chopping food",
    "Conversation",
    # "Microwave door opened",
    "Microwave door closed",
    "Cupboard door opened",
    "Cupboard door closed",
    "Microwave button press",
    "Knocking",
    # "Phone vibrating",
    "Vacuum cleaning",
    "Blender running",
    "Alarm",
    "Soap dispensed",
    "Microwave done chime"
]

# Position of each selected activity in the configured activity list; an
# activity missing from the configuration raises ValueError here.
known_activities = configuration['activities']
activities_i = [known_activities.index(name) for name in activities]

# Load the 1-second variant of the target dataset for the target device,
# using every feature column and only the activities selected above.
target_dataset_name = transfer['target_dataset'] + '-1s'
df, df_labels = tflscripts.read_and_filter_dataset(
    target_dataset_name,
    transfer['target_device'],
    use_features='.*',
    use_activities=activities_i,
    check_all_activities=False,
    scale=True,
    with_feature_selection=False
)

# Keep only the rows present in both frames so that features and labels
# cover the same index values.
has_label = df.index.isin(df_labels.index)
df = df.loc[has_label]
has_features = df_labels.index.isin(df.index)
df_labels = df_labels.loc[has_features]

# df = df[test_result.columns]

In [97]:
# Stored predictions of the transferred model, re-indexed positionally onto
# the target rows (this raises if the lengths differ).
predicted = pd.Series(test_result.predicted)
predicted.index = df.index

# Training rows: every row whose true label is not the target label, plus
# every row the transferred model predicted as something other than -1.
is_negative = df_labels.label != test_result.label
negative_index = df[is_negative].index
positive_predictions = predicted[predicted != -1]
positive_predictions_index = positive_predictions.index
included_index = np.unique(
    negative_index.tolist() + positive_predictions_index.tolist()
)

x_train = df.loc[df.index.isin(included_index)]

# Start with everything negative (-1), then mark the predicted positives with
# the target label; a row that is in both groups ends up positive.
y_train = pd.Series(-1, index=x_train.index)
y_train[positive_predictions_index] = test_result.label

# NOTE(review): sklearn.preprocessing.Imputer was removed in scikit-learn
# 0.22; sklearn.impute.SimpleImputer is its replacement on newer versions.
imputer = Imputer()
linear_svm = svm.SVC(kernel='linear', decision_function_shape='ovr')
ppl = Pipeline([('impute', imputer), ('clf', linear_svm)])

# Fit on the selected rows, then predict every target row. The rows used for
# training are part of `df`, so they are predicted again here.
repredicted = ppl.fit(x_train, y_train).predict(df)

In [98]:
def accuracy_positive(y, predicted, positive_label=None):
    """Accuracy restricted to the samples whose true label is the positive one.

    Args:
        y: true labels (array-like, one entry per sample).
        predicted: predicted labels, in the same order and of the same length
            as `y`.
        positive_label: label treated as the positive class. When omitted,
            the notebook-level `test_result.label` is used, which is what
            this function always used before the parameter existed.

    Returns:
        The value of `accuracy_score` computed over only those positions
        where `y` equals the positive label.
    """
    if positive_label is None:
        positive_label = test_result.label

    # Work on plain arrays so the mask is applied by position for lists,
    # arrays and Series alike.
    y = np.asarray(y)
    predicted = np.asarray(predicted)

    is_positive = y == positive_label
    return accuracy_score(y[is_positive], predicted[is_positive])

In [99]:
# Ground truth in the same encoding as the training target: the target label
# where the row's true label is the target label, -1 everywhere else.
y = pd.Series(-1, index=df.index)
is_target_label = df_labels.label == test_result.label
target_label_index = df_labels.loc[is_target_label].index
y[target_label_index] = test_result.label

# Positive-class accuracy of the retrained model.
accuracy_positive(y.values, repredicted)

0.84239130434782605

In [100]:
# Positive-class accuracy of the stored (pre-retraining) predictions.
accuracy_positive(y=y.values, predicted=test_result.predicted)

0.73097826086956519

In [101]:
# Overall accuracy of the retrained model across all target rows.
accuracy_score(y_true=y.values, y_pred=repredicted)

0.94444444444444442

In [102]:
# Overall accuracy of the stored (pre-retraining) predictions.
accuracy_score(y_true=y.values, y_pred=test_result.predicted)

0.92894935752078611