In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

# Default size (in inches) applied to every matplotlib figure in this notebook.
width, height = 12, 7
plt.rcParams["figure.figsize"] = (width, height)


from sqlalchemy import create_engine
import pandas as pd
import numpy as np
import tflscripts
import json
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from statistics import mode

In [18]:
# Activity names taken into account when reading and filtering the datasets.
activities = [
    "Dishes",
    "Microwave",
    "Coffee",
    "Null",
    "Kettle",
    "Chopping food",
    "Conversation",
    "Eating popcorn",
    "Knocking",
    "Phone vibrating",
    "Microwave door closed",
    "Microwave door opened",
    "Microwave button press",
]

configuration = tflscripts.read_configuration()

# Position of each selected activity inside configuration['activities'];
# raises ValueError if a name is not present there.
activities_i = list(map(configuration['activities'].index, activities))

def get_y_for_label(df_labels, label):
    """Build a one-vs-rest target column for ``label``.

    Returns a copy of ``df_labels['label']`` in which every entry that is
    not equal to ``label`` has been replaced by ``-1``. The input frame is
    left unmodified.
    """
    targets = df_labels['label'].copy()
    targets.loc[targets != label] = -1
    return targets

def _unique_mode(values):
    """Return ``(True, value)`` if one value is strictly the most frequent.

    Returns ``(False, None)`` when ``values`` is empty or when the highest
    count is shared by two or more values.
    """
    from collections import Counter

    top = Counter(values).most_common(2)
    if not top:
        return False, None
    if len(top) == 2 and top[0][1] == top[1][1]:
        return False, None
    return True, top[0][0]


def smooth_predictions(predicted):
    """Smooth a sequence of predictions with a local majority vote.

    For each position ``i``:

    * if the prediction agrees with the (up to two) predictions that follow
      it, it is kept unchanged;
    * otherwise it is replaced by the unique most frequent value in the
      window ``predicted[i - 3:i + 3]``;
    * if that window has no unique most frequent value, the vote is repeated
      ignoring ``-1`` entries;
    * if there is still no unique winner, the original prediction is kept.

    Ties are detected explicitly instead of relying on ``statistics.mode``
    raising: since Python 3.8 ``mode`` silently returns the first mode it
    encounters, which would disable the two fallback steps above.

    Parameters
    ----------
    predicted : sequence
        Predictions supporting ``len`` and slicing (e.g. list or 1-D array).

    Returns
    -------
    list
        Smoothed predictions, same length as ``predicted``.
    """
    smoothed = []
    total = len(predicted)

    for i, prediction in enumerate(predicted):
        lookahead = predicted[i:min(total, i + 3)]
        if not any(p != prediction for p in lookahead):
            # Stable run ahead: nothing to smooth.
            smoothed.append(prediction)
            continue

        window = predicted[max(0, i - 3):min(total, i + 3)]
        found, value = _unique_mode(window)
        if not found:
            # Tie in the full window: retry without the negative class.
            found, value = _unique_mode([p for p in window if p != -1])

        smoothed.append(value if found else prediction)

    return smoothed

In [33]:
# Dataset / device pair the classifier is trained on.
source_dataset = 'synergy-final-iter1'
source_device = '128.237.246.127'

# Dataset / device pair the classifier is evaluated on.
target_dataset = 'synergy-final-iter2'
target_device = '128.237.248.186'

# Each call returns a (features, labels) pair for one device of one dataset.
df_source, df_source_labels = tflscripts.read_and_filter_dataset(
    source_dataset + '-1s',
    source_device,
    use_features='.*',
    use_activities=activities_i,
    scale=True,
    with_feature_selection=False)

# Keep only rows present in both the feature frame and the label frame.
df_source = df_source.loc[df_source.index.isin(df_source_labels.index)]
df_source_labels = df_source_labels.loc[df_source_labels.index.isin(df_source.index)]

df_target, df_target_labels = tflscripts.read_and_filter_dataset(
    target_dataset + '-1s',
    target_device,
    use_features='.*',
    use_activities=activities_i,
    scale=True,
    with_feature_selection=False)

# Same two-way alignment as for the source data: without the second filter,
# label rows that have no feature row would survive and the label vectors
# derived from df_target_labels could be longer than the matching features.
df_target = df_target.loc[df_target.index.isin(df_target_labels.index)]
df_target_labels = df_target_labels.loc[df_target_labels.index.isin(df_target.index)]

# Regex handed to DataFrame.filter below; '.*' matches every column name.
features = '.*'

# For each selected label: train a binary (label vs. -1) classifier on the
# source device plus a slice of target negatives, then report accuracy on the
# target device separately for negative and positive samples.
#
# The recorded output of this cell shows these indices printing as
# "Microwave door closed" (11), "Microwave door opened" (10) and
# "Microwave button press" (14).
# Alternative that evaluates every label present in the source data:
# for label in df_source_labels.label.unique():
for label in [11, 10, 14]:
    # One-vs-rest targets: `label` stays, everything else becomes -1.
    x_source = df_source.filter(regex=features)
    y_source = get_y_for_label(df_source_labels, label)

    y_target = get_y_for_label(df_target_labels, label)
    # Use exactly the source feature columns, in the same order.
    x_target = df_target[x_source.columns]

    # Split the target data into negative (-1) and positive (== label) rows.
    x_target_negative = x_target[y_target == -1]
    y_target_negative = y_target[y_target == -1]
    x_target_positive = x_target[y_target != -1]
    y_target_positive = y_target[y_target != -1]

    # NOTE(review): [0, 1] presumably means "0% for the first split, 100% for
    # the second", i.e. no target negatives are used for training -- confirm
    # against tflscripts.take_multiple_percentages_of_data.
    negative_dfs = tflscripts.take_multiple_percentages_of_data(
        x_target_negative, y_target_negative,
        [0, 1])

    x_target_negative_train, y_target_negative_train = negative_dfs[0]
    x_target_negative_test, y_target_negative_test = negative_dfs[1]

    # Training set = all source samples + the "train" slice of target negatives.
    x_train, y_train = tflscripts.concat_and_reindex([x_source, x_target_negative_train],
                                           [y_source, y_target_negative_train])

    # NOTE(review): sklearn.preprocessing.Imputer is not available in recent
    # scikit-learn releases (sklearn.impute.SimpleImputer is the successor) --
    # confirm the scikit-learn version this notebook is meant to run on.
    ppl = Pipeline([
        ('impute', Imputer()),
        ('clf', svm.SVC(kernel='linear', decision_function_shape='ovr'))
#         ('clf', RandomForestClassifier())
    ])

    ppl.fit(x_train, y_train)

    # Accuracy on target rows whose true class is -1.
    false_predicted = ppl.predict(x_target_negative_test)
#     false_predicted = smooth_predictions(false_predicted)

    accuracy_negative = accuracy_score(y_target_negative_test, false_predicted)

    # Accuracy on target rows whose true class is `label`.
    true_predicted = ppl.predict(x_target_positive)
#     true_predicted = smooth_predictions(true_predicted)

    accuracy_positive = accuracy_score(y_target_positive, true_predicted)

    # Report the activity name followed by the two accuracies.
    print(configuration['activities'][label])
    print('Accuracy negative', accuracy_negative)
    print('Accuracy positive', accuracy_positive)

Microwave door closed
Accuracy negative 1.0
Accuracy positive 0.888888888889
Microwave door opened
Accuracy negative 0.999113998819
Accuracy positive 0.0
Microwave button press
Accuracy negative 1.0
Accuracy positive 1.0
