In [16]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

# Default (width, height) used for every matplotlib figure in this notebook.
width, height = 12, 7
plt.rcParams["figure.figsize"] = (width, height)


import pandas as pd
import numpy as np
import tflscripts
import json
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer
from sklearn.linear_model import LogisticRegression


# Project configuration; its 'activities' and 'analysed_activities' entries
# are read further down in the notebook.
configuration = tflscripts.read_configuration()
# Previously saved transfer results (presumably one row per evaluated
# transfer — TODO confirm against the script that wrote results.p).
# NOTE(review): unpickling can execute arbitrary code; only load a results.p
# that comes from a trusted source.
results = pd.read_pickle('results.p')
# Point tflscripts at the dataset directory (path is relative to the
# notebook's working directory).
tflscripts.set_dataset_folder('../../datasets/')

In [98]:
# Keep only real transfers of the 'Dishes' label whose negative accuracy is
# above 0.98 and whose positive accuracy lies in [0.2, 0.7], then order them
# by sample count, largest first.
is_candidate = (
    (results.type_of_transfer != 'No transfer')
    & (results.accuracy_negative > 0.98)
    & results.accuracy_positive.between(0.2, 0.7)
    & (results.label == 'Dishes')
)
results = results.loc[is_candidate].sort_values(by='samples', ascending=False)
len(results)

259

In [112]:
def key_for_test(source_dataset,
                 source_device,
                 target_dataset,
                 target_device,
                 label,
                 features,
                 classifier):
    """Build a string key that identifies one transfer test.

    All seven arguments must be strings. They are joined in the order given
    with the ASCII unit separator (0x1F) between them, so that two different
    field combinations cannot collapse into the same key (plain concatenation
    maps both ("ab", "c") and ("a", "bc") to "abc").

    Returns:
        str: the key; intended only for equality comparison between keys
        produced by this function.

    Raises:
        TypeError: if any argument is not a string.
    """
    # A control character is used because the fields themselves contain
    # printable separators such as '|', '_', '-' and '.'.
    separator = '\x1f'
    return separator.join((source_dataset,
                           source_device,
                           target_dataset,
                           target_device,
                           label,
                           features,
                           classifier))

def find_test_result(transfer):
    """Return the stored test result that corresponds to ``transfer``.

    ``transfer`` is indexed by 'source_dataset', 'source_device',
    'target_dataset', 'target_device', 'label', 'features' and 'classifier'.
    The test set named '<source_dataset>_<source_device>' is opened through
    ``tflscripts.TestSet`` and its results are scanned in order; the first
    result whose ``key_for_test`` key equals the key of ``transfer`` is
    returned.

    Returns None when the test set does not exist or holds no matching result.
    """
    key_fields = ('source_dataset', 'source_device',
                  'target_dataset', 'target_device',
                  'label', 'features', 'classifier')
    wanted_key = key_for_test(**{field: transfer[field] for field in key_fields})

    test_set_name = transfer['source_dataset'] + '_' + transfer['source_device']
    test_set = tflscripts.TestSet(name=test_set_name)
    if not test_set.exists():
        return None

    for candidate in test_set.get_results():
        # Results expose their label through label_name(); the remaining key
        # fields are plain attributes.
        candidate_key = key_for_test(
            source_dataset=candidate.source_dataset,
            source_device=candidate.source_device,
            target_dataset=candidate.target_dataset,
            target_device=candidate.target_device,
            label=candidate.label_name(),
            features=candidate.features,
            classifier=candidate.classifier,
        )
        if candidate_key == wanted_key:
            return candidate
    return None
    
# Transfer under study: the row at position 2 of the filtered results
# (which are sorted by sample count, largest first).
transfer = results.iloc[2]
test_result = find_test_result(transfer)
# find_test_result returns None when no stored result matches; fail here with
# a clear message instead of later on `test_result.predicted`.
if test_result is None:
    raise LookupError('No stored test result found for the selected transfer')

In [113]:
# Translate the analysed activity names into their positions within the
# configured list of all activities.
activities = configuration['analysed_activities']
activity_names = configuration['activities']
activities_i = [activity_names.index(name) for name in activities]

# Read the '-1s' variant of the target dataset for the target device, with
# every feature column ('.*'), scaling on and feature selection off.
dataset_name = transfer['target_dataset'] + '-1s'
df, df_labels = tflscripts.read_and_filter_dataset(
    dataset_name,
    transfer['target_device'],
    use_features='.*',
    use_activities=activities_i,
    check_all_activities=False,
    scale=True,
    with_feature_selection=False
)

# Restrict both frames to the rows whose index is present in each of them.
rows_with_labels = df.index.isin(df_labels.index)
df = df.loc[rows_with_labels]
labels_with_rows = df_labels.index.isin(df.index)
df_labels = df_labels.loc[labels_with_rows]

# Left disabled by the author: restrict df to the columns of the test result.
# df = df[test_result.columns]

In [114]:
# Wrap the predictions stored on the test result in a Series.
predicted = pd.Series(test_result.predicted)
# Relabel the Series positionally with df's index. This assumes the stored
# predictions are in the same row order as df — TODO confirm; a length
# mismatch raises here.
predicted.index = df.index
# Keep the predictions next to the features as a 'predictions' column.
df['predictions'] = predicted

In [115]:
# Rows whose true label equals the label of the test result form the positive
# class; every other row is negative.
is_positive = df_labels.label == test_result.label

positive_df, positive_df_labels = df.loc[is_positive], df_labels.loc[is_positive]
negative_df, negative_df_labels = df.loc[~is_positive], df_labels.loc[~is_positive]

# Each class is split separately with ratios 0.7 / 0.3; the first part is
# used as training data and the second as test data.
SPLIT_RATIOS = (0.7, 0.3)

dfs = tflscripts.take_multiple_percentages_of_data(
    df=positive_df,
    df_labels=positive_df_labels,
    ratios=list(SPLIT_RATIOS),
)
(pos_df_train, pos_df_labels_train), (pos_df_test, pos_df_labels_test) = dfs[0], dfs[1]

dfs = tflscripts.take_multiple_percentages_of_data(
    df=negative_df,
    df_labels=negative_df_labels,
    ratios=list(SPLIT_RATIOS),
)
(neg_df_train, neg_df_labels_train), (neg_df_test, neg_df_labels_test) = dfs[0], dfs[1]

In [149]:
# Recombine the per-class splits into one training and one test set.
df_train = pd.concat([pos_df_train, neg_df_train])
df_test = pd.concat([pos_df_test, neg_df_test])

df_labels_train = pd.concat([pos_df_labels_train, neg_df_labels_train])
df_labels_test = pd.concat([pos_df_labels_test, neg_df_labels_test])

# The training target is the 'predictions' column (the predictions taken from
# the test result), not the ground-truth labels.
y_train = df_train['predictions']
predicted_y_test = df_test['predictions']

# Features: every column whose name does not start with 'predictions'.
x_train = df_train.filter(regex='^(?!predictions)')
x_test = df_test.filter(regex='^(?!predictions)')

# Ground truth for evaluation, with every label other than the test result's
# label collapsed to -1. Work on an explicit copy so the relabelling does not
# write into df_labels_test (the chained assignment used before triggered
# pandas' SettingWithCopyWarning).
y_test = df_labels_test.label.copy()
y_test.loc[y_test != test_result.label] = -1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [150]:
# Impute missing feature values, then fit a logistic regression on the
# training split and predict the held-out test split.
# NOTE(review): `Imputer` was removed from sklearn.preprocessing in
# scikit-learn 0.22 (successor: sklearn.impute.SimpleImputer); this cell needs
# an older release or an updated import — confirm the pinned version.
pipeline_steps = [
    ('impute', Imputer()),
    ('clf', LogisticRegression()),
]
ppl = Pipeline(steps=pipeline_steps)

repredicted = ppl.fit(x_train, y_train).predict(x_test)

In [137]:
def accuracy_positive(y, predicted, label=None):
    """Accuracy computed only on the samples whose true label is positive.

    Args:
        y: true labels; must support elementwise ``==`` and boolean-mask
            indexing (e.g. a pandas Series or numpy array).
        predicted: predictions in the same order as ``y``, indexable with the
            same boolean mask.
        label: value that marks the positive class. Defaults to
            ``test_result.label`` from the notebook namespace, which is what
            the function always used before this parameter existed.

    Returns:
        The ``accuracy_score`` of ``predicted`` against ``y`` restricted to
        the positions where ``y == label``.
    """
    if label is None:
        label = test_result.label
    is_positive = y == label
    return accuracy_score(y[is_positive], predicted[is_positive])

def accuracy_negative(y, predicted, label=None):
    """Accuracy computed only on the samples whose true label is not positive.

    Args:
        y: true labels; must support elementwise ``!=`` and boolean-mask
            indexing (e.g. a pandas Series or numpy array).
        predicted: predictions in the same order as ``y``, indexable with the
            same boolean mask.
        label: value that marks the positive class. Defaults to
            ``test_result.label`` from the notebook namespace, which is what
            the function always used before this parameter existed.

    Returns:
        The ``accuracy_score`` of ``predicted`` against ``y`` restricted to
        the positions where ``y != label``.
    """
    if label is None:
        label = test_result.label
    is_negative = y != label
    return accuracy_score(y[is_negative], predicted[is_negative])

In [151]:
# Positive-class accuracy of the re-trained pipeline's predictions on the test split.
accuracy_positive(y_test, repredicted)

0.7168141592920354

In [152]:
# Positive-class accuracy of the original 'predictions' column on the test split (baseline).
accuracy_positive(y_test, predicted_y_test)

0.60176991150442483

In [153]:
# Negative-class accuracy of the re-trained pipeline's predictions on the test split.
accuracy_negative(y_test, repredicted)

0.96992481203007519

In [154]:
# Negative-class accuracy of the original 'predictions' column on the test split (baseline).
accuracy_negative(y_test, predicted_y_test)

0.99462943071965626

In [142]:
# Display the full record of the transfer examined in this notebook.
transfer

source_dataset                        synergy-final-iter4
target_dataset                        synergy-final-iter5
source_device                              128.237.227.76
target_device                              128.237.227.76
source_device_name                            Mite 2 Sink
target_device_name                          Mite 2 Coffee
source_device_type                                   Mite
target_device_type                                   Mite
source_room                                       synergy
target_room                                       synergy
source_placement                                     Sink
target_placement                                   Coffee
type_of_transfer           Same device in different place
classifier                                            SVM
label                                              Dishes
samples                                               588
features              MICROPHONE|microphone|ACCEL_|accel_
features_name 