In [17]:
import tflscripts
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Register the dataset folder with tflscripts (the path is relative).
tflscripts.set_dataset_folder('../../datasets/')

# Shared configuration; read by the cells below to resolve activity names.
configuration = tflscripts.read_configuration()

# Names of the analysed activities, and the position of each one inside
# the full configuration['activities'] list.
activities = configuration['analysed_activities']
activities_i = [
    configuration['activities'].index(activity_name)
    for activity_name in activities
]

In [14]:
def read_dataset(device, dataset, scale=True):
    """Load features and labels for one device and keep only shared rows.

    Reads the dataset named ``dataset + '-1s'`` through
    ``tflscripts.read_and_filter_dataset`` with every feature enabled
    (``use_features='.*'``), restricted to the module-level
    ``activities_i`` and without feature selection.
    NOTE(review): the '-1s' suffix presumably selects a 1-second-window
    variant of the dataset -- confirm against tflscripts.

    Parameters
    ----------
    device : identifier of the device whose data is read.
    dataset : base dataset name; '-1s' is appended before reading.
    scale : forwarded unchanged as ``scale`` to the tflscripts reader.

    Returns
    -------
    (features, labels) : the features frame limited to rows whose index
        appears in the labels, and the labels limited to rows whose index
        appears in the remaining features.
    """
    features, labels = tflscripts.read_and_filter_dataset(
        dataset + '-1s',
        device,
        use_features='.*',
        use_activities=activities_i,
        check_all_activities=False,
        scale=scale,
        with_feature_selection=False,
    )

    # Drop feature rows that have no label ...
    labelled_rows = features.index.isin(labels.index)
    features = features.loc[labelled_rows]

    # ... then drop label rows that have no remaining feature row.
    rows_with_features = labels.index.isin(features.index)
    labels = labels.loc[rows_with_features]

    return features, labels

def test(source_dataset, source_device, target_dataset, target_device, label, features, scale_independently):
    """Train on a source device, evaluate on a target device, print accuracies.

    A logistic-regression pipeline is fitted on the source data and used
    to predict on the target data. Only feature columns matching the
    ``features`` regex in *both* datasets are used.

    Parameters
    ----------
    source_dataset, source_device : dataset name and device used for training.
    target_dataset, target_device : dataset name and device used for evaluation.
    label : passed to ``tflscripts.get_y_for_label`` to build the targets.
    features : regular expression selecting feature columns.
    scale_independently : forwarded as ``scale`` to ``read_dataset`` for both
        datasets. When truthy the pipeline has no scaling step; otherwise a
        ``StandardScaler`` fitted on the source data is inserted before the
        classifier.

    Returns
    -------
    None. Three lines are printed: overall accuracy, accuracy on target
    samples whose y is not -1, and accuracy on target samples whose y is -1.
    NOTE(review): -1 is presumably the "not this activity" class produced by
    tflscripts.get_y_for_label -- confirm.
    """
    df_source, df_source_labels = read_dataset(
        dataset=source_dataset, device=source_device, scale=scale_independently)
    df_target, df_target_labels = read_dataset(
        dataset=target_dataset, device=target_device, scale=scale_independently)

    # Matching columns present in both frames, kept in source order.
    source_matches = df_source.filter(regex=features).columns.tolist()
    target_matches = set(df_target.filter(regex=features).columns.tolist())
    columns = [name for name in source_matches if name in target_matches]

    x_train = df_source[columns]
    y_train = tflscripts.get_y_for_label(df_source_labels, label)

    # The scaling step is only needed when the data was not already scaled
    # by read_dataset.
    steps = [('impute', Imputer())]
    if not scale_independently:
        steps.append(('scale', StandardScaler()))
    steps.append(('clf', LogisticRegression()))
    ppl = Pipeline(steps)

    ppl.fit(x_train, y_train)

    x_test = df_target[x_train.columns]
    y_test = tflscripts.get_y_for_label(df_target_labels, label)

    predicted = ppl.predict(x_test)

    # Per-class accuracy: score each subset of the target samples separately.
    is_negative = y_test == -1
    accuracy_negative = accuracy_score(y_test[is_negative], predicted[is_negative])

    is_positive = y_test != -1
    accuracy_positive = accuracy_score(y_test[is_positive], predicted[is_positive])

    accuracy = accuracy_score(y_test, predicted)

    report = (
        ('Accuracy', accuracy),
        ('Accuracy positive', accuracy_positive),
        ('Accuracy negative', accuracy_negative),
    )
    for caption, value in report:
        print(caption, value)

In [18]:
# Source (training) and target (evaluation) dataset/device pair.
source_dataset = 'synergy-final-iter4'
source_device = '128.237.227.76'
target_dataset = 'synergy-final-iter1'
target_device = '128.237.254.195'

# Resolve the activity name to its index in the configured activity list.
label_name = 'Dishes'
label = configuration['activities'].index(label_name)

# All features; scaling is delegated to read_dataset (no scaler in the pipeline).
test(source_dataset=source_dataset,
     source_device=source_device,
     target_dataset=target_dataset,
     target_device=target_device,
     label=label,
     features='.*',
     scale_independently=True)

Accuracy 0.937942523948
Accuracy positive 0.901298701299
Accuracy negative 0.94494047619


In [19]:
# Relies on source_*/target_*/label left in the namespace by the preceding
# cell; this run puts a StandardScaler fitted on the source data in the pipeline.
test(source_dataset=source_dataset,
     source_device=source_device,
     target_dataset=target_dataset,
     target_device=target_device,
     label=label,
     features='.*',
     scale_independently=False)

Accuracy 0.171178675552
Accuracy positive 0.968831168831
Accuracy negative 0.0188492063492


In [20]:
# Source (training) and target (evaluation) dataset/device pair.
source_dataset = 'scott-final-iter1'
source_device = '128.237.248.186'
target_dataset = 'synergy-final-iter1'
target_device = '128.237.254.195'

# Resolve the activity name to its index in the configured activity list.
label_name = 'Dishes'
label = configuration['activities'].index(label_name)

# Run 1: scaling delegated to read_dataset, no scaler in the pipeline.
print('Independent scaling')

test(source_dataset=source_dataset,
     source_device=source_device,
     target_dataset=target_dataset,
     target_device=target_device,
     label=label,
     features='.*',
     scale_independently=True)

# Run 2: unscaled data, StandardScaler fitted on the source inside the pipeline.
print()
print('Common scaling')

test(source_dataset=source_dataset,
     source_device=source_device,
     target_dataset=target_dataset,
     target_device=target_device,
     label=label,
     features='.*',
     scale_independently=False)

Independent scaling
Accuracy 0.979591836735
Accuracy positive 1.0
Accuracy negative 0.975694444444

Common scaling
Accuracy 0.886297376093
Accuracy positive 0.311688311688
Accuracy negative 0.996031746032


In [21]:
# Source and target come from the same dataset but from different devices.
source_dataset = 'scott-final-iter1'
source_device = '128.237.248.186'
target_dataset = 'scott-final-iter1'
target_device = 'Matrix b827eb41f96f'

# Resolve the activity name to its index in the configured activity list.
label_name = 'Dishes'
label = configuration['activities'].index(label_name)

# Run 1: scaling delegated to read_dataset, no scaler in the pipeline.
print('Independent scaling')

test(source_dataset=source_dataset,
     source_device=source_device,
     target_dataset=target_dataset,
     target_device=target_device,
     label=label,
     features='.*',
     scale_independently=True)

# Run 2: unscaled data, StandardScaler fitted on the source inside the pipeline.
print()
print('Common scaling')

test(source_dataset=source_dataset,
     source_device=source_device,
     target_dataset=target_dataset,
     target_device=target_device,
     label=label,
     features='.*',
     scale_independently=False)

Independent scaling
Accuracy 0.57171881349
Accuracy positive 0.704109589041
Accuracy negative 0.548664122137

Common scaling
Accuracy 0.65623730191
Accuracy positive 0.254794520548
Accuracy negative 0.726145038168


In [22]:
# Source and target come from the same dataset but from different devices.
source_dataset = 'scott-final-iter1'
source_device = '128.237.248.186'
target_dataset = 'scott-final-iter1'
target_device = 'xdk_2'

# Resolve the activity name to its index in the configured activity list.
label_name = 'Dishes'
label = configuration['activities'].index(label_name)

# Run 1: scaling delegated to read_dataset, no scaler in the pipeline.
print('Independent scaling')

test(source_dataset=source_dataset,
     source_device=source_device,
     target_dataset=target_dataset,
     target_device=target_device,
     label=label,
     features='.*',
     scale_independently=True)

# Run 2: unscaled data, StandardScaler fitted on the source inside the pipeline.
print()
print('Common scaling')

test(source_dataset=source_dataset,
     source_device=source_device,
     target_dataset=target_dataset,
     target_device=target_device,
     label=label,
     features='.*',
     scale_independently=False)

Independent scaling
Accuracy 0.948624595469
Accuracy positive 1.0
Accuracy negative 0.939667458432

Common scaling
Accuracy 0.339805825243
Accuracy positive 0.776566757493
Accuracy negative 0.263657957245


In [23]:
# Source (training) and target (evaluation) dataset/device pair.
source_dataset = 'scott-final-iter1'
source_device = '128.237.248.186'
target_dataset = 'synergy-final-iter2'
target_device = 'xdk_1'

# Resolve the activity name to its index in the configured activity list.
label_name = 'Dishes'
label = configuration['activities'].index(label_name)

# Run 1: scaling delegated to read_dataset, no scaler in the pipeline.
print('Independent scaling')

test(source_dataset=source_dataset,
     source_device=source_device,
     target_dataset=target_dataset,
     target_device=target_device,
     label=label,
     features='.*',
     scale_independently=True)

# Run 2: unscaled data, StandardScaler fitted on the source inside the pipeline.
print()
print('Common scaling')

test(source_dataset=source_dataset,
     source_device=source_device,
     target_dataset=target_dataset,
     target_device=target_device,
     label=label,
     features='.*',
     scale_independently=False)

Independent scaling
Accuracy 0.881549815498
Accuracy positive 1.0
Accuracy negative 0.862349914237

Common scaling
Accuracy 0.326568265683
Accuracy positive 0.746031746032
Accuracy negative 0.258576329331


In [24]:
# Source (training) and target (evaluation) dataset/device pair.
source_dataset = 'scott-final-iter1'
source_device = 'xdk_2'
target_dataset = 'synergy-final-iter2'
target_device = 'xdk_1'

# Resolve the activity name to its index in the configured activity list.
label_name = 'Dishes'
label = configuration['activities'].index(label_name)

# Run 1: scaling delegated to read_dataset, no scaler in the pipeline.
print('Independent scaling')

test(source_dataset=source_dataset,
     source_device=source_device,
     target_dataset=target_dataset,
     target_device=target_device,
     label=label,
     features='.*',
     scale_independently=True)

# Run 2: unscaled data, StandardScaler fitted on the source inside the pipeline.
print()
print('Common scaling')

test(source_dataset=source_dataset,
     source_device=source_device,
     target_dataset=target_dataset,
     target_device=target_device,
     label=label,
     features='.*',
     scale_independently=False)

Independent scaling
Accuracy 0.930258302583
Accuracy positive 0.997354497354
Accuracy negative 0.919382504288

Common scaling
Accuracy 0.909594095941
Accuracy positive 0.465608465608
Accuracy negative 0.981560891938


In [25]:
# Source (training) and target (evaluation) dataset/device pair.
source_dataset = 'scott-final-iter1'
source_device = 'Matrix b827eb41f96f'
target_dataset = 'synergy-final-iter2'
target_device = 'Matrix b827ebe6e0f8'

# Resolve the activity name to its index in the configured activity list.
label_name = 'Dishes'
label = configuration['activities'].index(label_name)

# Run 1: scaling delegated to read_dataset, no scaler in the pipeline.
print('Independent scaling')

test(source_dataset=source_dataset,
     source_device=source_device,
     target_dataset=target_dataset,
     target_device=target_device,
     label=label,
     features='.*',
     scale_independently=True)

# Run 2: unscaled data, StandardScaler fitted on the source inside the pipeline.
print()
print('Common scaling')

test(source_dataset=source_dataset,
     source_device=source_device,
     target_dataset=target_dataset,
     target_device=target_device,
     label=label,
     features='.*',
     scale_independently=False)

Independent scaling
Accuracy 0.870062753784
Accuracy positive 0.52380952381
Accuracy negative 0.926211926212

Common scaling
Accuracy 0.860465116279
Accuracy positive 0.0
Accuracy negative 1.0
