**Trains using multiple devices, tests on one device**

The trained model is used as-is: it is not adapted to the target domain in any way.

In [None]:
import pandas as pd
import numpy as np
import math
# Execute the shared helper script in this notebook's namespace (IPython magic).
# NOTE(review): train_test_split_labels and classify_with_tsfresh_features are
# used below but never defined in this notebook - presumably they come from here.
%run ../helpers.py

# Directory prefix for the per-device CSV files and activity_labels.csv.
# File names are appended by plain string concatenation, so the trailing
# slash is required.
dataset_path = '../../datasets/synergy-kitchen-mites-processed/'

**create_training_dataset_from_multiple_sources**

Given y labels and sources, split the y list into N equally sized chunks (N = len(sources)).

For each chunk, load its samples from the corresponding source.

In [None]:
def create_training_dataset_from_multiple_sources(sources, y_train, use_features):
    """Build a training set whose samples are drawn from several sources.

    The index of ``y_train`` is shuffled and cut into ``len(sources)``
    consecutive chunks of ``len(y_train) // len(sources)`` ids each; the last
    source additionally receives whatever remainder is left, so every
    training id is assigned to exactly one source.

    Args:
        sources: Names of the source CSV files (without the ``.csv``
            extension), resolved relative to the global ``dataset_path``.
        y_train: Labels of the training samples; only its index (the sample
            ids) is used here.
        use_features: Regular expression passed to ``DataFrame.filter`` to
            select the columns to keep. It has to match the ``id`` column,
            which is needed to pick the rows of each chunk.

    Returns:
        A tuple ``(df, X_train)``: ``df`` is the concatenation of the rows
        selected from every source, ``X_train`` is a column-less DataFrame
        indexed by the sample ids in the order they were assigned.

    Raises:
        ValueError: If ``sources`` is empty.
    """
    if not sources:
        raise ValueError('at least one source is required')

    training_indices = y_train.index.tolist()
    np.random.shuffle(training_indices)

    chunk_size = len(training_indices) // len(sources)
    last_position = len(sources) - 1

    dfs = []
    X_trains = []

    for position, source in enumerate(sources):
        since = position * chunk_size
        if position == last_position:
            # The last source absorbs the remainder so that no sample is
            # dropped when the ids do not divide evenly between the sources.
            chunk = training_indices[since:]
        else:
            chunk = training_indices[since:since + chunk_size]

        # Equivalent of the removed ``pd.DataFrame.from_csv`` defaults.
        df = pd.read_csv(dataset_path + source + '.csv',
                         index_col=0, parse_dates=True)
        df = df.filter(regex=use_features)
        df = df.loc[df['id'].isin(chunk)]
        dfs.append(df)

        X_trains.append(pd.DataFrame(index=chunk))

    # NOTE(review): X_train is returned in shuffled order while y_train keeps
    # its original order - confirm the downstream helper aligns them by index.
    return pd.concat(dfs), pd.concat(X_trains)

1. read labels for the complete dataset
2. split them 70/30 into training and test sets
3. create training dataset using the sources (function above)
4. create testing dataset using the 30% of activities from target
5. classify and test

In [None]:
def test(sources, target, use_features):
    """Train on several source devices and evaluate on one target device.

    Loads the activity labels, splits them with ``train_test_split_labels``,
    builds the training data from ``sources`` for the training labels, loads
    the target device's data and hands everything to
    ``classify_with_tsfresh_features``.

    Args:
        sources: Names of the source CSV files (without ``.csv``) under
            ``dataset_path`` that supply the training samples.
        target: Name of the CSV file (without ``.csv``) under
            ``dataset_path`` that supplies the test samples.
        use_features: Regular expression passed to ``DataFrame.filter`` to
            select the columns used for both training and testing.

    Returns:
        Whatever ``classify_with_tsfresh_features`` returns.
    """
    # ``read_csv(index_col=0, parse_dates=True)`` reproduces the defaults of
    # the removed ``pd.DataFrame.from_csv``.
    df_labels = pd.read_csv(dataset_path + 'activity_labels.csv',
                            index_col=0, parse_dates=True)

    y_train, y_test = train_test_split_labels(df_labels)

    df_train, X_train = create_training_dataset_from_multiple_sources(
        sources, y_train, use_features)

    # NOTE(review): the target frame is not restricted to the test ids here;
    # presumably the classifier helper selects them via X_test - confirm.
    df_test = pd.read_csv(dataset_path + target + '.csv',
                          index_col=0, parse_dates=True)
    df_test = df_test.filter(regex=use_features)

    X_test = pd.DataFrame(index=y_test.index)

    return classify_with_tsfresh_features(df_train, X_train, y_train,
                                          df_test, X_test, y_test)

In [None]:
# Human-readable device labels keyed by the device identifier that is also
# used as the CSV file name (see ``dataset_path + source + '.csv'``).
device_names = {
    '128.237.242.0': 'SS008',
    '128.237.246.127': 'SS076',
    '128.237.248.186': 'SS049',
    '128.237.253.157': 'SS018'
}

# Device identifiers; ``combinations`` refers to them by position.
devices = [
    '128.237.246.127',
    '128.237.248.186',
    '128.237.253.157',
    '128.237.242.0'
]

# Each entry is [positions of the source devices, position of the target
# device] in ``devices``. Commented-out entries are skipped in this run.
combinations = [
    [[0,1], 2],
#     [[0,1], 3],
    [[0,2], 1],
    [[0,2], 3],
#     [[1,2], 0],
    [[1,3], 0],
#     [[1,3], 2],
#     [[2,3], 0],
    [[2,3], 1]
]

# Column-selection regex (passed to ``DataFrame.filter``) -> display name.
# Every regex also matches the ``id`` column.
features = {
    "ACCEL_sst_*|id": 'Accelerometer',
    "MICROPHONE_sst_*|id": 'Microphone'
#     "MAGNETOMETER_sst_*|id": 'Magnetometer'
}

# Alternating entries: a description line followed by the matching result.
output = []

for combination in combinations:
    # These depend only on the combination, so resolve them once per
    # combination rather than once per feature.
    target = devices[combination[1]]
    sources = [devices[i] for i in combination[0]]
    source_names = [device_names[source] for source in sources]
    target_name = device_names[target]

    for feature, feature_name in features.items():
        result = test(sources, target, feature)
        # Separate the source list from the target name; previously the two
        # were concatenated directly (e.g. "SS076, SS049SS018").
        output.append(feature_name + ' from ' + ', '.join(source_names)
                      + ' to ' + target_name)
        output.append(result)

for out in output:
    print(out)
    print('')

In [None]:
# Re-print the collected descriptions and results, one blank line after each.
for entry in output:
    print(entry, end='\n\n')