In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

# Default figure size, in inches, applied to every matplotlib figure in this notebook.
width, height = 12, 7
plt.rcParams["figure.figsize"] = (width, height)


from sqlalchemy import create_engine
import pandas as pd
import numpy as np
import tflscripts
import json
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn import tree
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_predict
from sklearn.decomposition import TruncatedSVD
import pickle

# Project configuration as returned by tflscripts; later cells read its
# 'activities' and 'device_roles' entries.
configuration = tflscripts.read_configuration()
# Pickled frame whose `predictions` attribute is looked up by row key further down
# (presumably the output of a first meta-classifier, judging by the file name — confirm).
# NOTE(review): unpickling can execute arbitrary code; only load pickles you produced yourself.
df_first_predictions = pd.read_pickle('results_first_metaclassifier.p')
# Main results table; the following cells add key columns and specificity values to it.
df = pd.read_pickle('results.p')

In [2]:
def to_key(row):
    """Build the full row identifier: placements, rooms, devices and label joined by '_'.

    `row` must support selection with a list of column names (e.g. a pandas
    Series) and the selected values must be strings.
    """
    key_fields = [
        'source_placement', 'target_placement',
        'source_room', 'target_room',
        'source_device', 'target_device',
        'label',
    ]
    selected = row[key_fields]
    return '_'.join(selected)

def to_target_key(row):
    """Build the target-side identifier: target placement, room, device and label joined by '_'.

    `row` must support selection with a list of column names (e.g. a pandas
    Series) and the selected values must be strings.
    """
    target_fields = [
        'target_placement',
        'target_room',
        'target_device',
        'label',
    ]
    selected = row[target_fields]
    return '_'.join(selected)

def to_source_key(row):
    """Build the source-side identifier: source placement, room, device and label joined by '_'.

    `row` must support selection with a list of column names (e.g. a pandas
    Series) and the selected values must be strings.

    The previous version selected the target_* columns (a copy of
    to_target_key), so the source key was always identical to the target key.
    """
    columns = ['source_placement', 'source_room', 'source_device', 'label']
    return '_'.join(row[columns])

def to_transfer_with_label_key(row):
    """Build the transfer identifier including the label.

    Joins placements, rooms, device *types* and the label with '_'. `row` must
    support selection with a list of column names (e.g. a pandas Series) and
    the selected values must be strings.
    """
    transfer_fields = [
        'source_placement', 'target_placement',
        'source_room', 'target_room',
        'source_device_type', 'target_device_type',
        'label',
    ]
    selected = row[transfer_fields]
    return '_'.join(selected)

def to_transfer_key(row):
    """Build the label-independent transfer identifier.

    Joins placements, rooms and device *types* with '_'. `row` must support
    selection with a list of column names (e.g. a pandas Series) and the
    selected values must be strings.
    """
    transfer_fields = [
        'source_placement', 'target_placement',
        'source_room', 'target_room',
        'source_device_type', 'target_device_type',
    ]
    selected = row[transfer_fields]
    return '_'.join(selected)

def build_transfer_key(source_placement, target_placement, source_room, target_room, source_device_type, target_device_type):
    """Join the six transfer attributes with '_', in the same field order as to_transfer_key.

    All arguments must be strings.
    """
    parts = (
        source_placement,
        target_placement,
        source_room,
        target_room,
        source_device_type,
        target_device_type,
    )
    return '_'.join(parts)


# Add one string-key column per builder function, in this order.
key_builders = [
    ('key', to_key),
    ('target_key', to_target_key),
    ('source_key', to_source_key),
    ('transfer_with_label_key', to_transfer_with_label_key),
    ('transfer_key', to_transfer_key),
]
for column_name, build_key in key_builders:
    df[column_name] = [build_key(row) for _, row in df.iterrows()]

In [3]:
# Look up each row's 'key' in df_first_predictions.predictions; keys that have
# no entry come back as None from .get().
predictions = df_first_predictions.predictions
df['first_predictions'] = list(map(predictions.get, df['key']))

In [4]:
# Restrict the results to a single model setup: the SVM classifier on the 'All' feature set.
is_svm = df.classifier == 'SVM'
uses_all_features = df.features_name == 'All'
df_same_model = df.loc[is_svm & uses_all_features]

In [5]:
# Keep only rows whose first_predictions value is exactly 0 or 1 (drops missing lookups).
predicted_zero = df_same_model.first_predictions == 0
predicted_one = df_same_model.first_predictions == 1
df_same_model = df_same_model.loc[predicted_zero | predicted_one]

In [6]:
threshold = 0.75

# Select rows where f1_target_wt reaches the threshold while either f1_wt falls
# short of it or the first prediction was 0.
# NOTE(review): presumably f1_target_wt is the target-trained score and f1_wt the
# transferred one — confirm against the code that produced results.p.
target_reaches_threshold = df_same_model.f1_target_wt >= threshold
transfer_below_threshold = df_same_model.f1_wt < threshold
first_prediction_is_zero = df_same_model.first_predictions == 0
to_use_for_specificity = df_same_model.loc[
    target_reaches_threshold & (transfer_below_threshold | first_prediction_is_zero)
]

In [7]:
# Map every transfer key in the selection to the distinct labels that occur for it.
activities_to_retrain = {}
for transfer in to_use_for_specificity.transfer_key.unique():
    labels = to_use_for_specificity.loc[to_use_for_specificity.transfer_key == transfer].label.unique()
    activities_to_retrain[transfer] = labels.tolist()

# Persist the mapping. The context manager closes the file even when pickling
# raises, which the previous open()/close() pair did not guarantee.
with open('activities_to_retrain.p', 'wb') as output:
    pickle.dump(activities_to_retrain, output)

In [20]:
# Display the transfer-key -> labels mapping for a visual check.
activities_to_retrain

{'Coffee_Coffee_robotics_robotics_Matrix_Mite': ['Chopping food',
  'Coffee',
  'Conversation',
  'Dishes',
  'Kettle',
  'Knocking',
  'Microwave'],
 'Coffee_Coffee_robotics_robotics_Matrix_XDK': ['Chopping food', 'Dishes'],
 'Coffee_Coffee_robotics_robotics_Mite_XDK': ['Chopping food', 'Dishes'],
 'Coffee_Coffee_robotics_robotics_XDK_Mite': ['Chopping food',
  'Coffee',
  'Conversation',
  'Dishes',
  'Kettle',
  'Knocking',
  'Microwave'],
 'Coffee_Coffee_robotics_synergy_Mite_Mite': ['Chopping food',
  'Conversation',
  'Kettle',
  'Microwave',
  'Coffee',
  'Knocking',
  'Microwave door closed'],
 'Coffee_Coffee_synergy_robotics_Mite_Mite': ['Chopping food',
  'Kettle',
  'Microwave',
  'Coffee',
  'Knocking'],
 'Coffee_Coffee_synergy_synergy_Matrix_Mite': ['Chopping food',
  'Conversation',
  'Dishes',
  'Kettle',
  'Microwave',
  'Alarm',
  'Blender running',
  'Coffee',
  'Cupboard door closed',
  'Cupboard door opened',
  'Knocking',
  'Microwave door closed',
  'Soap dispense

In [8]:
# Translate each activity name into its position in configuration['activities'].
# list.index raises ValueError if a label is missing from the configuration.
activity_ids_to_retrain = {}
for transfer, activities in activities_to_retrain.items():
    activity_ids = [configuration['activities'].index(a) for a in activities]
    activity_ids_to_retrain[transfer] = activity_ids

# Persist the mapping. The context manager closes the file even when pickling
# raises, which the previous open()/close() pair did not guarantee.
with open('activity_ids_to_retrain.p', 'wb') as output:
    pickle.dump(activity_ids_to_retrain, output)

In [9]:
# Tell tflscripts which folder holds the datasets; the path is relative, so it
# depends on the directory the notebook is run from.
tflscripts.set_dataset_folder('../../datasets/')

def map_result(result):
    """Turn one stored test result into a row of specificity statistics.

    Returns None when the result's transfer key has no entry in
    activity_ids_to_retrain. Otherwise returns a list with source/target
    dataset, source/target device, type of transfer, classifier name, label
    name, features name, the number of additional activities and the
    specificity computed with those activities.
    """
    stats = tflscripts.TestResultStatistics(result, configuration)

    transfer_key = build_transfer_key(
        source_placement=stats.source_placement(),
        target_placement=stats.target_placement(),
        source_room=stats.source_room(),
        target_room=stats.target_room(),
        source_device_type=stats.source_device_type(),
        target_device_type=stats.target_device_type(),
    )

    # Transfers that were not selected for re-evaluation produce no row.
    if transfer_key not in activity_ids_to_retrain:
        return None

    activities_i = activity_ids_to_retrain[transfer_key]
    specificity = stats.specificity_with_additional_activities(activities_i)

    row = [
        result.source_dataset,
        result.target_dataset,
        stats.source_device(),
        stats.target_device(),
        stats.type_of_transfer(),
        stats.classifier_name(),
        result.label_name(),
        stats.features_name(),
        len(activities_i),
        specificity,
    ]
    return row

def read_dataset_device(dataset, device):
    """Apply map_result to every result of the test set named '<dataset>_<device>'.

    Returns whatever TestSet.map_results produces, or an empty list when the
    test set does not exist.
    """
    test_set_name = '_'.join([dataset, device])
    test_set = tflscripts.TestSet(name=test_set_name)

    if not test_set.exists():
        return []

    return test_set.map_results(map_result)

# Walk every dataset/device pair listed in the configuration and gather the
# rows produced by map_result; the prints serve as a progress log.
all_rows = []
for dataset in configuration['device_roles']:
    print(dataset)
    for device in configuration['device_roles'][dataset]:
        print(device)
        rows = read_dataset_device(dataset, device)
        all_rows += rows

# Column names, in the same order as the list returned by map_result.
columns = [
    'source_dataset',
    'target_dataset',
    'source_device',
    'target_device',
    'type_of_transfer',
    'classifier',
    'label',
    'features_name',
    'num_activities_for_specificity',
    'specificity_for_retrained'
]

# map_result returns None for transfers that are not re-evaluated; drop those
# with an identity check. Passing columns= to the constructor, rather than
# assigning .columns afterwards, also yields a correctly labelled empty frame
# when no rows were collected instead of a length-mismatch error.
specificity_results = pd.DataFrame(
    [r for r in all_rows if r is not None],
    columns=columns,
)

scott-final-iter2
TI SensorTag 85
Matrix b827ebe6e0f8
xdk_3
Matrix b827eb96f31a
TI SensorTag 604
DialogIoT 591844599
xdk_1
128.237.247.134
128.237.246.127
DialogIoT 591844765
xdk_2
TI SensorTag 690
DialogIoT 591844595
128.237.248.186
Matrix b827eb41f96f
synergy-final-iter3
128.237.239.234
TI SensorTag 709
Matrix b827ebe6e0f8
128.237.234.0
Matrix b827eb96f31a
DialogIoT 591844595
DialogIoT 591844599
TI SensorTag 33
xdk_2
xdk_3
DialogIoT 591844765
xdk_1
TI SensorTag 535
Matrix b827eb41f96f
128.237.237.122
robotics-final
TI SensorTag 85
Matrix b827ebe6e0f8
xdk_3
Matrix b827eb96f31a
TI SensorTag 604
DialogIoT 591844599
xdk_1
128.237.247.134
128.237.246.127
DialogIoT 591844765
xdk_2
TI SensorTag 690
DialogIoT 591844595
128.237.248.186
Matrix b827eb41f96f
synergy-final-iter5
128.237.247.190
xdk_3
Matrix b827ebe6e0f8
128.237.227.76
Matrix b827eb96f31a
128.237.250.218
Matrix b827eb41f96f
xdk_1
xdk_2
synergy-mites-colocated
128.237.246.127
128.237.253.157
128.237.248.186
128.237.242.0
synergy-mi

In [13]:
# Average the specificity over rows that share the same transfer description,
# then flatten the group keys back into ordinary columns.
grouping_columns = [
    'source_dataset',
    'target_dataset',
    'source_device',
    'target_device',
    'type_of_transfer',
    'classifier',
    'label',
    'features_name',
    'num_activities_for_specificity',
]
grouped_specificity = specificity_results.groupby(grouping_columns)['specificity_for_retrained']
specificity_results = grouped_specificity.mean().reset_index()

In [16]:
# Left-join the averaged specificity onto the main results; rows without a
# match keep NaN in the two added columns.
# NOTE: this reassigns df, so re-running the cell merges a second time.
merge_keys = [
    'source_dataset',
    'target_dataset',
    'source_device',
    'target_device',
    'type_of_transfer',
    'classifier',
    'label',
    'features_name',
]
df = df.merge(specificity_results, how='left', on=merge_keys)

In [17]:
# Display the aggregated specificity table for a visual check.
specificity_results

Unnamed: 0,source_dataset,target_dataset,source_device,target_device,type_of_transfer,classifier,label,features_name,num_activities_for_specificity,specificity_for_retrained
0,robotics-final,robotics-final,Matrix 1,Mite 4,Different device in same place,LogisticRegression,Chopping food,Accelerometer & magnetometer,6,0.620690
1,robotics-final,robotics-final,Matrix 1,Mite 4,Different device in same place,LogisticRegression,Chopping food,All,6,0.814599
2,robotics-final,robotics-final,Matrix 1,Mite 4,Different device in same place,LogisticRegression,Chopping food,Environmental,6,0.834751
3,robotics-final,robotics-final,Matrix 1,Mite 4,Different device in same place,LogisticRegression,Chopping food,Microphone,6,0.850873
4,robotics-final,robotics-final,Matrix 1,Mite 4,Different device in same place,LogisticRegression,Chopping food,Microphone & accelerometer,6,0.865652
5,robotics-final,robotics-final,Matrix 1,Mite 4,Different device in same place,LogisticRegression,Coffee,Accelerometer & magnetometer,6,0.776982
6,robotics-final,robotics-final,Matrix 1,Mite 4,Different device in same place,LogisticRegression,Coffee,All,6,0.748321
7,robotics-final,robotics-final,Matrix 1,Mite 4,Different device in same place,LogisticRegression,Coffee,Environmental,6,0.839230
8,robotics-final,robotics-final,Matrix 1,Mite 4,Different device in same place,LogisticRegression,Coffee,Microphone,6,0.698612
9,robotics-final,robotics-final,Matrix 1,Mite 4,Different device in same place,LogisticRegression,Coffee,Microphone & accelerometer,6,0.773847


In [18]:
# Persist the results table, now including the merged specificity columns.
df.to_pickle('results_retrained_specificity.p')

In [19]:
# Display the rows where source and target are the same device in the same place.
df.loc[df.type_of_transfer == 'Same device in same place']

Unnamed: 0,source_dataset,target_dataset,source_device_id,target_device_id,source_device,target_device,source_device_name,target_device_name,source_device_type,target_device_type,...,precision_target_wt,f1_target_wt,key,target_key,source_key,transfer_with_label_key,transfer_key,first_predictions,num_activities_for_specificity,specificity_for_retrained
47451,synergy-final-iter4,synergy-final-iter5,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,0.759854,0.859916,Table_Table_synergy_synergy_Mite 1_Mite 1_Alarm,Table_synergy_Mite 1_Alarm,Table_synergy_Mite 1_Alarm,Table_Table_synergy_synergy_Mite_Mite_Alarm,Table_Table_synergy_synergy_Mite_Mite,1.0,2.0,0.997323
47452,synergy-final-iter4,synergy-final-iter5,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,0.125867,0.163535,Table_Table_synergy_synergy_Mite 1_Mite 1_Alarm,Table_synergy_Mite 1_Alarm,Table_synergy_Mite 1_Alarm,Table_Table_synergy_synergy_Mite_Mite_Alarm,Table_Table_synergy_synergy_Mite_Mite,1.0,2.0,0.986613
47453,synergy-final-iter4,synergy-final-iter5,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,0.428811,0.387187,Table_Table_synergy_synergy_Mite 1_Mite 1_Alarm,Table_synergy_Mite 1_Alarm,Table_synergy_Mite 1_Alarm,Table_Table_synergy_synergy_Mite_Mite_Alarm,Table_Table_synergy_synergy_Mite_Mite,1.0,2.0,0.488621
47454,synergy-final-iter4,synergy-final-iter5,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,0.926444,0.952316,Table_Table_synergy_synergy_Mite 1_Mite 1_Alarm,Table_synergy_Mite 1_Alarm,Table_synergy_Mite 1_Alarm,Table_Table_synergy_synergy_Mite_Mite_Alarm,Table_Table_synergy_synergy_Mite_Mite,1.0,2.0,1.000000
47455,synergy-final-iter4,synergy-final-iter5,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,0.765753,0.854119,Table_Table_synergy_synergy_Mite 1_Mite 1_Alarm,Table_synergy_Mite 1_Alarm,Table_synergy_Mite 1_Alarm,Table_Table_synergy_synergy_Mite_Mite_Alarm,Table_Table_synergy_synergy_Mite_Mite,1.0,2.0,0.997323
47456,synergy-final-iter4,synergy-final-iter5,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,0.917892,0.189849,Table_Table_synergy_synergy_Mite 1_Mite 1_Alarm,Table_synergy_Mite 1_Alarm,Table_synergy_Mite 1_Alarm,Table_Table_synergy_synergy_Mite_Mite_Alarm,Table_Table_synergy_synergy_Mite_Mite,1.0,2.0,0.966533
47457,synergy-final-iter4,synergy-final-iter5,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,0.779194,0.874724,Table_Table_synergy_synergy_Mite 1_Mite 1_Blen...,Table_synergy_Mite 1_Blender running,Table_synergy_Mite 1_Blender running,Table_Table_synergy_synergy_Mite_Mite_Blender ...,Table_Table_synergy_synergy_Mite_Mite,1.0,2.0,0.991968
47458,synergy-final-iter4,synergy-final-iter5,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,0.177713,0.226515,Table_Table_synergy_synergy_Mite 1_Mite 1_Blen...,Table_synergy_Mite 1_Blender running,Table_synergy_Mite 1_Blender running,Table_Table_synergy_synergy_Mite_Mite_Blender ...,Table_Table_synergy_synergy_Mite_Mite,1.0,2.0,0.950469
47459,synergy-final-iter4,synergy-final-iter5,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,0.480527,0.458522,Table_Table_synergy_synergy_Mite 1_Mite 1_Blen...,Table_synergy_Mite 1_Blender running,Table_synergy_Mite 1_Blender running,Table_Table_synergy_synergy_Mite_Mite_Blender ...,Table_Table_synergy_synergy_Mite_Mite,1.0,2.0,0.875502
47460,synergy-final-iter4,synergy-final-iter5,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,0.942827,0.968148,Table_Table_synergy_synergy_Mite 1_Mite 1_Blen...,Table_synergy_Mite 1_Blender running,Table_synergy_Mite 1_Blender running,Table_Table_synergy_synergy_Mite_Mite_Blender ...,Table_Table_synergy_synergy_Mite_Mite,1.0,2.0,0.990629
