In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

# Default size, in inches, applied to every matplotlib figure in this notebook.
width, height = 12, 7
plt.rcParams["figure.figsize"] = (width, height)


from sqlalchemy import create_engine
import pandas as pd
import numpy as np
import tflscripts
import json
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn import tree
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_predict
from sklearn.decomposition import TruncatedSVD
import pickle

# Project configuration as returned by the tflscripts helper (contents not
# visible from this notebook).
configuration = tflscripts.read_configuration()
# Results table that every later cell filters and models. Unpickling can
# execute arbitrary code, so only load a 'results.p' from a trusted source.
df = pd.read_pickle('results.p')

In [2]:
print(len(df))

# Same-device-type transfer is only kept for Mite -> Mite pairs; pairs whose
# source and target device types differ are always kept.
mite_to_mite = (df.source_device_type == 'Mite') & (df.target_device_type == 'Mite')
cross_device_type = df.source_device_type != df.target_device_type
df = df.loc[mite_to_mite | cross_device_type]

# Transfer scenarios kept for the analysis. Cross-device transfer outside of
# the same place ('Different device in different place' and
# 'Different device across spaces') is intentionally not listed.
kept_transfer_types = [
    'Same device type in different place',
    'Different device in same place',
    'Same device across spaces',
    'Same device type across spaces',
    'Same device in different place',
    'Same device type in same place',
    'Same device in same place',
]
df = df.loc[df.type_of_transfer.isin(kept_transfer_types)]

print(len(df))

62397
54234


In [3]:
def to_recall_bin(acc):
    """Return the index of the first score interval that contains `acc`.

    The intervals are closed on both ends: index 0 covers [0.0, 0.5] and
    index 1 covers [0.5, 1.0]. A score of exactly 0.5 matches both and
    therefore maps to 0, the first match.

    Raises:
        IndexError: if `acc` lies in neither interval (below 0, above 1, NaN).
    """
    intervals = ((0.0, 0.5), (0.5, 1.0))
    for position, (lower, upper) in enumerate(intervals):
        if lower <= acc <= upper:
            return position
    # Same failure mode as indexing into an empty list of matches.
    raise IndexError('list index out of range')


# Binary target: 1 when the row's f1 falls in the upper bin, 0 otherwise.
df['good'] = list(map(to_recall_bin, df['f1']))

In [4]:
def to_key(row):
    """Build a composite key for one result row.

    Joins the row's placement, room, device and label values with '_', in
    that fixed order. All seven values must be strings.
    """
    key_fields = (
        'source_placement',
        'target_placement',
        'source_room',
        'target_room',
        'source_device',
        'target_device',
        'label',
    )
    return '_'.join(row[field] for field in key_fields)

# One composite key per result row, in row order.
df['key'] = [to_key(record) for _label, record in df.iterrows()]

In [5]:
# For every key keep a single row: the highest-f1 run among those that used
# the SVM classifier with the 'All' feature set.
subdf = (
    df.sort_values(by=['f1'], ascending=False)
      .loc[lambda ranked: (ranked.classifier == 'SVM') & (ranked.features_name == 'All')]
      .drop_duplicates(['key'], keep='first')
)
subdf.index = subdf.key
# 'good' is replaced by the per-key maximum over *all* rows of df (not only
# the SVM/'All' rows); values are aligned through the key index set above.
subdf['good'] = df.groupby(['key'])['good'].max()

In [7]:
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.tree import DecisionTreeClassifier

accuracies = {}

# Meta-classifier data set: the per-key rows of subdf without 'No transfer'.
# Alternatives tried earlier and left disabled:
#   df.loc[df.accuracy_negative >= 0.95]
#   filtered.loc[filtered.type_of_transfer != 'Across spaces']
filtered = subdf.loc[subdf.type_of_transfer != 'No transfer']

# Columns of the results frame that are passed to the meta-classifier as
# features.
x_columns = [
    'label',
    'type_of_transfer',
    'source_device_type',
    'target_device_type',
    'f1_wt'
]


def to_x_and_y(filtered, columns=x_columns):
    """Convert a results frame into feature dicts and a target array.

    Args:
        filtered: DataFrame containing every name in `columns` plus a
            'good' column.
        columns: feature column names to extract (defaults to `x_columns`).

    Returns:
        (X, y): X is a list with one ``{column: value}`` dict per row, in
        row order; y is the array of the frame's 'good' values.
    """
    # Series.items() is used instead of Series.iteritems(): the latter was
    # removed in pandas 2.0, while items() exists in old and new releases.
    X = [dict(row.items()) for _, row in filtered[columns].iterrows()]
    y = filtered['good'].values

    return X, y


# ppl.fit(X_train, y_train)
# predicted = ppl.predict(X_test)


def predict_for_sample(sample_train, sample_test, random_state=0):
    """Fit the meta-classifier on one sample and evaluate it on another.

    Both frames are converted with `to_x_and_y`, a DictVectorizer ->
    Imputer -> DecisionTreeClassifier pipeline is fitted on the training
    part, and the accuracy on the test part is printed.

    Args:
        sample_train: results frame used for fitting.
        sample_test: results frame used for evaluation.
        random_state: seed forwarded to the decision tree. The tree breaks
            ties between equally good splits at random, so without a fixed
            seed the printed accuracy can change from run to run. Pass
            None to restore unseeded behaviour.

    Returns:
        Array of predicted 'good' classes, one per row of `sample_test`.
    """
    X_train, y_train = to_x_and_y(sample_train)
    X_test, y_test = to_x_and_y(sample_test)

    # NOTE(review): sklearn.preprocessing.Imputer was removed in
    # scikit-learn 0.22 (replaced by sklearn.impute.SimpleImputer); the
    # notebook's import cell needs updating before this runs on a current
    # scikit-learn.
    ppl = Pipeline([
        ('vect', DictVectorizer()),
        ('impute', Imputer()),
        ('clf', DecisionTreeClassifier(random_state=random_state))
    ])

    ppl.fit(X_train, y_train)
    predicted = ppl.predict(X_test)
    accuracy = accuracy_score(y_test, predicted)
    print('Accuracy', accuracy)

    return predicted


def holdout_split(test_mask):
    """Split `filtered` into (train, test); `test_mask` selects the test rows."""
    held_out = filtered.loc[test_mask]
    remainder = filtered.loc[~filtered.index.isin(held_out.index)]
    return remainder, held_out


# Transfer-type groups shared by the experiments below.
transfer_type = filtered.type_of_transfer
different_place = transfer_type.isin([
    'Same device in different place',
    'Same device type in different place',
])
across_spaces = transfer_type.isin([
    'Same device type across spaces',
    'Same device across spaces',
])
same_place = transfer_type.isin([
    'Same device type in same place',
    'Same device in same place',
])

# Held out: different-place transfers whose source room is 'synergy'.
print('Placement')
sample_train, sample_test = holdout_split((filtered.source_room == 'synergy') & different_place)
predict_for_sample(sample_train, sample_test)

# Held out: across-spaces transfers, first by source room, then by target room.
print('Room')
sample_train, sample_test = holdout_split((filtered.source_room == 'synergy') & across_spaces)
predict_for_sample(sample_train, sample_test)

sample_train, sample_test = holdout_split((filtered.target_room == 'synergy') & across_spaces)
predict_for_sample(sample_train, sample_test)

# In each 'Replica <placement>' run the held-out rows are the same-place
# transfers whose source placement is NOT the named placement.
print('Replica Sink')
sample_train, sample_test = holdout_split((filtered.source_placement != 'Sink') & same_place)
predict_for_sample(sample_train, sample_test)

print('Replica Table')
sample_train, sample_test = holdout_split((filtered.source_placement != 'Table') & same_place)
predict_for_sample(sample_train, sample_test)

print('Replica Coffee')
sample_train, sample_test = holdout_split((filtered.source_placement != 'Coffee') & same_place)
predict_for_sample(sample_train, sample_test)

Placement
Accuracy 0.638157894737
Room
Accuracy 0.681265206813
Accuracy 0.644768856448
Replica Sink
Accuracy 0.796296296296
Replica Table
Accuracy 0.734375
Replica Coffee
Accuracy 0.666666666667


array([1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 1])

In [7]:
# Number of entries where `y` is 0 but `predicted` is 1.
# NOTE(review): neither `predicted` nor `y` is assigned at notebook scope in
# the cells shown here (both names only appear as locals inside the helper
# functions), so this cell presumably relies on state left by a cell that was
# since removed — confirm and restore that cell so Restart & Run All works.
len([x for x in predicted[y == 0] if x == 1])

231

In [8]:
# Attach the predictions to `filtered`, matched to its rows by position.
# NOTE(review): `predicted` is not assigned at notebook scope in the cells
# shown here; it must contain exactly one entry per row of `filtered` for
# the index assignment below to succeed — confirm where it comes from.
predictions = pd.Series(predicted)
predictions.index = filtered.index
# NOTE(review): `filtered` is a .loc selection of `subdf`, so this column
# assignment may emit a SettingWithCopyWarning — verify.
filtered['predictions'] = predictions

In [9]:
# Predicted row count divided by actual row count for each
# (transfer type, class) pair; 1.0 means the class sizes match.
predicted_sizes = filtered.groupby(['type_of_transfer', 'predictions']).size()
actual_sizes = filtered.groupby(['type_of_transfer', 'good']).size()
predicted_sizes / actual_sizes

type_of_transfer                     predictions
Different device in same place       0              1.019832
                                     1              0.876777
Same device across spaces            0              1.067485
                                     1              0.905983
Same device in different place       0              1.000000
                                     1              1.000000
Same device in same place            0              1.000000
                                     1              1.000000
Same device type across spaces       0              1.095628
                                     1              0.896450
Same device type in different place  0              1.000000
                                     1              1.000000
Same device type in same place       0              1.285714
                                     1              0.935484
dtype: float64

In [10]:
# Number of rows per (transfer type, predicted class) pair.
prediction_groups = filtered.groupby(['type_of_transfer', 'predictions'])
prediction_groups.size()

type_of_transfer                     predictions
Different device in same place       0              1337
                                     1               185
Same device across spaces            0               174
                                     1               106
Same device in different place       0                29
                                     1                47
Same device in same place            0                 3
                                     1                 7
Same device type across spaces       0               401
                                     1               303
Same device type in different place  0               129
                                     1               249
Same device type in same place       0                18
                                     1                58
dtype: int64

In [10]:
# Inspect the per-key rows for transfers on the same device in the same place.
subdf[subdf['type_of_transfer'] == 'Same device in same place']

Unnamed: 0_level_0,source_dataset,target_dataset,source_device_id,target_device_id,source_device,target_device,source_device_name,target_device_name,source_device_type,target_device_type,...,precision_change,f1_change,f1_change_percent,accuracy_target_wt,recall_target_wt,specificity_target_wt,precision_target_wt,f1_target_wt,good,key
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Table_Table_synergy_synergy_Mite 1_Mite 1_Vacuum cleaning,synergy-final-iter4,synergy-final-iter5,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,0.0,-0.001439,99.856115,1.0,1.0,1.0,1.0,1.0,1,Table_Table_synergy_synergy_Mite 1_Mite 1_Vacu...
Table_Table_synergy_synergy_Mite 1_Mite 1_Blender running,synergy-final-iter5,synergy-final-iter4,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,-0.003413,-0.011844,98.815567,0.99882,0.984216,0.999748,0.996212,0.990151,1,Table_Table_synergy_synergy_Mite 1_Mite 1_Blen...
Table_Table_synergy_synergy_Mite 1_Mite 1_Alarm,synergy-final-iter5,synergy-final-iter4,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,0.001601,-0.006093,99.384085,0.998584,0.994262,0.9992,0.993837,0.994024,1,Table_Table_synergy_synergy_Mite 1_Mite 1_Alarm
Table_Table_synergy_synergy_Mite 1_Mite 1_Cupboard door closed,synergy-final-iter4,synergy-final-iter5,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,-0.052632,-0.019697,97.85832,0.998785,0.962963,0.999081,0.885041,0.921449,1,Table_Table_synergy_synergy_Mite 1_Mite 1_Cupb...
Table_Table_synergy_synergy_Mite 1_Mite 1_Microwave door closed,synergy-final-iter5,synergy-final-iter4,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,-0.084615,-0.223392,65.335753,0.995987,0.455556,0.998815,0.814815,0.5221,0,Table_Table_synergy_synergy_Mite 1_Mite 1_Micr...
Table_Table_synergy_synergy_Mite 1_Mite 1_Coffee,synergy-final-iter4,synergy-final-iter5,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,-0.253019,-0.16754,66.967305,0.905196,0.587028,0.945006,0.571141,0.578251,0,Table_Table_synergy_synergy_Mite 1_Mite 1_Coffee
Table_Table_synergy_synergy_Mite 1_Mite 1_Cupboard door opened,synergy-final-iter4,synergy-final-iter5,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,-0.305556,-0.327968,22.503934,0.99605,0.355556,0.999694,0.6,0.4329,0,Table_Table_synergy_synergy_Mite 1_Mite 1_Cupb...
Table_Table_synergy_synergy_Mite 1_Mite 1_Microwave done chime,synergy-final-iter4,synergy-final-iter5,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,-0.5,-0.333333,16.666667,0.991492,0.138889,1.0,1.0,0.240825,1,Table_Table_synergy_synergy_Mite 1_Mite 1_Micr...
Table_Table_synergy_synergy_Mite 1_Mite 1_Knocking,synergy-final-iter5,synergy-final-iter4,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,-0.933333,-0.483516,0.0,0.991501,0.459367,0.999761,0.944444,0.606349,0,Table_Table_synergy_synergy_Mite 1_Mite 1_Knoc...
Table_Table_synergy_synergy_Mite 1_Mite 1_Soap dispensed,synergy-final-iter5,synergy-final-iter4,128.237.247.190,128.237.247.190,Mite 1,Mite 1,Mite 1 Table,Mite 1 Table,Mite,Mite,...,-0.777778,-0.150794,0.0,0.994334,0.166667,0.999288,0.333333,0.203704,0,Table_Table_synergy_synergy_Mite 1_Mite 1_Soap...


In [11]:
# Identification columns plus the actual ('good') and predicted class, saved
# for later use.
result_columns = [
    'source_dataset', 'target_dataset',
    'source_device_id', 'target_device_id',
    'source_device', 'target_device',
    'source_device_name', 'target_device_name',
    'source_device_type', 'target_device_type',
    'source_room', 'target_room',
    'source_placement', 'target_placement',
    'type_of_transfer', 'label',
    'samples', 'good', 'predictions',
]
results = filtered[result_columns]
results.to_pickle('results_first_metaclassifier.p')