# UCAmI evaluation notebook

In [1]:
from sklearn.metrics import accuracy_score
from rdflib_hdt import HDTStore
from rdflib import URIRef, Literal
import pandas as pd
from ink.base.connectors import AbstractConnector
from tqdm import tqdm

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix
import numpy as np
from sklearn.metrics import classification_report

In [2]:
class HDTConnector(AbstractConnector):
    def query(self, q_str):
        global store
        try:
            noi = URIRef(q_str.split('"')[1])
            res = store.hdt_document.search((noi, None, None))[0]
            val = [{"p": {"value": r[1].toPython()}, "o": {"value": r[2].n3().split('"')[1]}, "dt": "Object"} if isinstance(r[2],
                                                                                                            Literal) else {
                "p": {"value": r[1].toPython()}, "o": {"value": r[2].toPython()}} for r in res]
            return val
        except Exception as e:
            return []

    def get_all_events(self):
        global store
        res = store.hdt_document.search((None, URIRef("https://saref.etsi.org/core/hasActivity"), None))[0]
        entities = set()
        for r in tqdm(res):
            entities.add(r[0].toPython())
        return entities

    def get_all_train_activities(self):
        global store
        res = store.hdt_document.search((None, URIRef("https://saref.etsi.org/core/hasActivity"), None))[0]
        entities = set()
        rm_lst = set()
        for r in tqdm(res):
            if len(r[2].toPython())>1:
                if r[2].toPython()!="test_data":
                    entities.add((r[0].toPython(), r[2].toPython()))
                else:
                    rm_lst.add(r[0].toPython())

        return set([e for e in entities if e[0] not in rm_lst])

    def get_events_of_group(self, group):
        global store
        res = store.hdt_document.search((None, URIRef("https://example.com/partOfGroup"), Literal(group)))[0]
        entities = set()
        for r in tqdm(res):
            entities.add(r[0].toPython())
        return entities



    def get_all_test_activities(self):
        global store
        res = store.hdt_document.search((None, URIRef("https://saref.etsi.org/core/hasActivity"), None))[0]
        entities = set()
        add_lst = set()
        for r in tqdm(res):
            if len(r[2].toPython())>1:
                if r[2].toPython()=="test_data":
                    add_lst.add(r[0].toPython())

        res = store.hdt_document.search((None, URIRef("https://saref.etsi.org/core/hasActivity"), None))[0]
        for r in res:
            if len(r[2].toPython())>1:
                if r[2].toPython()!="test_data" and r[0].toPython() in add_lst:
                    entities.add((r[0].toPython(),r[2].toPython()))

        return entities

    def get_all_begin_activities(self):
        global store
        res = store.hdt_document.search((None, URIRef("http://example.org/isBeginEvent"), None))[0]
        entities = set()
        for d in res:
            res2 = store.hdt_document.search((d[0], URIRef("https://saref.etsi.org/core/hasActivity"), None))[0]

            for r in tqdm(res2):
                if len(r[2].toPython())>1:
                    entities.add((d[0].toPython(), r[2].toPython()))
        return entities

    def get_all_events_of_type(self, a):
        global store
        res = store.hdt_document.search((None, URIRef("https://saref.etsi.org/core/hasActivity"), Literal(a)))[0]
        entities = set()
        for r in tqdm(res):
            entities.add(r[0].toPython())
        return entities

    def get_event_time(self, event):
        global store
        res = store.hdt_document.search((URIRef(event), URIRef("https://saref.etsi.org/core/hasTimestamp"), None))[0]
        return str(list(res)[0][2].toPython())

    def get_info(self, event):
        global store
        res = store.hdt_document.search((URIRef(event), None, None))[0]
        return str(list(res))

In [4]:
df = pd.read_pickle('event_ucaml_depth11_for_ml.pkl')
file = "event_ucaml.hdt"
store = HDTStore(file)
connector = HDTConnector()

train_events = [x for x in connector.get_all_train_activities()]
test_events = [x for x in connector.get_all_test_activities()]
print(len(test_events))
print(len(train_events))

2203it [00:00, 37009.06it/s]
2203it [00:00, 44178.43it/s]

535
1133





In [5]:
mapping = {'Act01':'Take medication', 'Act02':'Prepare Breakfast','Act03':'Prepare lunch',
            'Act04': 'Prepare dinner', 'Act05': 'Breakfast', 'Act06':'Lunch', 'Act07':'Dinner',
           'Act08':'Eat a snack','Act09':'Watch TV','Act10':'Enter the Smartlab','Act11':'Play videogame',
           'Act12':'Relax on sofa','Act13':'Leave Smartlab','Act14': 'Visit in the Smartlab','Act15':'Put waste in the bin',
            'Act16':'Wash hands','Act17':'Brush teeth','Act18':'Use the toilet','Act19':'Wash dishes',
            'Act20': 'Washing machine', 'Act21':'Work at the table','Act22':'Dressing', 'Act23':'Go to bed',
           'Act24':'Wake up', 'Idle':'Idle' }
print(len(mapping))

25


In [13]:
from sklearn.ensemble import ExtraTreesClassifier

label_df = pd.DataFrame(train_events).set_index(0)
nodes = label_df.index
labels = label_df.values

data = df.loc[nodes,:]
data.loc[:,'label'] = labels

X = data.drop('label', axis=1).fillna(-100)
y = data['label']

#-------------------#
label_df = pd.DataFrame(test_events).set_index(0)#.groupby(0)[1].apply(list)
nodes = label_df.index
labels = label_df.values

#ndata = data.explode('label')
data = df.loc[nodes,:]
data.loc[:,'label'] = labels

X_test = data.drop('label', axis=1).fillna(-100)
y_test = data['label']

times = []
for index, row in X.iterrows():
    user = index.split('/')[-2]
    event = index.split('/')[-1].replace('event','')
    times.append(connector.get_event_time(index))
X['time'] = pd.to_datetime(times)
X['time'] = X['time'].dt.hour * 60 + X['time'].dt.minute#+ X['time'].dt.second/60

times = []
for index, row in X_test.iterrows():
    user = index.split('/')[-2]
    event = index.split('/')[-1].replace('event','')
    times.append(connector.get_event_time(index))
X_test['time'] = pd.to_datetime(times)
X_test['time'] = X_test['time'].dt.hour * 60 + X_test['time'].dt.minute# + X_test['time'].dt.second/60

for clf in [ExtraTreesClassifier(n_estimators=1000, class_weight="balanced")]:

    clf.fit(X, y)
    y_pred = [[x] for x in clf.predict(X_test)]

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy: %.2f%%" % (accuracy * 100.0))


normal_conf_mat=confusion_matrix(y_test,y_pred)
normal_conf_mat = normal_conf_mat.astype('float') / normal_conf_mat.sum(axis=1)[:, np.newaxis]
print(normal_conf_mat)

all_columns = set([x for x in y_test]+[x[0] for x in y_pred])

#conf_mat,normal_conf_mat = mlcm.cm(y_test_multi.reindex(columns=all_columns).fillna(int(0)).values,y_pred_multi.reindex(columns=all_columns).fillna(int(0)).values)
df_cm = pd.DataFrame(normal_conf_mat, index = [mapping[x] for x in list(all_columns)],columns=[mapping[x] for x in list(all_columns)])
print(df_cm)
fig, ax = plt.subplots(figsize=(17,11))
sns.heatmap(df_cm, annot=True, fmt='.1f')
plt.xticks(rotation=45)
plt.savefig('learner_plot.png')

  self.obj[key] = value
  X['time'] = pd.to_datetime(times)
  X_test['time'] = pd.to_datetime(times)


Accuracy: 76.07%
[[0.83333333 0.         0.08333333 0.         0.         0.08333333
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.        ]
 [0.         0.92307692 0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.07692308 0.         0.         0.
  0.         0.         0.         0.         0.        ]
 [0.         0.         0.97619048 0.02380952 0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.        ]
 [0.         0.         0.         1.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.        ]
 [0.         0.08823529

In [14]:
from sklearn.metrics import balanced_accuracy_score
balanced_accuracy_score(y_pred, y_test)



0.7525706918220366

In [15]:
from sklearn.metrics import f1_score
f1_score(y_pred, y_test, average='weighted')

0.7723859087919875

In [34]:
print(classification_report(pd.DataFrame(y_test)['label'].apply(lambda x: mapping[x]), pd.DataFrame(y_pred)[0].apply(lambda x: mapping[x]), target_names=[mapping[x] for x in list(all_columns)] ))

                       precision    recall  f1-score   support

       Play videogame       1.00      0.88      0.94        34
           Wash hands       0.62      0.88      0.73        26
              Wake up       0.63      0.95      0.76        40
       Prepare dinner       0.89      0.83      0.86        29
 Put waste in the bin       0.91      0.83      0.87        12
            Go to bed       0.85      0.79      0.81        14
             Dressing       0.74      0.52      0.61        27
               Dinner       1.00      0.92      0.96        13
            Breakfast       0.89      1.00      0.94        25
                Lunch       0.64      0.60      0.62        15
             Watch TV       0.80      0.92      0.86        13
        Prepare lunch       0.76      1.00      0.86        22
        Relax on sofa       0.89      0.98      0.93        42
    Work at the table       0.75      0.75      0.75        12
Visit in the Smartlab       0.96      0.41      0.58  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
