# sensor data

In [1]:
import ace_sklearn_crfsuite
from pathlib import Path
from ace_sklearn_crfsuite import metrics
import numpy as np

# self-made

import src.utils as utils

# Absolute path of the directory the notebook is executed from; every data
# path used below is built relative to it.
working_path = Path().resolve()
# Folder that holds the layout data sets loaded by the cells below.
layout_data_path = working_path / "layout_data"

# Wandering

In [5]:
def data2features(data):
    """
    Turn a sensor matrix into one CRF feature dictionary per time step.

    Parameters
    ----------
    data : numpy.ndarray
        data.shape = (number of time, number of sensors).

    Returns
    -------
    features : list of dict
        features[i] maps ``x_<j>`` to ``data[i][j]`` for every sensor column j,
        carries a constant ``bias`` of 1, and is flagged with ``BOS`` on the
        first time step and ``EOS`` on the last one.
    """
    n_steps = data.shape[0]  # number of time
    n_sensors = data.shape[1]  # number of sensors
    last_step = n_steps - 1

    def step_features(t):
        row = data[t]
        feats = {f"x_{k}": row[k] for k in range(n_sensors)}
        # This variant emits no lagged (previous-step) or rolling-sum features.
        if t == 0:
            feats["BOS"] = True
        if t == last_step:
            feats["EOS"] = True
        feats["bias"] = 1
        return feats

    return [step_features(t) for t in range(n_steps)]


# Load the training recording for the wandering experiment.
# `_type` is interpolated into the pickle file names below.
_type = "raw"
data_folder_name = "test_data_1"
path = layout_data_path / "test_layout" / data_folder_name
# utils.pickle_load is a project helper called as (folder, name); presumably it
# unpickles that object from disk — confirm in src/utils.
# NOTE(review): only load pickles from trusted sources; unpickling can run arbitrary code.
# presumably SD = sensor data matrix and AL = anomaly label matrix — TODO confirm
reduced_SD_mat = utils.pickle_load(path / "experiment1", f"reduced_SD_mat_{_type}_1")
reduced_AL_mat = utils.pickle_load(path / "experiment1", f"reduced_AL_mat_{_type}_1")
SD_names = utils.pickle_load(path / "experiment1", "SD_names")
AL_names = utils.pickle_load(path / "experiment1", "AL_names")

In [6]:
# training

# Input layout passed to crf.fit (one outer list entry per sequence):
# X_train = [[{'a': True, 'b':1.2}, {'a': False, 'b':2}]]
# y_train = [['True', 'False']]

# Use the trailing 3000000 rows of the recording as a single training sequence.
end = len(reduced_SD_mat)
start = end - 3000000

# Features come from the first 24 sensor columns; labels are column 3 of the
# label matrix, converted to strings.
X_train = [data2features(reduced_SD_mat[start:end, :24])]
y_train = [list(map(str, reduced_AL_mat[start:end, 3]))]
# Sum of the label column over the training window.
print(np.sum(reduced_AL_mat[start:end, 3]))

c1, c2 = 0.1, 0.1
crf = ace_sklearn_crfsuite.CRF(
    algorithm="lbfgs",
    c1=c1,
    c2=c2,
    max_iterations=100,
    all_possible_transitions=True,
)
crf.fit(X_train, y_train)
# Persist the fitted model next to the training data, tagged with its c1/c2 values.
utils.pickle_dump(path / "experiment1", f"crf_c1_{c1}_c2_{c2}", crf)
# test = utils.pickle_load(path / 'experiment1', f"crf_c1_{c1}_c2_{c2}")

13011


In [8]:
from collections import Counter
from sklearn.metrics import classification_report

# evaluation
# Load the held-out recording (test_data_2) with the same file layout as the training data.
_type = "raw"
data_folder_name = "test_data_2"
path = layout_data_path / "test_layout" / data_folder_name
test_SD = utils.pickle_load(path / "experiment1", f"reduced_SD_mat_{_type}_1")
test_AL = utils.pickle_load(path / "experiment1", f"reduced_AL_mat_{_type}_1")
test_SD_names = utils.pickle_load(path / "experiment1", "SD_names")
test_AL_names = utils.pickle_load(path / "experiment1", "AL_names")

# Evaluate on the trailing 3000000 rows of the *test* recording. The window is
# derived from test_SD itself instead of reusing `start`/`end` from the training
# cell: those were computed from the training matrix length, so they select the
# wrong rows (or none at all) whenever the two recordings differ in length, and
# they make this cell depend on hidden state from another cell.
test_end = len(test_SD)
test_start = max(test_end - 3000000, 0)

# Same feature columns (first 24) and label column (3) as used for training.
X_test = [data2features(test_SD[test_start:test_end, :24])]
y_test = [[str(b) for b in test_AL[test_start:test_end, 3]]]
y_pred = crf.predict(X_test)


labels = list(crf.classes_)
print(labels)
print(metrics.flat_f1_score(y_test, y_pred, average="weighted", labels=labels))

# details
# Sort key orders labels by everything after the first character, then by the
# first character.
sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))

print(
    metrics.flat_classification_report(y_test, y_pred, labels=sorted_labels, digits=3)
)
# print(classification_report(
#     list(chain.from_iterable(y_test)), list(chain.from_iterable(y_pred)), labels=sorted_labels, digits=3
# ))


def print_transitions(trans_features):
    """Print one ``from -> to weight`` line per ((label_from, label_to), weight) entry."""
    for pair, weight in trans_features:
        source, target = pair
        # Labels are left-aligned in 6 / 7 character columns; weight has 6 decimals.
        print(f"{source!s:<6} -> {target!s:<7} {weight:.6f}")


def print_state_features(state_features):
    """Print one ``weight label attribute`` line per ((attr, label), weight) entry."""
    for key, weight in state_features:
        attribute, label = key
        # Weight with 6 decimals, label left-aligned in an 8 character column.
        print(f"{weight:.6f} {label!s:<8} {attribute!s}")


# List the learned weights from largest to smallest.
print("transition features:")
transitions_by_weight = sorted(
    crf.transition_features_.items(), key=lambda item: item[1], reverse=True
)
print_transitions(transitions_by_weight)

print("state features:")
states_by_weight = sorted(
    crf.state_features_.items(), key=lambda item: item[1], reverse=True
)
print_state_features(states_by_weight)

['False', 'True']
0.9998835758709718
              precision    recall  f1-score   support

       False      1.000     1.000     1.000   2987360
        True      0.982     0.990     0.986     12640

    accuracy                          1.000   3000000
   macro avg      0.991     0.995     0.993   3000000
weighted avg      1.000     1.000     1.000   3000000

transition features:
False  -> False   2.133736
True   -> True    1.310936
False  -> True    -7.315851
True   -> False   -7.315851
state features:
0.897318 True     x_16
0.840242 True     x_1
0.810025 True     x_6
0.788247 True     x_0
0.612350 True     x_21
0.576940 True     x_17
0.515215 True     x_22
0.496369 True     x_15
0.481397 True     x_12
0.479434 True     x_4
0.476213 True     x_18
0.458880 True     x_9
0.428461 True     x_20
0.369071 True     x_5
0.339902 True     x_19
0.323552 False    x_10
0.287443 True     x_3
0.284892 True     x_2
0.227538 True     x_23
0.217543 True     x_11
0.194568 True     x_13
0.193750 True 

# Falls

In [2]:
def data2features(data, window=60):
    """
    Turn a sensor matrix into one CRF feature dictionary per time step,
    including a rolling sum over the preceding ``window`` time steps.

    Parameters
    ----------
    data : numpy.ndarray
        data.shape = (number of time, number of sensors).
    window : int or None, default 60
        Number of preceding time steps summed into the ``sum_<window>``
        feature (``sum_60`` with the default). The sum covers all sensor
        columns of rows ``i - window`` .. ``i - 1`` and therefore excludes the
        current row. The feature is only emitted for ``i >= window``.
        Pass None to omit the rolling-sum feature altogether.

    Returns
    -------
    features : list of dict
        features[i] maps ``x_<j>`` to ``data[i][j]`` for every sensor column j,
        carries a constant ``bias`` of 1, and is flagged with ``BOS`` on the
        first time step and ``EOS`` on the last one.

    Raises
    ------
    ValueError
        If ``window`` is not None and smaller than 1.
    """
    if window is not None and window < 1:
        raise ValueError(f"window must be a positive integer or None, got {window!r}")

    features = []
    T = data.shape[0]  # number of time
    M = data.shape[1]  # number of sensors
    sum_key = f"sum_{window}"  # loop-invariant feature name, "sum_60" by default
    for i in range(T):
        d = data[i]
        feature = {f"x_{j}": d[j] for j in range(M)}
        # if i >= 1:
        #     feature.update({f"-1 x_{j}": data[i-1][j] for j in range(M)})
        if window is not None and i >= window:
            # Total over every sensor in the `window` rows before the current one.
            feature[sum_key] = np.sum(data[i - window : i])
        if i == 0:
            feature["BOS"] = True
        if i == T - 1:
            feature["EOS"] = True
        feature["bias"] = 1
        features.append(feature)
    return features


# Load the training recording for the falls experiment.
# `_type` is interpolated into the pickle file names below.
_type = "raw"
data_folder_name = "test_data_1"
path = layout_data_path / "test_layout" / data_folder_name
# utils.pickle_load is a project helper called as (folder, name); presumably it
# unpickles that object from disk — confirm in src/utils.
# NOTE(review): only load pickles from trusted sources; unpickling can run arbitrary code.
reduced_SD_mat = utils.pickle_load(path / "experiment1", f"reduced_SD_mat_{_type}_1")
reduced_AL_mat = utils.pickle_load(path / "experiment1", f"reduced_AL_mat_{_type}_1")
SD_names = utils.pickle_load(path / "experiment1", "SD_names")
AL_names = utils.pickle_load(path / "experiment1", "AL_names")
# Column of reduced_AL_mat used as the target label in the cells below
# (index 4 is 'fall while walking' in the AL_names printed by this cell).
anomaly_index = 4
print(AL_names)
print(reduced_SD_mat.shape)
print(reduced_AL_mat.shape)
# Sum of the selected label column over the whole recording.
print(np.sum(reduced_AL_mat[:, anomaly_index]))

['being semi-bedridden', 'being housebound', 'forgetting', 'wandering', 'fall while walking', 'fall while standing']
(72722821, 28)
(72722821, 6)
951


In [3]:
import numpy as np


def find_true_regions(arr):
    """
    Find continuous True regions.

    Parameters
    ----------
    arr : numpy.ndarray
        arr.shape = (n, ).

    Returns
    -------
    start_end_indices : list of tuple of int
        start_end_indices[i] = (index of start, index of end) of ith regions.
        The end index is exclusive, so ``arr[start:end]`` is the whole region.
        An empty list is returned when ``arr`` contains no True entry.

    Examples
    --------
    >>> find_true_regions(np.array([False, True, True, False, False, True, True, True, False, True]))
    [(1, 3), (5, 8), (9, 10)]
    >>> find_true_regions(np.array([False, False]))
    []
    """
    indices = np.where(arr)[0]
    if indices.size == 0:
        # np.split on an empty array still yields one (empty) chunk, which
        # would make the r[0] lookup below raise IndexError.
        return []
    # A gap larger than 1 between consecutive True positions starts a new region.
    split_points = np.where(np.diff(indices) > 1)[0] + 1
    ranges = np.split(indices, split_points)
    # Convert to built-in ints so the result matches the documented type.
    start_end_indices = [(int(r[0]), int(r[-1]) + 1) for r in ranges]
    return start_end_indices


# (start, end) index pairs of every continuous True run in the selected anomaly
# column of the training labels.
fall_indices = find_true_regions(reduced_AL_mat[:, anomaly_index])
print(fall_indices)

[(1374529, 1374558), (1725780, 1725816), (3124630, 3124660), (5692493, 5692525), (6864771, 6864805), (9055579, 9055612), (11664388, 11664419), (12236306, 12236337), (14427490, 14427523), (14964067, 14964101), (17422324, 17422355), (17578809, 17578833), (19145927, 19145957), (19860798, 19860819), (25014612, 25014640), (28189736, 28189766), (28661366, 28661397), (32428981, 32429016), (35107261, 35107287), (42253971, 42254002), (42500408, 42500443), (42510791, 42510823), (44901872, 44901905), (46524635, 46524663), (50442221, 50442254), (52768217, 52768252), (54453519, 54453535), (56013038, 56013071), (60706084, 60706115), (61562287, 61562328), (64855788, 64855812)]


In [4]:
# training

# Input layout passed to crf.fit (one outer list entry per sequence):
# X_train = [[{'a': True, 'b':1.2}, {'a': False, 'b':2}]]
# y_train = [['True', 'False']]

# Build one training sequence per fall: the fall itself plus `half_len` rows of
# context on each side.
half_len = 100000
n_train_samples = len(reduced_SD_mat)
# Clamp the window to the recording. Without the lower clamp a fall within
# `half_len` rows of the start yields a negative start index, which slicing
# interprets as an offset from the end and silently produces an empty or
# incorrect sequence.
data_indices = [
    (max(r[0] - half_len, 0), min(r[1] + half_len, n_train_samples))
    for r in fall_indices
]
# Features come from the first 24 sensor columns; labels are the selected
# anomaly column converted to strings.
X_train = [data2features(reduced_SD_mat[r[0] : r[1], :24]) for r in data_indices]
y_train = [
    [str(b) for b in reduced_AL_mat[r[0] : r[1], anomaly_index]] for r in data_indices
]

crf = ace_sklearn_crfsuite.CRF(
    algorithm="lbfgs", c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True
)
crf.fit(X_train, y_train)

In [5]:
from collections import Counter
from sklearn.metrics import classification_report

# evaluation
# Load the held-out recording (test_data_2) with the same file layout as the training data.
_type = "raw"
data_folder_name = "test_data_2"
path = layout_data_path / "test_layout" / data_folder_name
test_SD = utils.pickle_load(path / "experiment1", f"reduced_SD_mat_{_type}_1")
test_AL = utils.pickle_load(path / "experiment1", f"reduced_AL_mat_{_type}_1")
test_SD_names = utils.pickle_load(path / "experiment1", "SD_names")
test_AL_names = utils.pickle_load(path / "experiment1", "AL_names")

# NOTE(review): the model above was trained with anomaly_index = 4, but it is
# evaluated here against label column 5. If that is not a deliberate
# cross-anomaly test, it would explain a zero score for the True class — confirm.
# This assignment also overwrites the global used by the training cell, so
# re-running that cell afterwards would train on column 5.
anomaly_index = 5
test_fall_indices = find_true_regions(test_AL[:, anomaly_index])
print(test_fall_indices)

# One evaluation sequence per fall: the fall plus `half_len` rows of context on
# each side, clamped to the recording. Without the lower clamp a negative start
# index would wrap around in slicing and silently yield an empty or incorrect
# sequence.
half_len = 100000
n_test_samples = len(test_SD)
test_data_indices = [
    (max(r[0] - half_len, 0), min(r[1] + half_len, n_test_samples))
    for r in test_fall_indices
]
X_test = [data2features(test_SD[r[0] : r[1], :24]) for r in test_data_indices]
y_test = [
    [str(b) for b in test_AL[r[0] : r[1], anomaly_index]] for r in test_data_indices
]

y_pred = crf.predict(X_test)

labels = list(crf.classes_)
print(labels)
print(metrics.flat_f1_score(y_test, y_pred, average="weighted", labels=labels))

# details
# Sort key orders labels by everything after the first character, then by the
# first character.
sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))

print(
    metrics.flat_classification_report(y_test, y_pred, labels=sorted_labels, digits=3)
)


def print_transitions(trans_features):
    """Print one ``from -> to weight`` line per ((label_from, label_to), weight) entry."""
    for pair, weight in trans_features:
        source, target = pair
        # Labels are left-aligned in 6 / 7 character columns; weight has 6 decimals.
        print(f"{source!s:<6} -> {target!s:<7} {weight:.6f}")


def print_state_features(state_features):
    """Print one ``weight label attribute`` line per ((attr, label), weight) entry."""
    for key, weight in state_features:
        attribute, label = key
        # Weight with 6 decimals, label left-aligned in an 8 character column.
        print(f"{weight:.6f} {label!s:<8} {attribute!s}")


# List the learned weights from largest to smallest.
print("transition features:")
transitions_by_weight = sorted(
    crf.transition_features_.items(), key=lambda item: item[1], reverse=True
)
print_transitions(transitions_by_weight)

print("state features:")
states_by_weight = sorted(
    crf.state_features_.items(), key=lambda item: item[1], reverse=True
)
print_state_features(states_by_weight)

[(490810, 490831), (1336048, 1336087), (3846561, 3846598), (5726091, 5726115), (7264576, 7264612), (8467347, 8467376), (8637608, 8637629), (12273516, 12273542), (12415247, 12415279), (12947694, 12947728), (14687685, 14687707), (15938852, 15938877), (16378152, 16378184), (17209211, 17209235), (17336877, 17336904), (26007867, 26007905), (27090838, 27090865), (30423831, 30423859), (31546022, 31546049), (32037386, 32037407), (32684648, 32684680), (35247375, 35247410), (35675941, 35675968), (37134881, 37134912), (37644872, 37644899), (38288556, 38288586), (38353858, 38353886), (40288372, 40288404), (40885306, 40885350), (42196073, 42196109), (42283292, 42283328), (42575656, 42575680), (45375260, 45375292), (47692682, 47692726), (47731128, 47731166), (50295690, 50295721), (50825838, 50825871), (51701206, 51701243), (54899686, 54899723), (55245380, 55245406), (56026406, 56026438), (58011056, 58011085), (58204875, 58204908), (62549513, 62549546), (66668902, 66668929), (69690030, 69690058), (70

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       False      1.000     1.000     1.000   9599788
        True      0.000     0.000     0.000      1681

    accuracy                          1.000   9601469
   macro avg      0.500     0.500     0.500   9601469
weighted avg      1.000     1.000     1.000   9601469

transition features:
False  -> False   0.407204
True   -> True    0.060465
True   -> False   -7.464795
False  -> True    -7.484195
state features:
0.653863 False    EOS
0.582265 False    BOS
0.540445 False    x_2
0.516878 False    x_11
0.491037 False    x_0
0.476786 False    x_12
0.414085 True     x_23
0.327168 False    x_14
0.326480 False    x_10
0.266219 True     x_18
0.252324 False    x_1
0.231374 True     x_15
0.223698 True     x_3
0.223143 False    x_16
0.196431 False    x_7
0.172946 True     x_6
0.170801 False    x_22
0.152803 True     bias
0.105159 False    x_20
0.092593 False    x_17
0.064207 True     x_5
0.015528 False    x_19
0.007659 False    x_21
0.0026

  _warn_prf(average, modifier, msg_start, len(result))
