## Import data

In [None]:
# !lscpu

In [None]:
# from psutil import *
# # This code will return the number of CPU
# print("Number of CPU: ", cpu_count())
# # This code will return the CPU info
# !cat /proc/cpuinfo

In [None]:
# %pip install aeon
# %pip install tsfresh

In [183]:
import numpy as np
import pandas as pd
import glob
import random
import time

from sklearn.model_selection import LeaveOneGroupOut
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

from aeon.classification.convolution_based import RocketClassifier
from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier
from aeon.classification.feature_based import FreshPRINCEClassifier
from aeon.classification.interval_based import RSTSF
from aeon.classification.shapelet_based import RDSTClassifier
from aeon.classification.dictionary_based import MUSE
from aeon.classification.convolution_based import MultiRocketHydraClassifier
from aeon.classification.deep_learning import InceptionTimeClassifier
from aeon.classification.hybrid import HIVECOTEV2

In [184]:
# from google.colab import files

# uploaded = files.upload()

# for fn in uploaded.keys():
#   print('User uploaded file "{name}" with length {length} bytes'.format(
#       name=fn, length=len(uploaded[fn]))

In [185]:
# Number of consecutive rows (time steps) in each sliding window
WINDOW_SIZE = 28
# Number of rows the window start advances between consecutive windows
WINDOW_STRIDE = 9
# HALF_MOV_TIME = (130/2) * 52

# True: train and evaluate one classifier per movement.
# False: the "not movement specific" cells run instead.
MOVEMENT_SPECIFIC_MODEL = True

# When True, the non-repetition windows of every movement recording are
# randomly down-sampled to the number of repetition windows, and the balanced
# window/label/group lists are filled.
SAME_WINDOW_COUNT = True

# Seed Python's `random` module so the sampling and shuffling done while
# balancing are repeatable. This does not seed NumPy or the classifiers.
random.seed(1)

In [186]:
# Load every recording session; each CSV file becomes one DataFrame.
# glob returns paths in arbitrary, filesystem-dependent order. The position of
# a recording in `dataframes` is later used as its cross-validation group id,
# so the paths are sorted to keep that id stable between runs and machines.
files = sorted(glob.glob("../data/*.csv"))
# files = glob.glob("*.csv")

# list of dataframes, in the same order as `files`
dataframes = [pd.read_csv(f) for f in files]

# df = pd.read_csv("../data/20221115_100424.csv")

In [187]:
# Position of each movement (value of the "Movement" column) inside the
# per-movement lists built below.
movement_index_map = {
    'Jalka sivulle oikea': 0,
    'Jalka sivulle vasen': 1,
    'Jalka taakse oikea': 2,
    'Jalka taakse vasen': 3,
    'Jalkanosto oikea': 4,
    'Jalkanosto vasen': 5,
    'Polvinosto oikea': 6,
    'Polvinosto vasen': 7,
    'Seiso ylös': 8,
    'Varpaille': 9,
}

# Rows carrying one of these "Movement" values are discarded before windowing.
excluded_movements = [
    'Start calibration', 'End calibration', 'Not movement',
    'Not movement inactive', 'Not movement active', 'Seiso ylös sensor vaihto',
]

# One inner list per movement, indexed through movement_index_map.
# windows_all_movements / labels_all_movements: one entry PER RECORDING, each
# entry holding all windows (or labels) of that recording in time order.
# *_balanced: flat lists of the class-balanced, shuffled windows, their labels
# and the index of the recording (position in `dataframes`) they came from.
# groups_all_movements is created but never filled in this cell.
windows_all_movements = [[] for _ in movement_index_map]
labels_all_movements = [[] for _ in movement_index_map]
groups_all_movements = [[] for _ in movement_index_map]
windows_all_movements_balanced = [[] for _ in movement_index_map]
labels_all_movements_balanced = [[] for _ in movement_index_map]
groups_all_movements_balanced = [[] for _ in movement_index_map]

# Running totals, only updated when SAME_WINDOW_COUNT is True
total_windows = 0
non_rep_windows_before = 0
non_rep_windows_after = 0

for index, df in enumerate(dataframes):

    df = df[~df.Movement.isin(excluded_movements)]
    if df.empty:
        # Nothing left to window in this recording
        continue

    sensor_placement_groups = df.groupby('SensorPlacement')
    left_sensor_df = sensor_placement_groups.get_group('left').copy()
    right_sensor_df = sensor_placement_groups.get_group('right').copy()

    left_movement_groups = left_sensor_df.groupby("Movement", sort=False)
    right_movement_groups = right_sensor_df.groupby("Movement", sort=False)

    for group_name, left_df in left_movement_groups:
        right_df = right_movement_groups.get_group(group_name)
        # The data is recorded for each sensor separately. We want all
        # measurements from all sensors at a single timestep instead.
        # This is done by separating the data of the two sensors, renaming
        # their columns and then combining the rows on the closest common
        # timestep (the sensors do not report on exactly the same timestep).

        # Sorting is required by merge_asof later
        left_df = left_df.sort_values(by=['Timestamp'])
        right_df = right_df.sort_values(by=['Timestamp'])

        # Drop the columns that are not needed and rename the remaining ones.
        # Movement and Start are additionally dropped from the right frame:
        # after merging they would only duplicate the left sensor's values.
        left_df = left_df.drop(
            columns=['SensorPlacement', 'MagnX', 'MagnY', 'MagnZ']
        ).rename(columns={
            'AccX': 'lax', 'AccY': 'lay', 'AccZ': 'laz',
            'GyroX': 'lgx', 'GyroY': 'lgy', 'GyroZ': 'lgz',
        })
        right_df = right_df.drop(
            columns=['Movement', 'SensorPlacement', 'Start', 'MagnX', 'MagnY', 'MagnZ']
        ).rename(columns={
            'AccX': 'rax', 'AccY': 'ray', 'AccZ': 'raz',
            'GyroX': 'rgx', 'GyroY': 'rgy', 'GyroZ': 'rgz',
        })

        # Combine left and right sensor rows on the nearest timestamp
        # (left join: one output row per left-sensor row)
        combined_df = pd.merge_asof(left_df, right_df, on='Timestamp', direction='nearest')

        # Keep only the rows between (first start marker - 1000) and
        # (last start marker - 500), in Timestamp units. This removes bad data
        # added at the start and end of recording sessions. It also removes
        # the last movement, because there is no end-of-movement marker.
        start_df = combined_df[combined_df["Start"] == 1]

        if start_df.empty:
            # Without a start marker the trimming bounds are undefined; fail
            # with a message that names the offending data instead of an
            # opaque positional IndexError.
            raise ValueError(
                f"No row with Start == 1 for movement {group_name!r} in recording {index}"
            )

        # Address the column by name rather than by position 0 so the bounds
        # stay correct whatever the column order of the CSV is.
        first_start = start_df["Timestamp"].iloc[0]
        last_start = start_df["Timestamp"].iloc[-1]
        combined_df = combined_df.loc[
            (combined_df["Timestamp"] > first_start - 1000)
            & (combined_df["Timestamp"] < last_start - 500),
            :,
        ]

        all_windows = []
        all_labels = []

        balanced_windows = []
        balanced_labels = []
        balanced_groups = []

        # Slide a WINDOW_SIZE-row window over the recording in WINDOW_STRIDE steps.
        # NOTE(review): the stop value excludes a final window that would fit
        # exactly when len(combined_df) - WINDOW_SIZE is a multiple of the
        # stride; kept as is so the generated data set does not change.
        for i in range(0, len(combined_df) - WINDOW_SIZE, WINDOW_STRIDE):
            window_df = combined_df.iloc[i:i + WINDOW_SIZE]
            # A window is labelled with the maximum of its Start values
            all_labels.append(window_df['Start'].values.max())
            # Remaining columns are the sensor channels; transpose so each
            # window is stored as a list of channels, each a list over time
            channels_df = window_df.drop(columns=['Timestamp', 'Start', 'Movement'])
            all_windows.append(channels_df.T.values.tolist())

        if SAME_WINDOW_COUNT:
            total_windows += len(all_windows)

            all_movement_repetitions = []
            all_movement_non_repetitions = []

            for window, label in zip(all_windows, all_labels):
                if label == 1:
                    all_movement_repetitions.append(window)
                else:
                    all_movement_non_repetitions.append(window)

            non_rep_windows_before += len(all_movement_non_repetitions)

            # Down-sample the non-repetition windows to the number of
            # repetition windows. random.sample raises if asked for more items
            # than exist, so cap the sample size at what is available.
            sample_size = min(len(all_movement_repetitions), len(all_movement_non_repetitions))
            all_movement_non_repetitions = random.sample(all_movement_non_repetitions, sample_size)
            non_rep_windows_after += len(all_movement_non_repetitions)

            # Pair windows with labels so they stay aligned while shuffling
            z = list(zip(
                all_movement_non_repetitions + all_movement_repetitions,
                [0] * len(all_movement_non_repetitions) + [1] * len(all_movement_repetitions),
            ))

            random.shuffle(z)

            # Unlike zip(*z), this also works when z is empty
            balanced_windows = [window for window, _ in z]
            balanced_labels = [label for _, label in z]
            # Every balanced window remembers which recording it came from
            balanced_groups = [index] * len(balanced_windows)

        movement_position = movement_index_map[group_name]

        # Full sequences are stored per recording (append) ...
        windows_all_movements[movement_position].append(all_windows)
        labels_all_movements[movement_position].append(all_labels)

        # ... balanced data is stored flat (extend), with a parallel group list
        windows_all_movements_balanced[movement_position].extend(balanced_windows)
        labels_all_movements_balanced[movement_position].extend(balanced_labels)
        groups_all_movements_balanced[movement_position].extend(balanced_groups)

In [188]:
# Report how many windows were created in total and how many non-repetition
# windows existed before and after the balancing down-sampling
print(f'Total window count: {total_windows}, non repetition windows before: {non_rep_windows_before}, non repetition windows after: {non_rep_windows_after}')

Total window count: 18482, non repetition windows before: 15397, non repetition windows after: 3085


In [189]:
# Leave-one-group-out evaluation with a separate classifier per movement.
# Each fold trains on the balanced windows of all recordings but one, then
# predicts (a) the balanced windows of the held-out recording and (b) the full,
# unbalanced window sequence of one recording, kept for post-processing.
if MOVEMENT_SPECIFIC_MODEL:
    movement_count = len(windows_all_movements_balanced)

    # Per movement: one list of labels / predictions per fold, computed on the
    # complete window sequence of a recording
    test_labels_all_movements = [[] for _ in range(movement_count)]
    predictions_all_movements = [[] for _ in range(movement_count)]

    # One row per fold: movement index, accuracy, f1, fit time, predict time
    data_list = []
    # Labels / predictions of the balanced test windows, pooled over all folds
    test_labels_balanced = np.array([])
    predictions_balanced = np.array([])
    # Not filled in this cell (the code that did so is commented out below)
    test_labels_all = np.array([])
    predictions_all = np.array([])

    # Labels / predictions of the balanced test windows, pooled per movement
    test_labels_per_movement = [[] for _ in range(movement_count)]
    predictions_per_movement = [[] for _ in range(movement_count)]

    logo = LeaveOneGroupOut()

    for index, windows in enumerate(windows_all_movements_balanced):
        labels = labels_all_movements_balanced[index]
        groups = groups_all_movements_balanced[index]

        # Convert the nested lists to arrays once per movement; indexing the
        # arrays per fold avoids re-converting the whole list on every fold.
        windows_array = np.asarray(windows)
        labels_array = np.asarray(labels)

        # NOTE(review): group_index is the fold number. It is used below to
        # pick the matching recording from windows_all_movements[index], which
        # presumes the folds come in the same order the recordings were
        # appended and that every recording contributed balanced windows for
        # this movement - confirm.
        for group_index, (train, test) in enumerate(
            logo.split(windows_array, labels_array, groups=groups)
        ):
            train_x = windows_array[train]
            train_y = labels_array[train]
            test_x = windows_array[test]
            test_y = labels_array[test]

            classifier = RocketClassifier(n_jobs=-1)
            # classifier = RDSTClassifier(n_jobs=-1)
            # classifier = MultiRocketHydraClassifier(n_jobs=-1)
            # classifier = KNeighborsTimeSeriesClassifier(n_jobs=-1)
            # classifier = RSTSF(n_jobs=-1)
            # classifier = MUSE(n_jobs=-1)
            # classifier = FreshPRINCEClassifier(n_jobs=-1)
            # classifier = HIVECOTEV2(n_jobs=-1)
            # classifier = InceptionTimeClassifier()

            t0 = time.perf_counter()
            classifier.fit(train_x, train_y)
            fit_time = time.perf_counter() - t0

            t0 = time.perf_counter()
            y_pred = classifier.predict(test_x)
            predict_time = time.perf_counter() - t0

            accuracy = accuracy_score(test_y, y_pred)
            f1 = f1_score(test_y, y_pred)
            # macro = f1_score(test_y, y_pred, average='macro')
            # micro = f1_score(test_y, y_pred, average='micro')
            # weighted = f1_score(test_y, y_pred, average='weighted')
            # none = f1_score(test_y, y_pred, average=None)

            data_list.append([index, accuracy, f1, fit_time, predict_time])

            test_labels_balanced = np.concatenate((test_labels_balanced, test_y))
            predictions_balanced = np.concatenate((predictions_balanced, y_pred))

            test_labels_per_movement[index].extend(test_y.tolist())
            predictions_per_movement[index].extend(y_pred.tolist())

            # For all windows: predict the complete, time-ordered window
            # sequence of one recording with the classifier of this fold
            test_x_all_windows = np.array(windows_all_movements[index][group_index])
            test_y_all_windows = np.array(labels_all_movements[index][group_index])
            y_pred_all_windows = classifier.predict(test_x_all_windows)
            accuracy_all_windows = accuracy_score(test_y_all_windows, y_pred_all_windows)

            # test_labels_all = np.concatenate((test_labels_all, test_y_all_windows))
            # predictions_all = np.concatenate((predictions_all, y_pred_all_windows))

            # Save labels and predictions for post processing in 2d array
            test_labels_all_movements[index].append(test_y_all_windows.tolist())
            predictions_all_movements[index].append(y_pred_all_windows.tolist())

        print(f"Movement {index} complete")

    result_df = pd.DataFrame(data_list, columns =['index', 'accuracy', 'f1', 'fit_time', 'predict_time'])

Movement 0 complete
Movement 1 complete
Movement 2 complete
Movement 3 complete
Movement 4 complete
Movement 5 complete
Movement 6 complete
Movement 7 complete
Movement 8 complete
Movement 9 complete


In [190]:
# Summarise the movement-specific evaluation: the per-fold table, then accuracy
# and f1 computed over the balanced test windows pooled from all folds, plus
# the spread of the per-fold accuracies and the summed fit / predict times
if MOVEMENT_SPECIFIC_MODEL:
    print(result_df)
    tot_accuaracy = accuracy_score(test_labels_balanced, predictions_balanced)
    tot_f1 = f1_score(test_labels_balanced, predictions_balanced)
    print(f"Total accuracy: {tot_accuaracy}, Total f1: {tot_f1}, Min accuracy: {result_df['accuracy'].min()}, Max accuracy: {result_df['accuracy'].max()}, STD accuracy: {result_df['accuracy'].std()} Total fit time: {result_df['fit_time'].sum()}, Total predict time: {result_df['predict_time'].sum()}")

    index  accuracy        f1  fit_time  predict_time
0       0  0.850000  0.857143  2.978770      0.328014
1       0  0.903226  0.906250  2.715624      0.353985
2       0  0.816667  0.825397  2.713192      0.332090
3       0  0.766667  0.781250  3.432882      0.388187
4       0  0.866667  0.851852  3.914219      0.474324
..    ...       ...       ...       ...           ...
95      9  0.935484  0.935484  3.032344      0.374249
96      9  0.900000  0.903226  3.062647      0.381965
97      9  0.816667  0.800000  3.065967      0.405452
98      9  0.900000  0.900000  3.198472      0.367466
99      9  0.983871  0.983607  3.695051      0.422726

[100 rows x 5 columns]
Total accuracy: 0.8711507293354943, Total f1: 0.8686601685114819, Min accuracy: 0.6515151515151515, Max accuracy: 0.9838709677419355, STD accuracy: 0.06806995976087013 Total fit time: 340.0658154490375, Total predict time: 41.8972977999656


In [191]:
# Mean of the per-fold accuracies for each movement index
result_df[["index","accuracy"]].groupby("index").mean()

Unnamed: 0_level_0,accuracy
index,Unnamed: 1_level_1
0,0.871129
1,0.860323
2,0.876495
3,0.861119
4,0.895548
5,0.906122
6,0.883226
7,0.877017
8,0.79874
9,0.884624


In [192]:
# Accuracy per movement, computed over the balanced test windows pooled from
# all folds of that movement (not the mean of the per-fold accuracies)
accuracy_score_per_movement = [
    accuracy_score(
        test_labels_per_movement[movement_position],
        predictions_per_movement[movement_position],
    )
    for movement_position in range(10)
]

In [193]:
# Display the pooled accuracy of each movement, in movement-index order
accuracy_score_per_movement

[0.872168284789644,
 0.860655737704918,
 0.8762214983713354,
 0.8598726114649682,
 0.8957654723127035,
 0.9061488673139159,
 0.8831168831168831,
 0.8770226537216829,
 0.7971246006389776,
 0.8848684210526315]

# Post processing

In [200]:
# Number of predictions at the start of each prediction sequence forced to 0
BEGINNING_ZERO = 2
# Number of predictions at the end of each prediction sequence forced to 0
END_ZERO = 2

# Tallies of prediction sequences by detected pause count:
# exactly 10, 9 or 11, 8 or 12, anything else
correct = 0
one_off = 0
two_off = 0
more_off = 0

def flip_small_prediction_groups(group_df, min_size):
    """Invert the predictions of a short run, leaving longer runs untouched.

    Intended for ``groupby(...).apply`` where every group is one run of equal
    consecutive predictions.

    Args:
        group_df: DataFrame with a 'Prediction' column of 0/1 values.
        min_size: Groups with at most this many non-null predictions are
            flipped (0 becomes 1 and 1 becomes 0).

    Returns:
        ``group_df`` itself when the group is larger than ``min_size``,
        otherwise a flipped copy. The input frame is never modified, because
        pandas does not support mutating the object passed in by ``apply``.
    """
    if group_df['Prediction'].count() > min_size:
        return group_df

    flipped_df = group_df.copy()
    flipped_df['Prediction'] = flipped_df['Prediction'].replace({0: 1, 1: 0})
    return flipped_df

def flip_small_zero_groups(group_df, min_size):
    """Invert the predictions of a short run, but only if it starts with 0.

    Intended for ``groupby(...).apply`` where every group is one run of equal
    consecutive predictions, so a group starting with 0 is a run of zeros.

    Args:
        group_df: DataFrame with a 'Prediction' column of 0/1 values.
        min_size: Only groups with at most this many non-null predictions are
            candidates for flipping.

    Returns:
        A flipped copy (0 becomes 1 and 1 becomes 0) when the group is small
        enough and its first prediction is 0; otherwise ``group_df`` itself.
        The input frame is never modified, because pandas does not support
        mutating the object passed in by ``apply``.
    """
    predictions = group_df['Prediction']
    # The emptiness check guards the positional lookup of the first value
    if predictions.count() > min_size or predictions.empty or predictions.iloc[0] != 0:
        return group_df

    flipped_df = group_df.copy()
    flipped_df['Prediction'] = flipped_df['Prediction'].replace({0: 1, 1: 0})
    return flipped_df

def get_group_middle(group_df):
    """Add a 'Pause middle' column marking the middle row of a run of ones.

    Intended for ``groupby(...).apply`` where every group is one run of equal
    consecutive predictions.

    Args:
        group_df: DataFrame with a 'Prediction' column of 0/1 values.

    Returns:
        A copy of ``group_df`` with an extra 'Pause middle' column. The column
        is 0 everywhere, except that a group whose first prediction is 1 gets
        a single 1 at row position ``len(group_df) // 2``. The input frame is
        never modified, because pandas does not support mutating the object
        passed in by ``apply``.
    """
    marked_df = group_df.copy()
    marked_df['Pause middle'] = [0] * len(marked_df.index)

    if not marked_df.empty and marked_df['Prediction'].iloc[0] == 1:
        middle_row = len(marked_df.index) // 2
        # Look the column position up by name; a fixed position is only right
        # for one specific column layout
        pause_column = marked_df.columns.get_loc('Pause middle')
        marked_df.iat[middle_row, pause_column] = 1

    return marked_df

# Pause count at which a prediction sequence is tallied as correct
EXPECTED_PAUSE_COUNT = 10


def _prediction_run_ids(predictions):
    """Return a Series giving every run of equal consecutive values its own id."""
    return (predictions != predictions.shift()).cumsum()


# Smooth every stored prediction sequence, count the runs of ones that remain
# ("pauses") and tally how far that count is from EXPECTED_PAUSE_COUNT.
# Iterating the stored lists directly avoids assuming exactly 10 movements
# with exactly 10 sequences each.
for movement_labels, movement_predictions in zip(test_labels_all_movements, predictions_all_movements):
    for a, b in zip(movement_labels, movement_predictions):

        pause_prediction_df = pd.DataFrame({'Label': a, 'Original prediction': b})
        pause_prediction_df['Prediction'] = pause_prediction_df['Original prediction']

        # Force the first and last few predictions to 0. The guard matters for
        # the tail: with END_ZERO == 0 the slice [-0:] would select every row.
        prediction_column = pause_prediction_df.columns.get_loc('Prediction')
        if BEGINNING_ZERO > 0:
            pause_prediction_df.iloc[:BEGINNING_ZERO, prediction_column] = 0
        if END_ZERO > 0:
            pause_prediction_df.iloc[-END_ZERO:, prediction_column] = 0

        # Group into runs of either pauses or non pauses and flip the value of
        # every run that consists of a single prediction
        groups = pause_prediction_df.groupby(
            _prediction_run_ids(pause_prediction_df['Prediction']), group_keys=False)
        pause_prediction_df = groups.apply(flip_small_prediction_groups, min_size=1)

        # Fill in runs of at most 5 zeros (runs are recomputed after the
        # previous step changed the predictions)
        groups = pause_prediction_df.groupby(
            _prediction_run_ids(pause_prediction_df['Prediction']), group_keys=False)
        pause_prediction_df = groups.apply(flip_small_zero_groups, min_size=5)

        # Mark the middle row of every remaining run of ones
        groups = pause_prediction_df.groupby(
            _prediction_run_ids(pause_prediction_df['Prediction']), group_keys=False)
        pause_prediction_df = groups.apply(get_group_middle)

        # One marked row per run of ones, so the sum is the number of pauses
        pause_count = pause_prediction_df['Pause middle'].sum()

        count_error = abs(pause_count - EXPECTED_PAUSE_COUNT)
        if count_error == 0:
            correct += 1
        elif count_error == 1:
            one_off += 1
        elif count_error == 2:
            two_off += 1
        else:
            more_off += 1

print(f"Correct: {correct}, One off: {one_off}, Two off: {two_off}, More off: {more_off}")


Correct: 80, One off: 9, Two off: 4, More off: 7


# Not same window count

# Not movement specific

In [9]:
# Leave-one-group-out evaluation of a single classifier trained on the windows
# of all movements together (used when MOVEMENT_SPECIFIC_MODEL is False).
if not MOVEMENT_SPECIFIC_MODEL:
    all_windows = []
    all_labels = []
    all_groups = []
    # One row per fold: accuracy, fit time, predict time
    data_list = []

    # windows_all_movements / labels_all_movements hold one nested list per
    # recording, and groups_all_movements is never filled by the preprocessing
    # cell. Flatten the windows and labels here and build the matching group
    # ids at the same time, so the three lists have one entry per window.
    # NOTE(review): the group id is the position of the recording within its
    # movement's list; this presumes every recording contains every movement,
    # so that a position denotes the same recording across movements - confirm.
    for movement_windows, movement_labels in zip(windows_all_movements, labels_all_movements):
        recordings = zip(movement_windows, movement_labels)
        for recording_position, (recording_windows, recording_labels) in enumerate(recordings):
            all_windows.extend(recording_windows)
            all_labels.extend(recording_labels)
            all_groups.extend([recording_position] * len(recording_windows))

    # Labels / predictions of the test windows, pooled over all folds
    # Not efficient, should use lists
    test_labels = np.array([])
    predictions = np.array([])

    # Convert once instead of re-converting the full lists on every fold
    windows_array = np.asarray(all_windows)
    labels_array = np.asarray(all_labels)

    logo = LeaveOneGroupOut()

    for train, test in logo.split(windows_array, labels_array, groups=all_groups):

        train_x = windows_array[train]
        train_y = labels_array[train]
        test_x = windows_array[test]
        test_y = labels_array[test]

        classifier = RocketClassifier(n_jobs=8)
        # classifier = KNeighborsTimeSeriesClassifier(n_jobs=8)
        # classifier = FreshPRINCEClassifier(n_jobs=8)
        # classifier = RSTSF(n_jobs=8)
        # classifier = RDSTClassifier(n_jobs=8)
        # classifier = MUSE(n_jobs=8)
        # classifier = MultiRocketHydraClassifier(n_jobs=8)
        # classifier = InceptionTimeClassifier()
        # classifier = HIVECOTEV2(n_jobs=8)

        t0 = time.perf_counter()
        classifier.fit(train_x, train_y)
        fit_time = time.perf_counter() - t0

        t0 = time.perf_counter()
        y_pred = classifier.predict(test_x)
        predict_time = time.perf_counter() - t0

        accuracy = accuracy_score(test_y, y_pred)

        data_list.append([accuracy, fit_time, predict_time])

        test_labels = np.concatenate((test_labels, test_y))
        predictions = np.concatenate((predictions, y_pred))

    result_df = pd.DataFrame(data_list, columns =['accuracy', 'fit_time', 'predict_time'])

In [14]:
# Summarise the pooled (not movement specific) evaluation: the per-fold table,
# the mean / min / max of the per-fold accuracies and the summed timings
if not MOVEMENT_SPECIFIC_MODEL:
    print(result_df)
    print(f"Mean accuracy: {result_df['accuracy'].mean()}, Min accuracy: {result_df['accuracy'].min()}, Max accuracy: {result_df['accuracy'].max()} Total fit time: {result_df['fit_time'].sum()}, Total predict time: {result_df['predict_time'].sum()}")

In [None]:
# Put the pooled test labels next to the predictions made for them,
# one row per test window
pred_data = dict(Label=test_labels, Prediction=predictions)

# Create DataFrame
pred_df = pd.DataFrame(data=pred_data)

# Random Test

In [24]:
# Ad-hoc check: predict the windows of one movement, fold by fold, with an
# already fitted classifier.
# NOTE(review): nothing is trained here. `classifier` is whatever object an
# earlier cell left behind, so the scores depend on which cell ran last and
# may include windows that classifier was trained on - confirm the intent.
data_list = []
test_labels = np.array([])
predictions = np.array([])

for index, windows in enumerate(windows_all_movements):

    # Only the movement at position 9 ('Varpaille' in movement_index_map)
    # is evaluated
    if index == 9:
        labels = labels_all_movements[index]
        # NOTE(review): the preprocessing cell only creates
        # groups_all_movements as empty lists (the line that would fill it is
        # commented out) and appends one nested list per recording to
        # windows_all_movements / labels_all_movements - confirm these have
        # the flat, per-window layout this split expects before running.
        groups = groups_all_movements[index]

        logo = LeaveOneGroupOut()

        # Only the test indices of each split are used; `train` is ignored
        for train, test in logo.split(windows, labels, groups=groups):

            test_x = np.take(windows, test, 0)
            test_y = np.take(labels, test, 0)

            y_pred = classifier.predict(test_x)

            accuracy = accuracy_score(test_y, y_pred)

            # print(score)
            row = [index, accuracy]
            data_list.append(row)

            test_labels = np.concatenate((test_labels, test_y))
            predictions = np.concatenate((predictions, y_pred))

        # NOTE(review): `index` enumerates movements here, although the
        # message says "Person"
        print(f"Person {index} complete")

# One row per fold: movement index and accuracy on the held-out group
result_df = pd.DataFrame(data_list, columns =['index', 'accuracy']) 

Person 9 complete


In [25]:
# Display the per-fold accuracies collected by the cell above
result_df

Unnamed: 0,index,accuracy
0,9,0.932203
1,9,0.922078
2,9,0.922018
3,9,0.918182
4,9,0.965517
5,9,0.934156
6,9,0.941176
7,9,0.94697
8,9,0.949686
9,9,0.964029


In [26]:
# Put the pooled test labels next to the predictions made for them,
# one row per test window
pred_data = dict(Label=test_labels, Prediction=predictions)

# Create DataFrame
pred_df = pd.DataFrame(data=pred_data)