In [1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
import pickle
from sklearn.metrics import f1_score
from tensorflow import keras
import yaml
#import tikzplotlib

import importlib


# import Model13_FullyConvCodeDecode
# importlib.reload(Model13_FullyConvCodeDecode)
# from Model13_FullyConvCodeDecode import ConvModel

import Model17_FullyConvCodeDecode2interpol
importlib.reload(Model17_FullyConvCodeDecode2interpol)
from Model17_FullyConvCodeDecode2interpol import ConvModel

# import Model13_FullyConvCodeDecode3interpol
# importlib.reload(Model13_FullyConvCodeDecode3interpol)
# from Model13_FullyConvCodeDecode3interpol import ConvModel

import seaborn as sns
importlib.reload(sns)
import Data_mdl13
importlib.reload(Data_mdl13)
from Data_mdl13 import DataProcessor_mdl13

import Plots
importlib.reload(Plots)
from Plots import plot_CM, plot_CM_mplt

# evaluation on only 14 classes, padded frames are labeled as 0 but exclude dform evalaution

In [2]:
import scipy
import sklearn

def evaluation(y_true, y_pred):

    mask = y_true == 0
    y_true_masked = np.ma.array(y_true, mask = mask)
    y_hat_masked = np.ma.array(np.argmax(y_pred, axis=-1), mask = mask)

    y_pred_true = np.ma.array(y_pred[np.arange(y_pred.shape[0])[:, None], np.arange(y_pred.shape[1]), y_true], mask = mask)

    y_pred_gest, y_pred_count = np.unique(y_hat_masked, axis=-1, return_counts=True)

    y_hat_seq = scipy.stats.mode(y_hat_masked, axis=1).mode
    y_true_seq = scipy.stats.mode(y_true_masked, axis=1).mode

    accuracy_sequences = np.sum(y_hat_seq == y_true_seq) / len(y_true_seq)
    accuracy_frames = np.sum(y_hat_masked == y_true_masked) / y_true_masked.count()

    y_true_flatt = y_true.flatten()
    y_pred_flatt = y_pred.reshape(-1, y_pred.shape[-1])
    nll_include_padded = -np.mean(np.log(y_pred_flatt[np.arange(len(y_true_flatt)), y_true_flatt]))
    nll_only_importantframes = -np.mean(np.log(y_pred_true))

    F1_Score_sequences = sklearn.metrics.f1_score(y_true_seq, y_hat_seq, average='macro')
    F1_Score_frames = sklearn.metrics.f1_score(y_true_masked.compressed(), y_hat_masked.compressed(), average='macro')

    results = {'accuracy_frames': accuracy_frames, 'accuracy_sequences': accuracy_sequences, 
               'NLL_only_important': nll_only_importantframes, 'NLL_include_paddedframes': nll_include_padded, 
               'F1_Score_frames': F1_Score_frames, 'F1_Score_sequences': F1_Score_sequences}
    
    return results

In [3]:
cwd = Path.cwd()

%load_ext tensorboard
%load_ext version_information
#!rm -rf ./logs/

logsdir = cwd / '..' / 'logs'
# !rmdir /s /q {logsdir}
#!tensorboard --logdir {logsdir} --host localhost --port 6006

cfg = 'config_mdl17_14class.yaml'
with open(cfg, 'r') as f:
    cfg_data = yaml.safe_load(f)
seeds = cfg_data['seeds']
folds = np.arange(1,21)

In [4]:
# process Data
resultsDFslide = pd.DataFrame()

folder = 'owpg_fullyconv'

data_processor = DataProcessor_mdl13(cfg)
data_processor.load_handgestdata()
data_processor.handangles2windows()
data_processor.save_config(folder)
data_processor.save_windowsets(folder)



foldsprocessors = dict()
for fold in folds:
    data_processor = DataProcessor_mdl13(cfg, fold = fold)
    data_processor.load_windows(folder)
    data_processor.processwindows()
    data_processor.save_windowsets_processed(folder, name='Fold'+str(fold))

# ohne die infos zu schreiben benötigt das Preprocessing 1 Minute statt 4 Minuten

permuting Samples using seed 26
Make Windows and apply framreferences...
Processing subject 1
Processing subject 2
Processing subject 3
Processing subject 4
Processing subject 5
Processing subject 6
Processing subject 7
Processing subject 8
Processing subject 9
Processing subject 10
Processing subject 11
Processing subject 12
Processing subject 13
Processing subject 14
Processing subject 15
Processing subject 16
Processing subject 17
Processing subject 18
Processing subject 19
Processing subject 20
Using seed 26
Fold: 1
Standardize and padding of data...
Standardize data...train
Write data into windows...
Standardize data...valid
Write data into windows...
Using seed 26
Fold: 2
Standardize and padding of data...
Standardize data...train
Write data into windows...
Standardize data...valid
Write data into windows...
Using seed 26
Fold: 3
Standardize and padding of data...
Standardize data...train
Write data into windows...
Standardize data...valid
Write data into windows...
Using seed 26

In [4]:
# train model
resultsDFslide = pd.DataFrame()
datapath = cwd / '..' / 'Data' / 'DHG2016' / 'owpg_fullyconv'

results_valid_labels_list = []
results_scores_list_valid = []
results_scores_list_train = []

for seedidx, n_fold in enumerate(folds):
    print('evaluating Fold', n_fold)
    seed_val = seeds[seedidx]

    fold = pickle.load(open((datapath / ('Fold' + str(n_fold) + '.pkl')).resolve(), 'rb'))
    Y_train_oh = fold['train']['Y_allframes_oh']
    Y_valid_oh = fold['valid']['Y_allframes_oh']
    X_train = fold['train']['X']
    X_valid = fold['valid']['X']
    print('training ...')


    mdl13 = ConvModel(cfg, 'Test', seed = seed_val)
    mdl13.compile_model()

    # Train the model
    mdl13.train_model(X_train, Y_train_oh, X_valid, Y_valid_oh)

    prediction = mdl13.model.predict(X_valid)
    prediction_train = mdl13.model.predict(X_train)
    results_valid_labels_list.append({'Fold': n_fold, 'Y': fold['valid']['Y_allframes'], 'Y_prob': prediction})

    results_scores_list_train.append(pd.DataFrame(evaluation( fold['train']['Y_allframes'], prediction_train), index=[n_fold]))
    results_scores_list_valid.append(pd.DataFrame(evaluation( fold['valid']['Y_allframes'], prediction), index=[n_fold]))

#results_valid_labels = pd.concat(results_valid_labels_list) # currently all predictions are stored here, not just the one sof the true class therefore not possible to make DataFrame out of it


evaluating Fold 1
training ...
Setting seed to 5
Epoch 1/30
42/42 - 10s - 245ms/step - NLL: 1.1804 - accuracy: 0.7733 - loss: 1.7355 - modified NLL: 0.6415 - precision: 0.9616 - recall: 0.5507 - val_NLL: 0.5485 - val_accuracy: 0.8188 - val_loss: 1.0121 - val_modified NLL: 0.0023 - val_precision: 0.9888 - val_recall: 0.7939
Epoch 2/30
42/42 - 4s - 86ms/step - NLL: 0.5677 - accuracy: 0.8114 - loss: 0.9560 - modified NLL: 0.0104 - precision: 0.9851 - recall: 0.7715 - val_NLL: 0.5108 - val_accuracy: 0.8392 - val_loss: 0.8827 - val_modified NLL: 0.0038 - val_precision: 0.9937 - val_recall: 0.7933
Epoch 3/30
42/42 - 4s - 86ms/step - NLL: 0.4959 - accuracy: 0.8325 - loss: 0.7973 - modified NLL: 0.0085 - precision: 0.9798 - recall: 0.7823 - val_NLL: 0.4845 - val_accuracy: 0.8442 - val_loss: 0.7947 - val_modified NLL: 6.6151e-04 - val_precision: 0.9851 - val_recall: 0.8030
Epoch 4/30
42/42 - 3s - 79ms/step - NLL: 0.4210 - accuracy: 0.8561 - loss: 0.6727 - modified NLL: 0.0079 - precision: 0.971

In [12]:
#results_valid_labels = pd.concat(results_valid_labels_list)
# results_scores_valid = pd.concat(results_scores_list_valid)
# results_scores_train = pd.concat(results_scores_list_train)

results_scores = pd.concat([pd.concat(results_scores_list_train).add_suffix('_train'),pd.concat(results_scores_list_valid).add_suffix('_valid')],axis = 1)
results_scores.rename(columns = {'fold_train': 'fold'}, inplace = True)

In [17]:

results_scores.to_pickle(datapath/'results_scores_fcn.pkl')

In [7]:
with open(datapath/'fullyconv_scores_valid.pkl', 'wb') as f:
    pickle.dump(results_scores_list_valid, f)

with open(datapath/'fullyconv_predictions.pkl', 'wb') as f:
    pickle.dump(results_valid_labels_list, f)

with open(datapath/'fullyconv_scores_train.pkl', 'wb') as f:
    pickle.dump(results_scores_list_train, f)


In [17]:
# evaluation without weighting

evaluation(prediction, fold['valid']['Y_allframes'])

{'accuracy_frames': 0.8505616559384072,
 'accuracy_sequences': 0.8357142857142857,
 'NLL_frames': 0.10866974,
 'NLL _allframes': 0.529825399548782,
 'F1_Score_sequences': 0.8280718500561586,
 'F1_Score_frames': 0.8325872082923578}

In [19]:
# evaluation without weighting

evaluation(prediction, fold['valid']['Y_allframes'])

{'accuracy_frames': 0.8505616559384072,
 'accuracy_sequences': 0.8357142857142857,
 'NLL_frames': 0.10866974,
 'NLL _allframes': 0.529825399548782,
 'F1_Score_sequences': 0.8280718500561586,
 'F1_Score_frames': 0.8325872082923578}

In [9]:
datapath = cwd / '..' / 'Data' / 'DHG2016' / 'owpg_fullyconv'

with open(datapath/'fullyconv_scores.pkl', 'rb') as f:
    res_valid = pickle.load(f)

with open(datapath/'fullyconv_scores_train.pkl', 'rb') as f:
    res_train = pickle.load(f)

res_valid_df = pd.concat(res_valid).add_suffix('_valid')
res_train_df = pd.concat(res_train).add_suffix('_train')

results_df = pd.concat([res_valid_df, res_train_df], axis=1)

In [11]:
results_df.columns

Index(['accuracy_frames_valid', 'accuracy_sequences_valid',
       'NLL_only_important_valid', 'NLL_include_paddedframes_valid',
       'F1_Score_frames_valid', 'F1_Score_sequences_valid',
       'accuracy_frames_train', 'accuracy_sequences_train',
       'NLL_only_important_train', 'NLL_include_paddedframes_train',
       'F1_Score_frames_train', 'F1_Score_sequences_train'],
      dtype='object')

In [13]:


mean = results_df.mean()
std = results_df.std()

print(f'accuracy on training data - frames: {mean["accuracy_frames_train"]*100:.1f}% +-  {std["accuracy_frames_train"]*100:.1f}%')
print(f'F1_Score on training data - frames: {mean["F1_Score_frames_train"]*100:.1f}% +- {std["F1_Score_frames_train"]*100:.1f}%')
print(f'NLL on training data - frames: {mean["NLL_only_important_train"]:.2f} +- {std["NLL_only_important_train"]:.2f}')

print(f'accuracy on validation data - frames: {mean['accuracy_frames_valid']*100:.1f}% +- {std['accuracy_frames_valid']*100:.1f}%')
print(f'F1-Score on validation data - frames: {mean['F1_Score_frames_valid']*100:.1f}% +- {std['F1_Score_frames_valid']*100:.1f}%')
print(f'NLL on validation data - frames: {mean['NLL_only_important_valid']:.2f} +- {std['NLL_only_important_valid']:.2f}')

print(f'accuracy on training data - sequences: {mean["accuracy_sequences_train"]*100:.1f}% +-  {std["accuracy_sequences_train"]*100:.1f}%')
print(f'F1_Score on training data - sequences: {mean["F1_Score_sequences_train"]*100:.1f}% +- {std["F1_Score_sequences_train"]*100:.1f}%')


print(f'accuracy on validation data - sequences: {mean['accuracy_sequences_valid']*100:.1f}% +- {std['accuracy_sequences_valid']*100:.1f}%')
print(f'F1-Score on validation data - sequences: {mean['F1_Score_sequences_valid']*100:.1f}% +- {std['F1_Score_sequences_valid']*100:.1f}%')

print(f'NLL on training data - allframes: {mean["NLL_include_paddedframes_train"]:.2f} +- {std["NLL_include_paddedframes_train"]:.2f}')
print(f'NLL on validation data - allframes: {mean['NLL_include_paddedframes_valid']:.2f} +- {std['NLL_include_paddedframes_valid']:.2f}')

accuracy on training data - frames: 83.9% +-  4.0%
F1_Score on training data - frames: 77.4% +- 4.0%
NLL on training data - frames: 0.48 +- 0.13
accuracy on validation data - frames: 75.0% +- 10.0%
F1-Score on validation data - frames: 68.1% +- 9.5%
NLL on validation data - frames: 0.81 +- 0.39
accuracy on training data - sequences: 85.5% +-  3.3%
F1_Score on training data - sequences: 85.4% +- 3.3%
accuracy on validation data - sequences: 77.5% +- 10.1%
F1-Score on validation data - sequences: 76.3% +- 10.4%
NLL on training data - allframes: 0.11 +- 0.03
NLL on validation data - allframes: 0.19 +- 0.09


Below experimental code starts

In [33]:
y_true = np.array([[0,0,0,1,1,1,0,0], [0,2,2,2,2,2,0,0]])
y_true.shape

(2, 8)

In [52]:
y_pred = np.array([[[1,0,0],[1,0,0],[1,0,0],[0,1,0],[0,1,0],[0,1,0],[1,0,0],[1,0,0]], [[1,0,0],[0,0,1],[0,0,1],[0,0,1],[0,0,1],[0,0,1],[1,0,0],[1,0,0]]])
y_pred.shape
y_pred = np.array([[[0,1,0],[0,1,0],[0,1,0],[0,0.4,0.6],[0,1,0],[0,1,0],[0,1,0],[0,1,0]], [[0,1,0],[0,0,1],[0,0,1],[0,0,1],[0,0,1],[0,0,1],[0,1,0],[0,1,0]]])

In [53]:
evaluation(y_true, y_pred)

  nll_include_padded = -np.mean(np.log(y_pred_flatt[np.arange(len(y_true_flatt)), y_true_flatt]))
  nll_only_importantframes = -np.mean(np.log(y_pred_true))


{'accuracy_frames': 0.875,
 'accuracy_sequences': 1.0,
 'NLL_only_important': 0.11453634148426937,
 'NLL_incldue_paddedframes': inf,
 'F1_Score_sequences': 1.0,
 'F1_Score_frames': 0.8545454545454545}

In [None]:
import numpy as np
from scipy.stats import mode

# Example array
y_true = np.array([[0,0,0,1,1,1,0,0], [0,2,2,2,2,2,0,0]])
y_pred = np.array([[0,0,0,1,2,2,0,0], [0,2,2,2,2,6,0,0]])
# Find the most frequent element along axis 1
#most_frequent_elements = mode(array, axis=1).mode

mask = y_true == 0
y_true_masked = np.ma.array(y_true, mask=mask)
y_hat_masked = np.ma.array(y_pred, mask=mask)

y_true_seq = scipy.stats.mode(y_true_masked, axis=-1).mode
y_hat_seq = scipy.stats.mode(y_hat_masked, axis = -1).mode

accuracy = np.sum(y_hat_masked == y_true_masked) / y_true_masked.count()
accuracy = np.sum(y_hat_seq == y_true_seq) / y_true_seq.shape[0]

accuracy

KeyboardInterrupt: 

In [18]:
mdl13.model.summary()

|# |	Name of the gesture |	Type of the gesture|
| --- | --- | --- |
|1 	|Grab 	        |Fine|
|2 	|Tap 	        |Coarse|
|3 	|Expand 	    |Fine|
|4 	|Pinch 	        |Fine|
|5 	|Rotation CW 	|Fine|
|6 	|Rotation CCW 	|Fine|
|7 	|Swipe Right 	|Coarse|
|8 	|Swipe Left 	|Coarse|
|9 	|Swipe Up 	    |Coarse|
|10 |	Swipe Down 	|Coarse|
|11 |	Swipe X 	|Coarse|
|12 |	Swipe + 	|Coarse|
|13 |	Swipe V 	|Coarse|
|14 |	Shake 	    |Coarse|

In [10]:
%version_information tensorflow, numpy, matplotlib, plotly, pandas, keras, seaborn

Software,Version
Python,3.12.2 64bit [MSC v.1916 64 bit (AMD64)]
IPython,8.25.0
OS,Windows 10 10.0.19045 SP0
tensorflow,2.16.1
numpy,1.26.4
matplotlib,3.8.0
plotly,5.9.0
pandas,2.2.1
keras,3.1.1
seaborn,0.13.2
