In [1]:

import tensorflow as tf
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
import yaml

from sklearn import metrics
import pickle
from sklearn.ensemble import RandomForestClassifier

import importlib

import BaseData20Fold
importlib.reload(BaseData20Fold)
from BaseData20Fold import DataProcessor

import Plots
importlib.reload(Plots)
from Plots import plot_CM




In [2]:
# --- Notebook configuration ------------------------------------------------
# Name of the YAML file that holds the seeds (and, further down, the model
# hyper-parameters); it is opened relative to the working directory.
cfg = 'config_mdl2_concat.yaml'
with open(cfg, 'r') as file:
    config = yaml.safe_load(file)
seeds = config['seeds']

# Fold numbers 1..20 (the upper bound of np.arange is exclusive).
folds = np.arange(1, 21)

# Directory holding the per-fold pickles, located relative to the working directory.
cwd = Path.cwd()
datapath = cwd.joinpath('..', 'Data', 'DHG2016', 'concat_sequences')

In [3]:
# Class-balance check on fold 1: which share of all windows (train + valid)
# carries label 0?
# NOTE: pickle can execute arbitrary code while loading; only open fold files
# that were written by this project.
fold_path = (datapath / ('Fold' + str(1) + '.pkl')).resolve()
with open(fold_path, 'rb') as fold_file:  # context manager closes the handle
    fold = pickle.load(fold_file)
labels = np.concatenate([fold['train']['Y'], fold['valid']['Y']])

# Last expression of the cell -> displayed as the cell output.
(labels == 0).sum() / len(labels)

0.5853682371767465

In [8]:
# Build the windowed data set once, then derive and store one processed data
# set per fold. The statements below drive the project-local DataProcessor; what
# each method does internally is not visible in this notebook.
resultsDFlong = pd.DataFrame()  # NOTE(review): not used anywhere in the visible cells
folder = 'concat_sequences'
# Stage 1: run once without a fold argument and save the config and window sets.
# Per the recorded log this is the step that walks over subjects 1..20.
data_processor = DataProcessor(cfg)
data_processor.load_handgestdata()
data_processor.handangles2windows()
data_processor.save_config(folder)
data_processor.save_windowsets(folder)

# Debug switch: uncomment to restrict the loop below to a single fold.
#folds = [5]
    
# Stage 2: for every fold, reload the saved windows, process them and store the
# result under the name 'Fold<k>'. Per the recorded log this step prints
# "Standardize and padding of data..." for the train and valid parts.
for fold in folds:
    data_processor = DataProcessor(cfg, fold = fold)
    # Stage-1 calls are not repeated per fold; the windows are loaded from disk instead.
    # data_processor.load_handgestdata()
    # data_processor.handangles2windows()
    # data_processor.save_windowsets(folder)
    data_processor.load_windows(folder)
    data_processor.processwindows()
    data_processor.save_windowsets_processed(folder, name='Fold'+str(fold))

# Runtime noted by the author for this cell: 223 min

permuting Samples using seed 26
Make Windows and apply framreferences...
Processing subject 1
Processing subject 2
Processing subject 3
Processing subject 4
Processing subject 5
Processing subject 6
Processing subject 7
Processing subject 8
Processing subject 9
Processing subject 10
Processing subject 11
Processing subject 12
Processing subject 13
Processing subject 14
Processing subject 15
Processing subject 16
Processing subject 17
Processing subject 18
Processing subject 19
Processing subject 20
Using seed 26
Fold: 1
Standardize and padding of data...
Standardize data...train
Write data into windows...
Standardize data...valid
Write data into windows...
Using seed 26
Fold: 2
Standardize and padding of data...
Standardize data...train
Write data into windows...
Standardize data...valid
Write data into windows...
Using seed 26
Fold: 3
Standardize and padding of data...
Standardize data...train
Write data into windows...
Standardize data...valid
Write data into windows...
Using seed 26

In [3]:
# Random-forest baseline: fit one forest per cross-validation fold and collect
# (a) per-sample validation predictions and (b) per-fold train/validation scores.
with open(cfg, 'r') as file:
    cfgmdl = yaml.safe_load(file)['model']
datapath = cwd / '..' / 'Data' / 'DHG2016' / 'concat_sequences'

results_valid_labels_list = []
results_scores_list = []

for seedidx, n_fold in enumerate(folds):
    print('evaluating Fold', n_fold)
    # One seed per fold, taken from the config in fold order. It is passed to the
    # classifier below so that a re-run reproduces the same forest.
    seed_val = seeds[seedidx]

    # NOTE: pickle can execute arbitrary code while loading; only open fold files
    # that were written by this project.
    fold_path = (datapath / ('Fold' + str(n_fold) + '.pkl')).resolve()
    with open(fold_path, 'rb') as fold_file:  # context manager closes the handle
        fold = pickle.load(fold_file)
    Y_train = fold['train']['Y']
    Y_valid = fold['valid']['Y']
    # Flatten the last two axes of every sample into one feature vector
    # (row-major, so the last axis varies fastest).
    X_train = fold['train']['X']
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1]*X_train.shape[2])
    X_valid = fold['valid']['X']
    X_valid = X_valid.reshape(X_valid.shape[0], X_valid.shape[1]*X_valid.shape[2])

    print('training ...')
    # Build the classifier from the 'model' section of the config.
    # oob_score=True is only accepted by scikit-learn when bootstrap is enabled.
    clf = RandomForestClassifier(n_estimators = cfgmdl['n_trees'], criterion = cfgmdl['criterion'], bootstrap = cfgmdl['bootstrap'],
                                 oob_score = True, max_depth = cfgmdl['max_depth'], min_samples_leaf=cfgmdl['min_samples_leaf'],  max_features=cfgmdl['max_features'],
                                 random_state = seed_val)
    # Train the classifier.
    clf.fit(X_train, Y_train)

    # Hard predictions and class probabilities for training and validation data.
    Y_hat_train = clf.predict(X_train)
    Y_hat_valid = clf.predict(X_valid)
    Y_probs_valid = clf.predict_proba(X_valid)
    Y_probs_train = clf.predict_proba(X_train)

    # Calculate metrics. `labels=clf.classes_` tells log_loss which class each
    # probability column belongs to, so a fold whose validation part lacks a
    # class is still scored against the right columns.
    f1_score_valid = metrics.f1_score(Y_valid, Y_hat_valid, average = 'macro')
    f1_score_train = metrics.f1_score(Y_train, Y_hat_train, average = 'macro')
    nll_valid = metrics.log_loss(Y_valid, Y_probs_valid, labels = clf.classes_)
    nll_train = metrics.log_loss(Y_train, Y_probs_train, labels = clf.classes_)
    # Same value as clf.score(X, Y), but reuses the predictions made above.
    accuracy_valid = metrics.accuracy_score(Y_valid, Y_hat_valid)
    accuracy_train = metrics.accuracy_score(Y_train, Y_hat_train)

    # Probability assigned to the true class of every validation sample.
    # predict_proba columns follow clf.classes_ (sorted), so labels are mapped to
    # column positions instead of being used as positions directly. A label the
    # forest never saw during training gets probability 0.
    class_cols = np.clip(np.searchsorted(clf.classes_, Y_valid), 0, len(clf.classes_) - 1)
    label_seen = clf.classes_[class_cols] == Y_valid
    Y_prob_true = np.where(label_seen, Y_probs_valid[np.arange(len(Y_valid)), class_cols], 0.0)

    # Collect per-fold results in lists; the DataFrames are built once after the loop.
    results_valid_labels_list.append(pd.DataFrame({'Fold': n_fold, 'Y': Y_valid, 'Y_hat': Y_hat_valid, 'Y_prob': Y_prob_true}))
    results_scores_list.append({'Fold': n_fold, 'accuracy_train': accuracy_train, 'accuracy_valid': accuracy_valid, 'f1_score_train': f1_score_train, 'f1_score_valid': f1_score_valid, 'NLL_train': nll_train, 'NLL_valid': nll_valid})


# Convert the lists to DataFrames after the loop
results_valid_labels = pd.concat(results_valid_labels_list)
results_scores = pd.DataFrame(results_scores_list)

# NOTE(review): the recorded outputs of this notebook show that about 98% of the
# validation predictions are class 0 while roughly 59% of the labels are class 0,
# which matches the very low macro F1 on validation data. Worth checking the
# class imbalance handling before trusting the accuracy figure.

# Optional confusion matrix for the last fold (disabled):
#disp = metrics.ConfusionMatrixDisplay.from_predictions(Y_valid, Y_hat_valid, display_labels=range(0,15))
#disp.figure_.suptitle("Konfusionsmatrix Validierungsdaten")

# Experiment history (validation accuracy):
# v1: 0.316
# v2: 0.463, hand position and quaternions added as features
# v3: 0.459, angles expressed as unit vectors



evaluating Fold 1
training ...
evaluating Fold 2
training ...
evaluating Fold 3
training ...
evaluating Fold 4
training ...
evaluating Fold 5
training ...
evaluating Fold 6
training ...
evaluating Fold 7
training ...
evaluating Fold 8
training ...
evaluating Fold 9
training ...
evaluating Fold 10
training ...
evaluating Fold 11
training ...
evaluating Fold 12
training ...
evaluating Fold 13
training ...
evaluating Fold 14
training ...
evaluating Fold 15
training ...
evaluating Fold 16
training ...
evaluating Fold 17
training ...
evaluating Fold 18
training ...
evaluating Fold 19
training ...
evaluating Fold 20
training ...


In [9]:
# Share of all validation samples (over every fold) that were predicted as class 0.
predicted_zero = results_valid_labels['Y_hat'] == 0
sum(predicted_zero) / len(results_valid_labels)

0.9790747956952469

In [5]:
# Store the per-sample predictions and the per-fold scores next to the fold data.
for frame, filename in ((results_valid_labels, 'results_valid_labels_rf.pkl'),
                        (results_scores, 'results_scores_rf.pkl')):
    frame.to_pickle(datapath/filename)


In [4]:
# Summarise the per-fold scores as mean +- standard deviation across folds
# (pandas' default .std(), i.e. the sample standard deviation).
mean = results_scores.mean()
std = results_scores.std()

# Double quotes are used for the keys inside the single-quoted f-strings:
# reusing the outer quote character inside a replacement field is only valid
# from Python 3.12 on and is a SyntaxError on older interpreters.
print(f'accuracy on training data: {mean["accuracy_train"]*100:.1f}% +-  {std["accuracy_train"]*100:.1f}%')
print(f'F1_Score on training data: {mean["f1_score_train"]*100:.1f}% +- {std["f1_score_train"]*100:.1f}%')
print(f'NLL on training data: {mean["NLL_train"]:.2f} +- {std["NLL_train"]:.2f}')

print(f'accuracy on validation data: {mean["accuracy_valid"]*100:.1f}% +- {std["accuracy_valid"]*100:.1f}%')
print(f'F1-Score on validation data: {mean["f1_score_valid"]*100:.1f}% +- {std["f1_score_valid"]*100:.1f}%')
print(f'NLL on validation data: {mean["NLL_valid"]:.2f} +- {std["NLL_valid"]:.2f}')


accuracy on training data: 73.0% +-  0.4%
F1_Score on training data: 46.6% +- 1.2%
NLL on training data: 0.78 +- 0.01
accuracy on validation data: 58.7% +- 4.6%
F1-Score on validation data: 6.5% +- 1.4%
NLL on validation data: 1.45 +- 0.13


In [30]:
# Summarise the per-fold scores as mean +- standard deviation across folds
# (pandas' default .std(), i.e. the sample standard deviation).
mean = results_scores.mean()
std = results_scores.std()

# Double quotes are used for the keys inside the single-quoted f-strings:
# reusing the outer quote character inside a replacement field is only valid
# from Python 3.12 on and is a SyntaxError on older interpreters.
print(f'accuracy on training data: {mean["accuracy_train"]*100:.1f}% +-  {std["accuracy_train"]*100:.1f}%')
print(f'F1_Score on training data: {mean["f1_score_train"]*100:.1f}% +- {std["f1_score_train"]*100:.1f}%')
print(f'NLL on training data: {mean["NLL_train"]:.2f} +- {std["NLL_train"]:.2f}')

print(f'accuracy on validation data: {mean["accuracy_valid"]*100:.1f}% +- {std["accuracy_valid"]*100:.1f}%')
print(f'F1-Score on validation data: {mean["f1_score_valid"]*100:.1f}% +- {std["f1_score_valid"]*100:.1f}%')
print(f'NLL on validation data: {mean["NLL_valid"]:.2f} +- {std["NLL_valid"]:.2f}')



# Experiment log: results of earlier runs, kept for comparison.

# min_samples_leaf = 6, max_depth = 15
# accuracy on validation data: 55.70% +- 3.57%
# F1-Score on validation data: 45.52% +- 3.87%

# min_samples_leaf = 5, max_depth = 15
# accuracy on validation data: 56.67% +- 3.53%
# F1-Score on validation data: 46.00% +- 3.51%

# min_samples_leaf = 5, max_depth = 20
# accuracy on validation data: 59.33% +-  3.45%
# F1-Score on validation data: 46.22% +- 3.63%

# window step size 10 -> 4
# accuracy on validation data: 59.21% +- 3.35%
# F1-Score on validation data: 47.90% +- 3.17%
# NLL on validation data: 1.47 +- 0.11

# (configuration of this run was not noted)
# accuracy on validation data: 71.65% +- 7.60%
# F1-Score on validation data: 71.11% +- 8.00%
# NLL on validation data: 1.20 +- 0.22

# PS2
# accuracy on training data: 78.3% +-  0.3%
# F1_Score on training data: 60.1% +- 0.8%
# NLL on training data: 0.72 +- 0.00
# accuracy on validation data: 59.4% +- 4.5%
# F1-Score on validation data: 9.4% +- 2.0%
# NLL on validation data: 1.42 +- 0.13

# PS3
# accuracy on training data: 78.3% +-  0.3%
# F1_Score on training data: 59.7% +- 0.8%
# NLL on training data: 0.72 +- 0.00
# accuracy on validation data: 59.4% +- 4.5%
# F1-Score on validation data: 9.3% +- 2.2%
# NLL on validation data: 1.41 +- 0.13

accuracy on training data: 78.3% +-  0.3%
F1_Score on training data: 60.1% +- 0.8%
NLL on training data: 0.72 +- 0.00
accuracy on validation data: 59.4% +- 4.5%
F1-Score on validation data: 9.4% +- 2.0%
NLL on validation data: 1.42 +- 0.13


1[{"variableName": "ID_TO_MEANING", "type": "dictionary", "supportedEngines": ["pandas"], "isLocalVariable": true}]

1[{"variableName": "ID_TO_MEANING", "type": "dictionary", "supportedEngines": ["pandas"], "isLocalVariable": true}]

1[{"variableName": "ID_TO_MEANING", "type": "dictionary", "supportedEngines": ["pandas"], "isLocalVariable": true}]

1[{"variableName": "ID_TO_MEANING", "type": "dictionary", "supportedEngines": ["pandas"], "isLocalVariable": true}]

1[{"variableName": "ID_TO_MEANING", "type": "dictionary", "supportedEngines": ["pandas"], "isLocalVariable": true}]

1[{"variableName": "ID_TO_MEANING", "type": "dictionary", "supportedEngines": ["pandas"], "isLocalVariable": true}]

1[{"variableName": "ID_TO_MEANING", "type": "dictionary", "supportedEngines": ["pandas"], "isLocalVariable": true}]

1[{"variableName": "ID_TO_MEANING", "type": "dictionary", "supportedEngines": ["pandas"], "isLocalVariable": true}]

1[{"variableName": "ID_TO_MEANING", "type": "dictionary", "suppo

Ab hier optionaler Code

In [None]:

# Feature importances of `clf`, which at this point is the forest left over from
# the last fold of the training loop.
with open('config_naming.yaml', 'r') as file:
    config_naming = yaml.safe_load(file)
featurenames = config_naming['X_names']

# The forest was trained on windows flattened row-major, so importance i belongs
# to time step i // n_features and feature i % n_features. The number of time
# steps is derived from the data instead of being hard-coded, so the cell keeps
# working when the window length changes.
importances = clf.feature_importances_
n_features = len(featurenames)
n_timesteps, leftover = divmod(len(importances), n_features)
if leftover:
    raise ValueError(
        f'{len(importances)} feature importances cannot be split into '
        f'windows of {n_features} named features')

FImportances = pd.DataFrame(importances, columns=['Importances'])
FImportances['time'] = np.repeat(np.arange(0, n_timesteps), n_features)
FImportances['featurename'] = n_timesteps*list(featurenames)
# Combined label '<time step><feature name>' used as the x axis of the plot.
FImportances['name'] = FImportances['time'].astype(str) + FImportances['featurename']
# Keep only the entries above the importance threshold chosen for this plot.
Important = FImportances.loc[FImportances['Importances'] > 0.0015]

px.scatter(Important, x = 'name', y = 'Importances')

In [None]:
# Feature importances of `clf` (the forest left over from the last trained fold),
# per (time step, feature) entry and averaged per feature name.
# Requires `featurenames` (list of feature names) to be defined already.
# The forest was trained on windows flattened row-major, so importance i belongs
# to time step i // n_features and feature i % n_features. The number of time
# steps is derived from the data instead of being hard-coded.
importances = clf.feature_importances_
n_features = len(featurenames)
n_timesteps, leftover = divmod(len(importances), n_features)
if leftover:
    raise ValueError(
        f'{len(importances)} feature importances cannot be split into '
        f'windows of {n_features} named features')

FImportances = pd.DataFrame(importances, columns=['Importances'])
FImportances['time'] = np.repeat(np.arange(0, n_timesteps), n_features)
FImportances['featurename'] = n_timesteps*list(featurenames)
# Combined label '<time step><feature name>' used as the x axis of the plot.
FImportances['name'] = FImportances['time'].astype(str) + FImportances['featurename']
# Keep only the entries above the importance threshold chosen for this plot.
Important = FImportances.loc[FImportances['Importances'] > 0.0015]
# Mean importance of every feature name over all time steps.
FImportances2 = FImportances.groupby('featurename')['Importances'].mean()
px.scatter(Important, x = 'name', y = 'Importances')

In [None]:
# Scatter plot of FImportances2 (importances grouped by feature name).
# Open question from the author: is this aggregation correct?
# NOTE(review): if every feature name occurs once per time step, the per-name
# mean is proportional to the per-name sum, so the ranking would be the same
# either way - confirm that the feature names are unique.
px.scatter(FImportances2) # is this correct???

In [None]:
# Raw feature importances of `clf`, plotted against their position in the
# flattened feature vector.
px.scatter(clf.feature_importances_)

In [None]:
# Display the importance table (pandas abbreviates long frames in the output).
FImportances

Unnamed: 0,Importances,time,featurename,name
0,0.001114,0,Thumb_CMC_Spread,0Thumb_CMC_Spread
1,0.000739,0,Thumb_CMC_Flex,0Thumb_CMC_Flex
2,0.000426,0,Thumb_MCP_Flex,0Thumb_MCP_Flex
3,0.000485,0,Thumb_IP_Flex,0Thumb_IP_Flex
4,0.000707,0,Index_MCP_Spread,0Index_MCP_Spread
...,...,...,...,...
1345,0.002880,49,Handpoint_Z,49Handpoint_Z
1346,0.006072,49,Handpoint_Quaternion_v1,49Handpoint_Quaternion_v1
1347,0.001540,49,Handpoint_Quaternion_v2,49Handpoint_Quaternion_v2
1348,0.002599,49,Handpoint_Quaternion_v3,49Handpoint_Quaternion_v3


|# |	Name of the gesture |	Type of the gesture|
| --- | --- | --- |
|1 	|Grab 	        |Fine|
|2 	|Tap 	        |Coarse|
|3 	|Expand 	    |Fine|
|4 	|Pinch 	        |Fine|
|5 	|Rotation CW 	|Fine|
|6 	|Rotation CCW 	|Fine|
|7 	|Swipe Right 	|Coarse|
|8 	|Swipe Left 	|Coarse|
|9 	|Swipe Up 	    |Coarse|
|10 |	Swipe Down 	|Coarse|
|11 |	Swipe X 	|Coarse|
|12 |	Swipe V 	|Coarse|
|13 |	Swipe + 	|Coarse|
|14 |	Shake 	    |Coarse |

In [None]:
%version_information tensorflow, numpy, matplotlib, plotly, pandas

Software,Version
Python,3.12.2 64bit [MSC v.1916 64 bit (AMD64)]
IPython,8.20.0
OS,Windows 10 10.0.19045 SP0
tensorflow,2.16.1
numpy,1.26.4
matplotlib,3.8.0
plotly,5.9.0
pandas,2.2.1
Mon Jun 03 10:40:35 2024 Mitteleuropäische Sommerzeit,Mon Jun 03 10:40:35 2024 Mitteleuropäische Sommerzeit
