In [15]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
import pickle
from sklearn.metrics import f1_score
from tensorflow import keras
import yaml
#import tikzplotlib

import importlib


# import Model13_FullyConvCodeDecode
# importlib.reload(Model13_FullyConvCodeDecode)
# from Model13_FullyConvCodeDecode import ConvModel

import Model13_FullyConvCodeDecode2interpol
importlib.reload(Model13_FullyConvCodeDecode2interpol)
from Model13_FullyConvCodeDecode2interpol import ConvModel

# import Model13_FullyConvCodeDecode3interpol
# importlib.reload(Model13_FullyConvCodeDecode3interpol)
# from Model13_FullyConvCodeDecode3interpol import ConvModel

import seaborn as sns
importlib.reload(sns)
import Data_mdl13
importlib.reload(Data_mdl13)
from Data_mdl13 import DataProcessor_mdl13

import Plots
importlib.reload(Plots)
from Plots import plot_CM, plot_CM_mplt



In [16]:
cwd = Path.cwd()

%load_ext tensorboard
%load_ext version_information
#!rm -rf ./logs/

logsdir = cwd / '..' / 'logs'
# !rmdir /s /q {logsdir}
#!tensorboard --logdir {logsdir} --host localhost --port 6006

cfg = 'config_mdl13_concat.yaml'
with open(cfg, 'r') as f:
    cfg_data = yaml.safe_load(f)
seeds = cfg_data['seeds']
folds = np.arange(1,21)

folder = 'concat_sequences_fullyconv'
datapath = cwd / '..' / 'Data' / 'DHG2016' / 'concat_sequences_fullyconv'

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
The version_information extension is already loaded. To reload it, use:
  %reload_ext version_information


In [12]:
# process Data
resultsDFslide = pd.DataFrame()



data_processor = DataProcessor_mdl13(cfg)
data_processor.load_handgestdata()
data_processor.handangles2windows()
data_processor.save_config(folder)
data_processor.save_windowsets(folder)



foldsprocessors = dict()
for fold in folds:
    data_processor = DataProcessor_mdl13(cfg, fold = fold)
    data_processor.load_windows(folder)
    data_processor.processwindows()
    data_processor.save_windowsets_processed(folder, name='Fold'+str(fold))

# ohne die infos zu schreiben benötigt das Preprocessing 1 Minute statt 4 Minuten
# 284 min

permuting Samples using seed 26
Make Windows and apply framreferences...
Processing subject 1
Processing subject 2


In [17]:
# train model


results_valid_labels_list = []
results_scores_list_valid = []
results_scores_list_train = []

for seedidx, n_fold in enumerate(folds):
    print('evaluating Fold', n_fold)
    seed_val = seeds[seedidx]

    fold = pickle.load(open((datapath / ('Fold' + str(n_fold) + '.pkl')).resolve(), 'rb'))
    Y_train_oh = fold['train']['Y_allframes_oh']
    Y_valid_oh = fold['valid']['Y_allframes_oh']
    Y_valid = fold['valid']['Y_allframes']
    X_train = fold['train']['X']
    X_valid = fold['valid']['X']
    print('training ...')


    mdl13 = ConvModel(cfg, 'Test', seed = seed_val)
    mdl13.compile_model()

    # Train the model
    mdl13.train_model(X_train, Y_train_oh, X_valid, Y_valid_oh)

    Y_prob_train = mdl13.model.predict(X_train)
    Y_prob_valid = mdl13.model.predict(X_valid)
    # Y_score_true = np.max(Y_prob_valid, axis=-1).flatten()
    Y_score_true = Y_prob_valid[np.arange(Y_valid.shape[0])[:, None], np.arange(Y_valid.shape[1]), Y_valid].flatten()

    Y_hat_train = np.argmax(Y_prob_train, axis=-1).flatten()
    Y_hat_valid = np.argmax(Y_prob_valid, axis=-1).flatten()

    results_train = mdl13.model.evaluate(X_train, Y_train_oh, verbose = 0, return_dict=True)
    results_train['f1_score'] = f1_score(fold['train']['Y_allframes'].flatten(), Y_hat_train, average='macro')

    results_valid = mdl13.model.evaluate(X_valid, Y_valid_oh, verbose = 0, return_dict=True)
    results_valid['f1_score'] = f1_score(fold['valid']['Y_allframes'].flatten(), Y_hat_valid, average='macro')

    results_valid['fold'] = n_fold

    print(f'NLL: {results_valid['NLL']:,.2f}')
    print(f'Validation accuracy: {results_valid['accuracy']:,.2f}')
    print(f'Validation F1Score: {results_valid['f1_score']:,.2f}')


    #caution from here
    results_valid_labels_list.append(pd.DataFrame({'Fold': n_fold, 'Y': fold['valid']['Y_allframes'].flatten(), 'Y_hat': Y_hat_valid, 'Y_prob': Y_score_true}))
    results_scores_list_train.append(pd.DataFrame(results_train, index = [n_fold]))
    results_scores_list_valid.append(pd.DataFrame(results_valid, index = [n_fold]))
    #results_scores_list.append({'Fold': n_fold, 'Accuracy_train': accuracy_train, 'Accuracy_valid': accuracy_valid, 'F1-Score_train': f1_score_train, 'F1-Score_valid': f1_score_valid, 'NLL_train': nll_train, 'NLL_valid': nll_valid})
    
results_scores = pd.concat([pd.concat(results_scores_list_train).add_suffix('_train'),pd.concat(results_scores_list_valid).add_suffix('_valid')],axis = 1)
results_scores.rename(columns = {'fold_train': 'fold'}, inplace = True)
results_valid_labels = pd.concat(results_valid_labels_list)

evaluating Fold 1
training ...
Setting seed to 5
Epoch 1/30
1226/1226 - 163s - 133ms/step - NLL: 1.1584 - accuracy: 0.6468 - loss: 1.3589 - precision: 0.8070 - recall: 0.5167 - val_NLL: 0.9569 - val_accuracy: 0.6930 - val_loss: 1.1395 - val_precision: 0.7978 - val_recall: 0.6168
Epoch 2/30
1226/1226 - 146s - 119ms/step - NLL: 0.7289 - accuracy: 0.7611 - loss: 0.9196 - precision: 0.8446 - recall: 0.6832 - val_NLL: 0.7495 - val_accuracy: 0.7473 - val_loss: 0.9498 - val_precision: 0.8217 - val_recall: 0.6961
Epoch 3/30
1226/1226 - 146s - 119ms/step - NLL: 0.5774 - accuracy: 0.8064 - loss: 0.7751 - precision: 0.8671 - recall: 0.7497 - val_NLL: 0.7219 - val_accuracy: 0.7642 - val_loss: 0.9212 - val_precision: 0.8184 - val_recall: 0.7270
Epoch 4/30
1226/1226 - 144s - 118ms/step - NLL: 0.5128 - accuracy: 0.8260 - loss: 0.7084 - precision: 0.8775 - recall: 0.7780 - val_NLL: 0.6744 - val_accuracy: 0.7803 - val_loss: 0.8721 - val_precision: 0.8279 - val_recall: 0.7473
Epoch 5/30
1226/1226 - 146s

In [20]:
results_valid_labels.to_pickle(datapath/'results_valid_labels.pkl')
results_scores.to_pickle(datapath/'results_scores.pkl')

In [7]:
results_scores

Unnamed: 0,NLL_train,accuracy_train,loss_train,precision_train,recall_train,f1_score_train,NLL_valid,accuracy_valid,loss_valid,precision_valid,recall_valid,f1_score_valid,fold_valid
1,0.385008,0.863444,0.578677,0.898482,0.83262,0.793613,0.665227,0.7798,0.859126,0.82379,0.750615,0.651938,1
2,0.389026,0.861606,0.584013,0.894719,0.832997,0.784347,0.971316,0.724225,1.166631,0.785098,0.697033,0.45393,2
3,0.326934,0.882922,0.521456,0.912635,0.855704,0.826102,0.581181,0.812621,0.775971,0.841718,0.789725,0.72371,3
4,0.39104,0.859429,0.588602,0.890935,0.832517,0.78143,0.561893,0.812653,0.764584,0.847478,0.787075,0.646243,4
5,0.361692,0.870215,0.555491,0.904206,0.83995,0.805061,0.617805,0.807268,0.811428,0.838356,0.775672,0.661167,5
6,0.37103,0.865852,0.568357,0.897444,0.837603,0.795591,0.657011,0.799251,0.854589,0.847649,0.767156,0.600589,6
7,0.344297,0.876678,0.541302,0.904862,0.851306,0.815235,0.682715,0.767962,0.879525,0.809568,0.732279,0.647981,7
8,0.325365,0.881818,0.52087,0.907822,0.859259,0.827224,0.471508,0.830524,0.66583,0.871739,0.803608,0.73509,8
9,0.3775,0.865875,0.567579,0.903216,0.832124,0.799492,0.759284,0.767631,0.949069,0.819288,0.730189,0.614522,9
10,0.263761,0.905217,0.453359,0.920825,0.891567,0.867619,0.505105,0.832471,0.693001,0.853694,0.817732,0.739106,10


In [13]:
# learning rate 0.005%
mean = results_scores.mean()
std = results_scores.std()

print(f'accuracy on training data - frames: {mean["accuracy_train"]*100:.1f}% +-  {std["accuracy_train"]*100:.1f}%')
print(f'F1_Score on training data - frames: {mean["f1_score_train"]*100:.1f}% +- {std["f1_score_train"]*100:.1f}%')
print(f'NLL on training data - frames: {mean["NLL_train"]:.2f} +- {std["NLL_train"]:.2f}')

print(f'accuracy on validation data - frames: {mean['accuracy_valid']*100:.1f}% +- {std['accuracy_valid']*100:.1f}%')
print(f'F1-Score on validation data - frames: {mean['f1_score_valid']*100:.1f}% +- {std['f1_score_valid']*100:.1f}%')
print(f'NLL on validation data - frames: {mean['NLL_valid']:.2f} +- {std['NLL_valid']:.2f}')

accuracy on training data - frames: 88.1% +-  1.9%
F1_Score on training data - frames: 82.2% +- 3.2%
NLL on training data - frames: 0.33 +- 0.05
accuracy on validation data - frames: 79.2% +- 2.9%
F1-Score on validation data - frames: 65.9% +- 6.2%
NLL on validation data - frames: 0.67 +- 0.13


In [19]:
# learning rate 0.001
mean = results_scores.mean()
std = results_scores.std()

print(f'accuracy on training data - frames: {mean["accuracy_train"]*100:.1f}% +-  {std["accuracy_train"]*100:.1f}%')
print(f'F1_Score on training data - frames: {mean["f1_score_train"]*100:.1f}% +- {std["f1_score_train"]*100:.1f}%')
print(f'NLL on training data - frames: {mean["NLL_train"]:.2f} +- {std["NLL_train"]:.2f}')

print(f'accuracy on validation data - frames: {mean['accuracy_valid']*100:.1f}% +- {std['accuracy_valid']*100:.1f}%')
print(f'F1-Score on validation data - frames: {mean['f1_score_valid']*100:.1f}% +- {std['f1_score_valid']*100:.1f}%')
print(f'NLL on validation data - frames: {mean['NLL_valid']:.2f} +- {std['NLL_valid']:.2f}')


accuracy on training data - frames: 87.4% +-  1.3%
F1_Score on training data - frames: 81.3% +- 2.3%
NLL on training data - frames: 0.35 +- 0.04
accuracy on validation data - frames: 79.5% +- 3.1%
F1-Score on validation data - frames: 66.4% +- 7.0%
NLL on validation data - frames: 0.65 +- 0.14


In [18]:
mdl13.model.summary()

In [10]:
%version_information tensorflow, numpy, matplotlib, plotly, pandas, keras, seaborn

Software,Version
Python,3.12.2 64bit [MSC v.1916 64 bit (AMD64)]
IPython,8.25.0
OS,Windows 10 10.0.19045 SP0
tensorflow,2.16.1
numpy,1.26.4
matplotlib,3.8.0
plotly,5.9.0
pandas,2.2.1
keras,3.1.1
seaborn,0.13.2
