In [81]:
import json
from pathlib import Path
import pandas as pd
import re

In [82]:
# Patterns for the evaluation summary line printed by each experiment notebook,
# e.g. " Accuracy: 0.662925, F1 score: 0.597429, Avg loss: 0.650315".
# Raw strings keep `\d` from being read as an (invalid) string escape, and the
# accuracy pattern is anchored on its label so it cannot pick up some other
# "<label>: <number>" pair that happens to come first on the line.
acc_pattern = re.compile(r"Accuracy: (\d*[.]\d*).*")
f1_pattern = re.compile(r"F1 score: (\d*[.]\d*).*")

# One (experiment_name, accuracy_percent, f1_percent) tuple per notebook.
metrics = []

for nb_file in Path("../torch_cnn_experiments/").glob("*.ipynb"):
    print(nb_file)
    # Notebook files are JSON; read them as UTF-8 instead of the locale default.
    with open(nb_file, "r", encoding="utf-8") as fr:
        nb_json = json.load(fr)

    # NOTE(review): assumes the final cell's second-to-last output is a text
    # stream whose second-to-last line is the final evaluation summary —
    # confirm against the experiment notebooks if their layout changes.
    summary_line = nb_json['cells'][-1]['outputs'][-2]['text'][-2]
    print(summary_line)

    acc_match = acc_pattern.search(summary_line)
    f1_match = f1_pattern.search(summary_line)
    if acc_match is None or f1_match is None:
        # Fail with the offending file and line instead of an AttributeError on None.
        raise ValueError(
            f"{nb_file}: no accuracy/F1 summary found in {summary_line!r}"
        )

    acc = float(acc_match.group(1))
    f1 = float(f1_match.group(1))

    # Store the scores as percentages rounded to one decimal place.
    metrics.append((str(nb_file.stem), round(acc*100,1), round(f1*100,1)))

metrics

../torch_cnn_experiments/human_enhancers_cohn_subword_512.ipynb
 Accuracy: 0.662925, F1 score: 0.597429, Avg loss: 0.650315 

../torch_cnn_experiments/human_enhancers_cohn_kmer_5.ipynb
 Accuracy: 0.649539, F1 score: 0.587037, Avg loss: 0.659458 

../torch_cnn_experiments/human_enhancers_cohn_kmer_7.ipynb
 Accuracy: 0.607513, F1 score: 0.515484, Avg loss: 0.676561 

../torch_cnn_experiments/demo_human_or_worm_character.ipynb
 Accuracy: 0.924000, F1 score: 0.922856, Avg loss: 0.539537 

../torch_cnn_experiments/human_enhancers_cohn_character.ipynb
 Accuracy: 0.658031, F1 score: 0.554248, Avg loss: 0.646865 

../torch_cnn_experiments/human_enhancers_cohn_kmer_3.ipynb
 Accuracy: 0.677605, F1 score: 0.647591, Avg loss: 0.653015 

../torch_cnn_experiments/demo_human_or_worm_kmer_3.ipynb
 Accuracy: 0.895840, F1 score: 0.892030, Avg loss: 0.550661 

../torch_cnn_experiments/demo_human_or_worm_subword_512.ipynb
 Accuracy: 0.885000, F1 score: 0.878703, Avg loss: 0.554625 

../torch_cnn_experimen

[('human_enhancers_cohn_subword_512', 66.3, 59.7),
 ('human_enhancers_cohn_kmer_5', 65.0, 58.7),
 ('human_enhancers_cohn_kmer_7', 60.8, 51.5),
 ('demo_human_or_worm_character', 92.4, 92.3),
 ('human_enhancers_cohn_character', 65.8, 55.4),
 ('human_enhancers_cohn_kmer_3', 67.8, 64.8),
 ('demo_human_or_worm_kmer_3', 89.6, 89.2),
 ('demo_human_or_worm_subword_512', 88.5, 87.9),
 ('human_enhancers_cohn_kmer_6', 63.4, 55.9),
 ('human_enhancers_cohn_kmer_4', 66.7, 63.8),
 ('demo_human_or_worm_kmer_2', 91.9, 91.8),
 ('demo_human_or_worm_subword_1024', 89.5, 89.5),
 ('human_enhancers_cohn_kmer_2', 67.7, 62.0),
 ('demo_human_or_worm_subword_128', 88.3, 87.7),
 ('human_enhancers_cohn_subword_1024', 66.5, 62.6),
 ('demo_human_or_worm_subword_64', 91.4, 91.1),
 ('human_enhancers_cohn_subword_128', 67.3, 64.9),
 ('human_enhancers_cohn_subword_256', 67.2, 63.3),
 ('human_enhancers_cohn_subword_64', 66.2, 58.7),
 ('demo_human_or_worm_subword_256', 88.6, 88.2)]

In [83]:
def split_experiment(ex):
    """Split an experiment name into ``(dataset, tokenization, parameters)``.

    Recognised shapes:

    * ``<dataset>_character``        -> ``(dataset, 'character', '')``
    * ``<dataset>_kmer_<value>``     -> ``(dataset, 'kmer', value)``
    * ``<dataset>_subword_<value>``  -> ``(dataset, 'subword', value)``

    ``value`` is whatever follows the last underscore and is returned as a string.

    Args:
        ex: Experiment name (the notebook file stem).

    Returns:
        A 3-tuple of strings ``(dataset, tokenization, parameters)``.

    Raises:
        ValueError: If ``ex`` contains none of the known tokenization labels.
            Previously the function fell through and returned ``None``, which
            only failed later when the result was concatenated with a tuple.
    """

    def dataset_prefix(label):
        # Drop the separator character in front of the label. Clamp at 0 so a
        # name that *starts* with the label yields '' instead of slicing with
        # -1 and returning the name minus its last character.
        return ex[:max(ex.index(label) - 1, 0)]

    if ex.endswith('character'):
        return (dataset_prefix('character'), 'character', '')

    # Same precedence as before: 'kmer' is checked ahead of 'subword'.
    for label in ('kmer', 'subword'):
        if label in ex:
            return (dataset_prefix(label), label, ex[ex.rindex('_') + 1:])

    raise ValueError(f"unrecognised experiment name: {ex!r}")

In [84]:
# Expand each (experiment_name, accuracy, f1) row into
# (dataset, tokenization, parameters, accuracy, f1).
# Rows that already have five fields are left untouched, so re-running this
# cell does not feed an already-split dataset name back into split_experiment.
# Slice assignment keeps updating the same list object, as the original loop did.
metrics[:] = [
    split_experiment(row[0]) + tuple(row[1:]) if len(row) == 3 else row
    for row in metrics
]
metrics


[('human_enhancers_cohn', 'subword', '512', 66.3, 59.7),
 ('human_enhancers_cohn', 'kmer', '5', 65.0, 58.7),
 ('human_enhancers_cohn', 'kmer', '7', 60.8, 51.5),
 ('demo_human_or_worm', 'character', '', 92.4, 92.3),
 ('human_enhancers_cohn', 'character', '', 65.8, 55.4),
 ('human_enhancers_cohn', 'kmer', '3', 67.8, 64.8),
 ('demo_human_or_worm', 'kmer', '3', 89.6, 89.2),
 ('demo_human_or_worm', 'subword', '512', 88.5, 87.9),
 ('human_enhancers_cohn', 'kmer', '6', 63.4, 55.9),
 ('human_enhancers_cohn', 'kmer', '4', 66.7, 63.8),
 ('demo_human_or_worm', 'kmer', '2', 91.9, 91.8),
 ('demo_human_or_worm', 'subword', '1024', 89.5, 89.5),
 ('human_enhancers_cohn', 'kmer', '2', 67.7, 62.0),
 ('demo_human_or_worm', 'subword', '128', 88.3, 87.7),
 ('human_enhancers_cohn', 'subword', '1024', 66.5, 62.6),
 ('demo_human_or_worm', 'subword', '64', 91.4, 91.1),
 ('human_enhancers_cohn', 'subword', '128', 67.3, 64.9),
 ('human_enhancers_cohn', 'subword', '256', 67.2, 63.3),
 ('human_enhancers_cohn', 'su

In [85]:
# Build the results table from the expanded metric rows and group the rows of
# each dataset together by sorting on the dataset name.
tab = (
    pd.DataFrame.from_records(
        metrics,
        columns=["Dataset", "Tokenization", "Parameters", "Accuracy", "F1 score"],
    )
    .sort_values(['Dataset'])
)
tab

Unnamed: 0,Dataset,Tokenization,Parameters,Accuracy,F1 score
19,demo_human_or_worm,subword,256.0,88.6,88.2
3,demo_human_or_worm,character,,92.4,92.3
15,demo_human_or_worm,subword,64.0,91.4,91.1
6,demo_human_or_worm,kmer,3.0,89.6,89.2
7,demo_human_or_worm,subword,512.0,88.5,87.9
13,demo_human_or_worm,subword,128.0,88.3,87.7
10,demo_human_or_worm,kmer,2.0,91.9,91.8
11,demo_human_or_worm,subword,1024.0,89.5,89.5
17,human_enhancers_cohn,subword,256.0,67.2,63.3
16,human_enhancers_cohn,subword,128.0,67.3,64.9
