In [67]:
from copy import copy
import numpy as np
import re
import os
from typing import Iterable
from pprint import pprint

# Number of task sections expected in every seed report file; the aggregation
# loop asserts that each parsed report yields exactly this many score rows.
NUM_EXPERIMENTS = 101

# Regex template for an ASSIN evaluation report. It is searched with
# re.search, so every "." is a single-character wildcard and the capture
# groups grab fixed-width fields, in order:
#   1. accuracy (followed by "%")   2. macro F1
#   3. Pearson                      4. mean squared error
# The tabs and alignment spaces are part of the pattern and must match the
# report text exactly.
assin_report = \
r"""RTE evaluation
Accuracy	Macro F1
--------	--------
  (.....)%	   (.....)

Similarity evaluation
Pearson		Mean Squared Error
-------		------------------
  (.....)		              (....)"""

# Regex template for a TweetSentBR evaluation report. It has six fixed-width
# capture groups; get_metrics only reads the first four (Acc and the three
# per-class F1 values), the "Acc dist" groups are matched but not used.
tweetsent_report = \
r"""Acc: (......)
F1. neg: (.....) neu: (.....) post: (.....)
Acc dist min: (......)
Acc dist max: (......)
Acc dist min equal: (......)
Acc dist max equal: (......)"""

# Decimal places used by set_metrics when rounding mean/std, indexed in the
# same order as the first four capture groups of the corresponding template.
rounding_assin = [2, 3, 3, 2]
rounding_tweetsent = [5, 3, 3, 3]

def assin_match(task: str):
    """Look for the ASSIN report layout anywhere inside ``task``.

    Returns the ``re.Match`` produced by the ``assin_report`` template, or
    ``None`` when the text does not contain an ASSIN-style report.
    """
    found = re.compile(assin_report).search(task)
    return found

def tweetsent_match(task: str):
    """Look for the TweetSentBR report layout anywhere inside ``task``.

    Returns the ``re.Match`` produced by the ``tweetsent_report`` template, or
    ``None`` when the text does not contain a TweetSentBR-style report.
    """
    found = re.compile(tweetsent_report).search(task)
    return found


def get_metrics(task: str) -> list:
    """Extract the four headline metric strings from one task report.

    The ASSIN layout is tried first and the TweetSentBR layout second; the
    first four capture groups of whichever template matches are returned.

    Args:
        task: Text of a single task section of an evaluation report.

    Returns:
        A list with four strings, exactly as captured from the report (they
        are not converted to numbers here).

    Raises:
        ValueError: If ``task`` matches neither report layout.
    """
    # Match objects are always truthy, so ``or`` falls through to the
    # TweetSentBR template only when the ASSIN search returned None. This
    # avoids running the ASSIN regex twice.
    match = assin_match(task) or tweetsent_match(task)

    if not match:
        raise ValueError('Task report not in ASSIN or TweetSentBR format')

    return [match.group(i) for i in range(1, 5)]

def set_metrics(task: str, mean: tuple, std: tuple):
    """Return ``task`` with its four metric values rewritten as ``mean±std``.

    The report layout (ASSIN first, then TweetSentBR) decides both where the
    four metric fields are and how many decimals each one is rounded to
    (``rounding_assin`` / ``rounding_tweetsent``).

    Each field is replaced at the exact position of its capture group. The
    sampled values are never used as search patterns, so two fields that
    happen to show the same number, or a number containing ``.``, cannot
    cause the wrong text to be rewritten.

    Args:
        task: Text of a single task section of an evaluation report.
        mean: Four mean values, in capture-group order.
        std: Four standard deviations, in capture-group order.

    Returns:
        The task text with the four metric fields replaced.

    Raises:
        ValueError: If ``task`` matches neither report layout.
    """
    symbol = '\u00b1'

    match = assin_match(task)
    if match:
        rounding = rounding_assin
    else:
        rounding = rounding_tweetsent
        match = tweetsent_match(task)

    if not match:
        raise ValueError('Task report not in ASSIN or TweetSentBR format')

    # Splice from the last group to the first: replacing a later field never
    # shifts the character offsets of the fields that precede it.
    for group in range(4, 0, -1):
        idx = group - 1
        decimals = rounding[idx]
        # np.round accepts NumPy scalars and plain Python floats alike.
        value = (
            str(np.round(mean[idx], decimals)) +
            symbol +
            str(np.round(std[idx], decimals))
        )
        start, end = match.span(group)
        task = task[:start] + value + task[end:]

    return task

def get_tasks(report: str):
    """Split a full report into its per-task sections.

    The text is cut at every ``corpus...`` line remainder and at every
    ``Saving generated XMLs...`` marker; pieces that contain no word
    character (blank or punctuation-only leftovers) are discarded.

    Returns:
        A list with the remaining sections, in their original order.
    """
    kept = []
    for piece in re.split('corpus.*|Saving generated XMLs...', report):
        if re.search(r'\w', piece) is None:
            continue
        kept.append(piece)

    return kept

path = 'report/seed'
pattern = r'seed_2(.*).txt'

# Parse every seed report file into one row of per-task metric strings.
scores_lst = list()
# os.listdir returns entries in arbitrary order; sorting makes the run
# reproducible, including which report ends up reused as the output template.
for filename in sorted(os.listdir(path)):
    seed = re.search(pattern, filename)

    if not seed:
        continue  # not a seed report file
    seed = seed.group(1)

    with open(f'{path}/{filename}') as f:
        report = f.read()

    # Collapse runs of three or more newlines into a single blank line.
    report = re.sub('\n\n\n\n*','\n\n',report)
    tasks = get_tasks(report)
    scores = tuple(map(get_metrics, tasks))

    assert len(scores) == NUM_EXPERIMENTS, (
        f'{filename}: expected {NUM_EXPERIMENTS} task reports, found {len(scores)}'
    )

    scores_lst.append(scores)

# Fail with a clear message instead of a NameError on `report` further down.
if not scores_lst:
    raise FileNotFoundError(
        f'No seed report matching {pattern!r} found in {path!r}'
    )

# Resulting shape: (seed files, NUM_EXPERIMENTS, 4 metrics).
# Statistics are taken across seed files (axis 0).
scores_arr = np.array(scores_lst).astype(np.float32)
scores_mean = np.mean(scores_arr, axis=0).round(decimals=5)
scores_std = np.std(scores_arr, axis=0).round(decimals=5)

# Reuse the last report read as the layout template, after stripping
# path/seed noise. The substitutions are order-dependent: '/' is replaced
# only after the path prefixes have been removed.
sample = report
for regex, replacement in (
    ('Saved evaluation:', 'Evaluation:'),
    ('report/', ''),
    ('seed/.*/', ''),
    ('_eval.txt', ''),
    ('/', ' '),
    ('Saving generated XMLs...', ''),
):
    sample = re.sub(regex, replacement, sample)

# Pair each task section with its row of means and standard deviations.
tasks = tuple(get_tasks(sample))
tasks_mean = tuple(map(set_metrics, tasks, scores_mean, scores_std))
report_mean = "\n-------------------------------------------\n".join(tasks_mean)

# Standard deviations of the first metric column (1-D) and of the second
# metric column (kept 2-D by the list index).
in_percentage = scores_std[:, 0]
not_in_percentage = scores_std[:, [1]]

print(report_mean)



RTE evaluation
Accuracy	Macro F1
--------	--------
  85.17±0.76%	   0.548±0.037

Similarity evaluation
Pearson		Mean Squared Error
-------		------------------
  0.765±0.01		              0.34±0.02

Evaluation: st-dnn bert_base assin-ptbr


-------------------------------------------


RTE evaluation
Accuracy	Macro F1
--------	--------
  85.54±1.63%	   0.716±0.089

Similarity evaluation
Pearson		Mean Squared Error
-------		------------------
  0.786±0.01		              0.56±0.06

Evaluation: st-dnn bert_base assin-ptpt


-------------------------------------------


RTE evaluation
Accuracy	Macro F1
--------	--------
  87.03±0.61%	   0.87±0.006

Similarity evaluation
Pearson		Mean Squared Error
-------		------------------
  0.82±0.007		              0.62±0.05

Evaluation: st-dnn bert_base assin2
Saving generated JSON...

-------------------------------------------

Acc: 0.70798±0.0042
F1. neg: 0.697±0.009 neu: 0.585±0.006 post: 0.79±0.002
Acc dist min: 0.2920
Acc dist max: 0.2920
Acc d

In [1]:
# Clone the `seed` branch of the mt-dnn_port repository, then make the
# checkout the working directory for the cells that follow.
!git  clone https://github.com/jubs12/mt-dnn_port.git -b seed
%cd mt-dnn_port

Cloning into 'mt-dnn_port'...
remote: Enumerating objects: 4763, done.[K
remote: Counting objects: 100% (4763/4763), done.[K
remote: Compressing objects: 100% (3485/3485), done.[K
remote: Total 28765 (delta 1150), reused 4469 (delta 918), pack-reused 24002[K
Receiving objects: 100% (28765/28765), 300.07 MiB | 15.28 MiB/s, done.
Resolving deltas: 100% (11347/11347), done.
Checking out files: 100% (12228/12228), done.
/content/mt-dnn_port
