In [1]:
import h5py
from tqdm.notebook import tqdm

from deepalign import Dataset
from deepalign import fs
from deepalign.alignments import ALIGNERS
from deepalign.alignments.confnet import ConfNet

To speed up the evaluation, all alignment results are cached on disk. The cache files are included in the download of the GitHub release. If you want to run your own experiments, the code below regenerates the cache files for your models.

In [2]:
def get_aligner(model_file, dataset):
    """Build the aligner named by ``model_file.ad`` and load its stored model.

    Args:
        model_file: Descriptor exposing ``ad``, ``name``,
            ``use_case_attributes`` and ``use_event_attributes``.
        dataset: Dataset handed to the constructor of 'confnet' aligners;
            unused for every other aligner type.

    Returns:
        The aligner instance after ``load`` has been called with the path
        ``fs.MODEL_DIR / model_file.name`` (as a string).
    """
    identifier = model_file.ad

    if 'confnet' not in identifier:
        # Every other aligner is constructed without arguments.
        instance = ALIGNERS[identifier]()
    else:
        # The registry key is the identifier without its last two characters
        # (presumably a per-variant suffix -- TODO confirm against ALIGNERS).
        factory = ALIGNERS[identifier[:-2]]
        instance = factory(dataset,
                           use_case_attributes=model_file.use_case_attributes,
                           use_event_attributes=model_file.use_event_attributes)

    model_path = fs.MODEL_DIR / model_file.name
    instance.load(str(model_path))
    return instance

In [3]:
# Names of the synthetic event logs. Not referenced in this cell; kept because
# later cells may rely on it.
synthetic = ['paper', 'p2p', 'small', 'medium', 'large', 'huge', 'gigantic', 'wide']

# Strip the '_forward' / '_backward' markers so that files differing only in
# that marker collapse to a single entry.
models = sorted({f.name.replace('_forward', '').replace('_backward', '')
                 for f in fs.get_aligner_files()})

# Only evaluate models without a cached result. The file name is derived via
# fs.AlignerFile, exactly as in the write step below, so the existence check
# and the written file always refer to the same path.
models = [m for m in models if not (fs.RESULT_DIR / (fs.AlignerFile(m).name + '.h5')).exists()]

for model in tqdm(models):
    model_file = fs.AlignerFile(model)
    dataset = Dataset(model_file.event_log_name,
                      use_case_attributes=model_file.use_case_attributes,
                      use_event_attributes=model_file.use_event_attributes)
    aligner = get_aligner(model_file, dataset)

    if isinstance(aligner, ConfNet):
        # ConfNet aligners process the dataset in batches.
        alignments, beams, costs = aligner.batch_align(dataset, batch_size=5000)
    else:
        try:
            alignments, beams, costs = aligner.align(dataset)
        except Exception as e:
            # Best effort: a failing aligner must not abort the whole run, but
            # report which model was skipped so the gap in the cache is visible.
            print(f'Skipping {model_file.name}: {type(e).__name__}: {e}')
            continue

    # Persist the three result arrays, gzip-compressed at the maximum level.
    with h5py.File(str(fs.RESULT_DIR / (model_file.name + '.h5')), 'w') as file:
        file.create_dataset('alignments', data=alignments, compression="gzip", compression_opts=9)
        file.create_dataset('beams', data=beams, compression="gzip", compression_opts=9)
        file.create_dataset('costs', data=costs, compression="gzip", compression_opts=9)

HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

100%|██████████| 651/651 [00:17<00:00, 37.05it/s]
paper-0.3-4: 100%|██████████| 651/651 [00:04<00:00, 156.82it/s]



