This notebook is written for Python 2.7.
1. Import the necessary dependencies:

In [None]:
import numpy as np

# Common arekit dependencies.
from arekit.common.entities.formatters.types import EntityFormattersService
from arekit.common.evaluation.evaluators.modes import EvaluationModes
from arekit.common.evaluation.evaluators.two_class import TwoClassEvaluator
from arekit.common.experiment.data_type import DataType
from arekit.common.experiment.folding.types import FoldingType
from arekit.common.experiment.scales.factory import create_labels_scaler
from arekit.common.utils import progress_bar_defined

# Contributed (contrib) part of the arekit library.
from arekit.contrib.bert.output.eval_helper import EvalHelper
from arekit.contrib.bert.run_evaluation import LanguageModelExperimentEvaluator
from arekit.contrib.bert.samplers.types import BertSampleFormatterTypes
from arekit.contrib.experiments.factory import create_experiment
from arekit.contrib.experiments.types import ExperimentTypes
from arekit.contrib.source.ruattitudes.io_utils import RuAttitudesVersionsService
from arekit.contrib.source.rusentiframes.types import RuSentiFramesVersionsService
from arekit.contrib.source.rusentrel.opinions.formatter import RuSentRelOpinionCollectionFormatter

# Arguments
from args.rusentrel import RuSentRelVersionArg
from args.stemmer import StemmerArg
from args.terms_per_context import TermsPerContextArg
from bert_model_io import BertModelIO
from callback import CustomCallback
from common import Common

# Related project dependencies
from data_training import CustomTrainingData
from experiment_io import CustomBertIOUtils
from run_serialization import create_exp_name_suffix


2. Define the evaluation helper class:

In [None]:
class CustomEvalHelper(EvalHelper):
    """Evaluation helper that names the directory and the file of test results.

    The results directory is obtained by combining a target directory name
    with the fine-tuning tag; the results filename is built from the
    iteration index, the epoch index and the state name.
    """

    RESULTS_TEMPLATE_FILENAME = u"test_results_i{it_index}_e{epoch_index}_s{state_name}.tsv"

    def __init__(self, log_dir, state_name, ft_tag):
        """Store the settings used to compose result locations.

        log_dir: kept on the instance; not read by the methods of this class.
        state_name: substituted into the results filename.
        ft_tag: unicode tag or None; combined with the target directory name.
        """
        assert(isinstance(ft_tag, unicode) or ft_tag is None)
        self.__log_dir = log_dir
        self.__state_name = state_name
        self.__ft_tag = ft_tag

    def get_results_dir(self, target_dir):
        """Return `target_dir` combined with the fine-tuning tag."""
        return Common.combine_tag_with_full_model_name(
            full_model_name=target_dir,
            tag=self.__ft_tag)

    def get_results_filename(self, iter_index, epoch_index):
        """Return the results filename for the given iteration and epoch."""
        template = CustomEvalHelper.RESULTS_TEMPLATE_FILENAME
        return template.format(it_index=iter_index,
                               epoch_index=epoch_index,
                               state_name=self.__state_name)

3. Provide predefined parameters:

In [None]:
# Fixed settings shared by every run of the evaluation grid.

# Evaluation setup: evaluator mode and the epochs limit given to the engine.
eval_mode = EvaluationModes.Extraction
max_epochs_count = 200

# Defaults taken from the corresponding argument helpers.
rusentrel_version = RuSentRelVersionArg.default
terms_per_context = TermsPerContextArg.default
stemmer = StemmerArg.supported[StemmerArg.default]

# Left unset; forwarded as-is when the experiment name suffix is composed.
dist_in_terms_between_attitude_ends = None

4. Set up the evaluation grid:

In [None]:
# Evaluation grid: every key maps to the list of values to iterate over.
# Refer to serializing.py when looking through the available options.

# All supported entity formatter types.
_entity_fmts = [EntityFormattersService.get_type_by_name(ent_fmt)
                for ent_fmt in EntityFormattersService.iter_supported_names()]

# Every member of the sample formatter types enumeration.
_sample_types = list(BertSampleFormatterTypes)

# All supported RuAttitudes versions.
_ra_versions = [RuAttitudesVersionsService.find_by_name(ra_name)
                for ra_name in RuAttitudesVersionsService.iter_supported_names()]

# All supported RuSentiFrames versions.
_frames_versions = [RuSentiFramesVersionsService.get_type_by_name(fv)
                    for fv in RuSentiFramesVersionsService.iter_supported_names()]

# Fine-tuned 2-l states.
_fine_tuned_2l_states = [u"ra-12-bert-base-nli-pretrained-2l",
                         u"ra-20-bert-base-nli-pretrained-2l",
                         u"ra-20-bert-large-nli-pretrained-2l"]

# Fine-tuned 3-l states.
_fine_tuned_3l_states = [u"ra-12-bert-base-nli-pretrained-3l",
                         u"ra-20-bert-base-neut-nli-pretrained-3l",
                         u"ra-20-bert-large-neut-nli-pretrained-3l"]

# Default states.
_default_states = [u"multi_cased_L-12_H-768_A-12",
                   u"rubert_cased_L-12_H-768_A-12"]

grid = {
    u"labels": [2, 3],
    u"foldings": [FoldingType.Fixed, FoldingType.CrossValidation],
    u"exp_types": [ExperimentTypes.RuSentRel,
                   ExperimentTypes.RuSentRelWithRuAttitudes],
    u"entity_fmts": _entity_fmts,
    u"sample_types": _sample_types,
    u"ra_names": _ra_versions,
    u'balancing': [True],
    u"frames_versions": _frames_versions,
    u"state_names": _fine_tuned_2l_states + _fine_tuned_3l_states + _default_states,
}

5. Initialize main evaluation engine:

In [None]:
def __run(labels_count, folding_type, exp_type, entity_formatter_type, sample_formatter_type,
          balance_samples, ra_version, frames_version, state_name):
    """Evaluate a single combination of grid parameters on the test data.

    Composes the model name and the experiment name suffix, builds the
    training data and the experiment, and then runs the language model
    evaluator for `DataType.Test`.

    labels_count: number of labels; used for the model name and labels scaler.
    folding_type: folding type passed to the experiment factory.
    exp_type: experiment type passed to the experiment factory.
    entity_formatter_type: entities formatting type, part of the model name.
    sample_formatter_type: sample formatting type, part of the model name.
    balance_samples: balancing flag, part of the experiment name suffix.
    ra_version: RuAttitudes version; also selects the fine-tuning tag.
    frames_version: accepted but not used by this function.
    state_name: state name used when composing results filenames.
    """
    model_name = Common.create_full_model_name(
        sample_fmt_type=sample_formatter_type,
        entities_fmt_type=entity_formatter_type,
        labels_count=int(labels_count))

    name_suffix = create_exp_name_suffix(
        use_balancing=balance_samples,
        terms_per_context=terms_per_context,
        dist_in_terms_between_att_ends=dist_in_terms_between_attitude_ends)

    # Keyword arguments are written in the order their values get constructed:
    # model IO first, then the default evaluator, then the remaining parts.
    training_data = CustomTrainingData(
        model_io=BertModelIO(full_model_name=model_name),
        evaluator=TwoClassEvaluator(eval_mode),
        labels_scaler=create_labels_scaler(labels_count),
        stemmer=stemmer,
        opinion_formatter=RuSentRelOpinionCollectionFormatter(),
        callback=CustomCallback(DataType.Test))

    # Composing experiment.
    exp = create_experiment(exp_type=exp_type,
                            experiment_data=training_data,
                            folding_type=folding_type,
                            rusentrel_version=rusentrel_version,
                            experiment_io_type=CustomBertIOUtils,
                            ruattitudes_version=ra_version,
                            load_ruattitude_docs=False,
                            do_log=False,
                            extra_name_suffix=name_suffix)

    helper = CustomEvalHelper(log_dir=Common.log_dir,
                              state_name=state_name,
                              ft_tag=Common.get_tag_by_ruattitudes_version(ra_version))

    # Build the evaluation engine and start the evaluation process.
    LanguageModelExperimentEvaluator(experiment=exp,
                                     data_type=DataType.Test,
                                     eval_helper=helper,
                                     max_epochs_count=max_epochs_count).run()

6. Declare a runner that iterates over all parameter combinations:

In [None]:
 def run_through_params_grid():
     """Run the evaluation for every combination of values in `grid`.

     This is a generator: it yields None once after each finished `__run`
     call, so a caller can track progress by counting iterations. Nothing
     is executed until the generator is iterated.

     Combinations are visited in the order nested loops over the listed
     keys would produce, with the last key (`state_names`) varying fastest.
     """
     # Function-scope import: keeps this cell independent of the imports cell.
     from itertools import product

     # The argument order fixes the visiting order: product() advances the
     # last iterable fastest, exactly like the innermost of nested loops.
     combinations = product(grid[u"labels"],
                            grid[u"foldings"],
                            grid[u'exp_types'],
                            grid[u'entity_fmts'],
                            grid[u'sample_types'],
                            grid[u'balancing'],
                            grid[u'ra_names'],
                            grid[u'frames_versions'],
                            grid[u'state_names'])

     for (labels_count, folding_type, exp_type, entity_formatter_type,
          sample_formatter_type, balance_samples, ra_version,
          frames_version, state_name) in combinations:
         __run(labels_count=labels_count,
               folding_type=folding_type,
               exp_type=exp_type,
               entity_formatter_type=entity_formatter_type,
               sample_formatter_type=sample_formatter_type,
               balance_samples=balance_samples,
               ra_version=ra_version,
               frames_version=frames_version,
               state_name=state_name)
         # One tick per finished run.
         yield None

7. Finally run!

In [None]:
 # Run the whole grid, wrapped into a progress bar (tqdm, per the original note).
    # Expected number of iterations: the product of the lengths of all grid
    # lists, i.e. the number of parameter combinations the runner goes through.
    grid_sizes = [len(v) for v in grid.values()]
    it = progress_bar_defined(iterable=run_through_params_grid(),
                              total=np.prod(grid_sizes),
                              desc=u"Analyzing possible experiments")

    # The generator performs one evaluation per step; this loop only drains it.
    for _ in it:
        pass