In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell runs, so edits to the project source are picked up
# without restarting the kernel.
%load_ext autoreload

%autoreload 2

In [None]:
import sys
# Make the parent directory importable (the imports below rely on it).
# The path is relative, so it resolves against the kernel's working directory.
# The membership guard keeps re-running this cell from stacking duplicate entries.
if '../' not in sys.path:
    sys.path.append('../')

In [None]:
from hover.core.neural import MultiVectorNet, VectorNet
from hover.core.dataset import SupervisableTextDataset
from hover.utils.datasets import newsgroups_dictl

In [None]:
# Load the newsgroups data. Later cells index data_dict by split name
# ('train' and 'test') and use label_encoder as a label -> integer-code lookup.
# label_decoder is not used in the cells visible here.
data_dict, label_encoder, label_decoder = newsgroups_dictl()

In [None]:
import random
from tqdm import tqdm
from collections import Counter

# Labels with a non-negative code, ordered by that code.
all_labels = sorted(
    (_label for _label, _code in label_encoder.items() if _code >= 0),
    key=label_encoder.get,
)

def mutation(entry_dict, prob=0.5, field='label', pool=None):
    """Return a shallow copy of ``entry_dict`` with ``field`` possibly randomized.

    With probability ``prob`` the value stored under ``field`` is replaced by
    one item sampled from ``pool``. The sampled item may equal the original
    value, so the fraction of entries that actually change can be lower than
    ``prob``. The input dictionary itself is never modified.

    Args:
        entry_dict: dictionary to copy and (maybe) perturb.
        prob: probability of replacing the value under ``field``.
        field: key whose value is replaced; defaults to ``'label'``.
        pool: candidates to sample from. ``None`` (the default) means the
            module-level ``all_labels``, looked up at call time.

    Returns:
        A new dictionary; identical to ``entry_dict`` when no replacement
        was drawn.
    """
    if pool is None:
        # Resolve lazily so the default follows the current `all_labels`
        # even if that list is rebuilt after this function was defined.
        pool = all_labels
    result_dict = entry_dict.copy()
    if random.uniform(0.0, 1.0) < prob:
        # Write to `field` (the key used to be hard-coded to 'label', which
        # silently ignored the `field` argument).
        result_dict[field] = random.sample(pool, 1)[0]
    return result_dict

# Run every training entry through `mutation` (default arguments); tqdm shows progress.
mutated_train_dictl = list(map(mutation, tqdm(data_dict['train'])))
# Cell output: label counts before and after mutation, side by side.
(
    Counter(_d['label'] for _d in data_dict['train']),
    Counter(_d['label'] for _d in mutated_train_dictl),
)

In [None]:
# Build the dataset: the mutated entries form the train subset and the
# unmodified 'test' split is supplied as the dev subset, so dev metrics are
# computed against unmutated labels. The raw and test subsets are left empty.
dataset = SupervisableTextDataset(
    raw_dictl=[],
    train_dictl=mutated_train_dictl,
    dev_dictl=data_dict['test'],
    test_dictl=[],
    feature_key="text",
    label_key="label",
)

In [None]:
# List files matching *model.pt* under the fixture modules
# (presumably saved model state from earlier runs -- TODO confirm).
!ls ../fixture_module/*/*/*model.pt*

In [None]:
# NOTE: destructive -- deletes every file matching *model.pt* under the
# fixture modules. Presumably done so the nets built below do not pick up
# previously saved state -- TODO confirm.
!rm ../fixture_module/*/*/*model.pt*

In [None]:
from importlib import import_module

# Import the fixture model modules; the resulting list keeps this order
# (model2, model1, model3, model4).
model_modules = list(
    map(
        import_module,
        (
            'fixture_module.multi_vector_net.model2',
            'fixture_module.multi_vector_net.model1',
            'fixture_module.multi_vector_net.model3',
            'fixture_module.multi_vector_net.model4',
        ),
    )
)

In [None]:
def _build_multi_net():
    """Create a MultiVectorNet holding one new VectorNet per entry of `model_modules`."""
    nets = [VectorNet.from_module(_m, all_labels) for _m in model_modules]
    return MultiVectorNet(nets, verbose=10)


# Two separately constructed ensembles built from the same modules and labels,
# so they can be trained with different settings and compared.
multi_a = _build_multi_net()
multi_b = _build_multi_net()

In [None]:
# Vectorizers are taken from multi_a only. multi_b was built from the same
# modules, so the loaders are presumably usable for both -- TODO confirm.
vectorizers = [_net.vectorizer for _net in multi_a.vector_nets]
# smoothing_coeff is non-zero for the training loader only (0.1 vs 0.0 on dev).
train_loader = dataset.loader('train', *vectorizers, smoothing_coeff=0.1, batch_size=256)
dev_loader = dataset.loader('dev', *vectorizers, smoothing_coeff=0.0, batch_size=256)

#### Quick Notes

2021-09-08: Using a forget rate has been helpful; cyclic co-teaching has not.

In [None]:
# Training settings for multi_a: 40 warmup epochs followed by 40 post-warmup
# epochs, with noise applied only after warmup.
kwargs_a = {
    'warmup_epochs': 40,
    'warmup_noise': 0.0,
    'warmup_lr': 0.05,
    'warmup_momentum': 0.9,
    'postwm_epochs': 40,
    'postwm_noise': 0.5,
    'postwm_lr': 0.01,
    'postwm_momentum': 0.7,
}
# Settings for multi_b: the same schedule, differing only in postwm_noise (0.0).
kwargs_b = {**kwargs_a, 'postwm_noise': 0.0}

# Train multi_a with kwargs_a; multi_b is trained separately in the next cell.
# dev_loader is passed along with the training loader.
train_info_a = multi_a.train(train_loader, dev_loader=dev_loader, **kwargs_a)

In [None]:
# Train multi_b on the same loaders with kwargs_b (postwm_noise=0.0).
train_info_b = multi_b.train(train_loader, dev_loader=dev_loader, **kwargs_b)

In [None]:
# Evaluate multi_a's ensemble on the dev loader and display the first element
# of the result (presumably the headline score -- TODO confirm).
multi_a.evaluate_ensemble(dev_loader)[0]

In [None]:
# Same evaluation for multi_b, for comparison with multi_a's result
# (first element of the result only; its meaning is presumably a score -- TODO confirm).
multi_b.evaluate_ensemble(dev_loader)[0]