In [None]:
%load_ext autoreload

%autoreload 2

In [None]:
from hover.core.neural import MultiVectorNet, VectorNet
from hover.core.dataset import SupervisableTextDataset
from hover.utils.datasets import newsgroups_dictl

In [None]:
# Load the newsgroups data. Later cells index `data_dict` by 'train' and 'test'
# and use `label_encoder` as a mapping from label to a value compared against 0.
# `label_decoder` is not referenced again in this notebook.
data_dict, label_encoder, label_decoder = newsgroups_dictl()

In [None]:
import random
from tqdm import tqdm
from collections import Counter

# Labels whose encoded value is non-negative, ordered by that encoded value.
# Sorting the (label, code) pairs first and filtering afterwards yields the same
# order as filtering first, because the sort is stable.
all_labels = [
    _name
    for _name, _code in sorted(label_encoder.items(), key=lambda _pair: _pair[1])
    if _code >= 0
]

def mutation(entry_dict, prob=0.5, field='label', pool=all_labels):
    """Return a copy of ``entry_dict`` with ``field`` possibly replaced by a random value.

    Used to inject synthetic label noise: with probability ``prob`` the value
    stored under ``field`` is overwritten by one element drawn uniformly from
    ``pool``. The draw does not exclude the entry's current value, so the share
    of entries whose value actually changes is lower than ``prob``.

    Args:
        entry_dict: mapping describing one data entry; it is not modified.
        prob: probability of overwriting ``field``.
        field: key of the entry to overwrite.
        pool: sequence of candidate replacement values.

    Returns:
        A shallow copy of ``entry_dict``, with ``field`` re-drawn or untouched.
    """
    result_dict = entry_dict.copy()
    if random.uniform(0.0, 1.0) < prob:
        # Honor the `field` argument instead of a hard-coded 'label' key.
        result_dict[field] = random.sample(pool, 1)[0]
    return result_dict

# Apply the default label mutation to every training entry, with a progress bar.
# NOTE(review): no random seed is set in the visible cells, so the injected
# noise presumably differs from run to run — confirm whether that is intended.
mutated_train_dictl = list(map(mutation, tqdm(data_dict['train'])))

# Label histograms before and after mutation, displayed as the cell output.
(
    Counter(_entry['label'] for _entry in data_dict['train']),
    Counter(_entry['label'] for _entry in mutated_train_dictl),
)

In [None]:
# Build the dataset from the mutated (noisy-label) training entries and the
# untouched 'test' split, which is registered here as the dev subset.
# The raw and test subsets are passed as empty lists.
dataset = SupervisableTextDataset(
    raw_dictl=[],
    train_dictl=mutated_train_dictl,
    dev_dictl=data_dict['test'],
    test_dictl=[],
    feature_key="text",  # entry key passed as the feature
    label_key="label",  # entry key passed as the label; the same key `mutation` rewrites
)

In [None]:
from importlib import import_module

# Import every model module by name; each name must be importable from the
# kernel's path. 'model4' is currently disabled.
model_modules = list(
    map(
        import_module,
        [
            'model1',
            'model2',
            'model3',
            #'model4',
        ],
    )
)

In [None]:
# Two ensembles built the same way from the same model modules, one per
# parameter schedule (paramsA / paramsB). Each gets its own VectorNet instances.
# multinetB has to be constructed here because the last cell trains and
# evaluates it; leaving it commented out makes a fresh Run All fail with NameError.
multinetA = MultiVectorNet([VectorNet.from_module(_m, all_labels) for _m in model_modules], verbose=1)
multinetB = MultiVectorNet([VectorNet.from_module(_m, all_labels) for _m in model_modules], verbose=1)

In [None]:
# One vectorizer per member net of ensemble A, passed positionally to the loaders.
# The same loaders are later handed to multinetB.train as well.
# NOTE(review): presumably equivalent for B since both ensembles come from the
# same modules — confirm.
vectorizers = [_member.vectorizer for _member in multinetA.vector_nets]

# The training loader uses smoothing_coeff=0.1; the dev loader uses 0.0.
train_loader = dataset.loader(
    'train',
    *vectorizers,
    smoothing_coeff=0.1,
    batch_size=128,
)
dev_loader = dataset.loader(
    'dev',
    *vectorizers,
    smoothing_coeff=0.0,
    batch_size=128,
)

In [None]:
from hover.utils.denoising import identity_adjacency, cyclic_except_last

def get_params(warmup_epochs=5, coteach_epochs=10, forget_rate=0.3):
    """Yield one training-parameter dict per epoch, in two consecutive phases.

    Phase 1 (warm-up): ``warmup_epochs`` dicts with forget rate 0.0,
    learning rate 0.1 and ``identity_adjacency``.
    Phase 2 (co-teaching): ``coteach_epochs`` dicts with the given
    ``forget_rate``, learning rate 0.01 and ``cyclic_except_last``.

    Every dict carries an ``"optimizer"`` list holding the same settings dict
    four times (four references to one dict object).
    NOTE(review): four entries are produced while three model modules are
    active elsewhere in this notebook — confirm the intended count.

    This is a generator: the returned iterator can be consumed only once.
    """
    # (number of epochs, forget rate, learning rate, adjacency function)
    phases = (
        (warmup_epochs, 0.0, 0.1, identity_adjacency),
        (coteach_epochs, forget_rate, 0.01, cyclic_except_last),
    )
    for num_epochs, phase_forget_rate, learning_rate, adjacency in phases:
        for _ in range(num_epochs):
            # A fresh settings dict is built for every epoch.
            optimizer_settings = {"lr": learning_rate, "momentum": 0.9}
            yield {
                "forget_rate": phase_forget_rate,
                "optimizer": [optimizer_settings] * 4,
                "adjacency_function": adjacency,
            }

# Schedule A: 5 warm-up epochs followed by 10 co-teaching epochs at forget rate 0.5.
# Schedule B: 15 warm-up epochs only (no co-teaching phase), same total epoch count.
# get_params is a generator function, so each object below can be iterated only
# once; re-run this cell before re-running a training cell.
paramsA = get_params(warmup_epochs=5, coteach_epochs=10, forget_rate=0.5)
paramsB = get_params(warmup_epochs=15, coteach_epochs=0, forget_rate=0.0)

#### Quick Notes

2021-09-08: the forget rate has been helpful; cyclic co-teaching has not.

In [None]:
# Train ensemble A with schedule A on the noisy-label training loader, passing
# the dev loader along, then evaluate the ensemble on the dev loader.
# The evaluation result is the cell's last expression and is therefore displayed.
multinetA.train(train_loader, paramsA, dev_loader=dev_loader)
multinetA.evaluate_ensemble(dev_loader)

In [None]:
# Same procedure for ensemble B with schedule B (warm-up epochs only), using the
# same loaders, so the two results can be compared.
# Requires `multinetB` to have been constructed in the model-building cell.
multinetB.train(train_loader, paramsB, dev_loader=dev_loader)
multinetB.evaluate_ensemble(dev_loader)