In [6]:
import os
import utils
import torch
from torch.utils.data import DataLoader

from data_apis.corpus_eng import SWDADialogCorpus
from data_apis.dataset import CVAEDataset
from data_apis.dataloader import get_cvae_collate

from trainer.cvae.trainer import CVAETrainer

from model.cvae import CVAEModel

# Expose only CUDA device 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Run-level switches.
# `language` is forwarded to every CVAEDataset constructor.
# `debug`, when True, makes the loader cell keep only the first 10 items
# of each split.
language = "eng"
debug = True

# JSON configuration files, one per pipeline component
# (corpus, dataset, trainer, model).
(
    corpus_config_path,
    dataset_config_path,
    trainer_config_path,
    model_config_path,
) = (
    './config/english/cvae_corpus_eng.json',
    './config/english/cvae_dataset_eng.json',
    './config/english/cvae_trainer_eng.json',
    './config/english/cvae_model_eng.json',
)

In [2]:
# --- Corpus -----------------------------------------------------------------
# Load the corpus configuration and build the SWDA dialog corpus from it.
corpus_config = utils.load_config(corpus_config_path)
corpus = SWDADialogCorpus(corpus_config)

# The corpus exposes two views (dialog and meta); each is looked up by the
# split names "train", "valid" and "test".
dial_corpus = corpus.get_dialog_corpus()
meta_corpus = corpus.get_meta_corpus()

train_meta, valid_meta, test_meta = (
    meta_corpus.get(split) for split in ("train", "valid", "test")
)
train_dial, valid_dial, test_dial = (
    dial_corpus.get(split) for split in ("train", "valid", "test")
)

# --- Datasets ---------------------------------------------------------------
dataset_config = utils.load_config(dataset_config_path)
utt_per_case = dataset_config["utt_per_case"]
max_utt_size = dataset_config["max_utt_len"]

# One CVAEDataset per split, constructed in Train -> Valid -> Test order.
train_set, valid_set, test_set = (
    CVAEDataset(split_name, split_dial, split_meta, language, dataset_config)
    for split_name, split_dial, split_meta in (
        ("Train", train_dial, train_meta),
        ("Valid", valid_dial, valid_meta),
        ("Test", test_dial, test_meta),
    )
)

# Collate function shared by all DataLoaders, parameterised by the two
# dataset-config values read above.
cvae_collate = get_cvae_collate(utt_per_case, max_utt_size)

Start process train corpus...
Max utt len 96, mean utt len 14.69
Start process test corpus...
Max utt len 74, mean utt len 15.39
Start process valid corpus...
Max utt len 75, mean utt len 15.06
Start building / laoding vocab
Load corpus with train size 3, valid size 3, test size 3 raw vocab size 24497 vocab size 10000 at cut_off 4 OOV rate 0.008035
<d> index 143
<sil> index -1
67 topics in train data
['statement-non-opinion', 'acknowledge_(backchannel)', 'statement-opinion', 'abandoned_or_turn-exit/uninterpretable', 'yes-no-question', 'agree/accept', 'appreciation', 'wh-question', 'backchannel_in_question_form', 'yes_answers', 'conventional-closing', 'response_acknowledgement', 'open-question', 'no_answers', 'affirmative_non-yes_answers', 'declarative_yes-no-question', 'summarize/reformulate', 'other', 'action-directive', 'rhetorical-questions', 'conventional-opening', 'collaborative_completion', 'signal-non-understanding', 'or-clause', 'hold_before_answer/agreement', 'quotation', 'neg

In [7]:
# Choose the data each DataLoader iterates over.
# In debug mode only the first 10 items of each split are kept; otherwise the
# full datasets are used.
if debug:
    print(f'debug: {debug}')
    _train_set = train_set[:10]
    _valid_set = valid_set[:10]
    _test_set = test_set[:10]
else:
    # These names used to be bound only when `debug` was True, so running the
    # notebook with debug = False raised NameError at the first DataLoader
    # call below. Fall back to the complete datasets.
    _train_set, _valid_set, _test_set = train_set, valid_set, test_set

# Training batches are shuffled; validation and test batches keep dataset
# order. All three loaders share the same collate function.
train_loader = DataLoader(_train_set, batch_size=100, shuffle=True, collate_fn=cvae_collate)
valid_loader = DataLoader(_valid_set, batch_size=60, shuffle=False, collate_fn=cvae_collate)
test_loader = DataLoader(_test_set, batch_size=60, shuffle=False, collate_fn=cvae_collate)

debug: True


In [8]:
# --- Model ------------------------------------------------------------------
# Build the CVAE model from the dataset config, its own config and the corpus.
model_config = utils.load_config(model_config_path)
target_model = CVAEModel(dataset_config, model_config, corpus)

# Move the model to the GPU when CUDA is usable; otherwise leave it as built.
cuda_ready = torch.cuda.is_available()
if cuda_ready:
    target_model.cuda()

# --- Trainer ----------------------------------------------------------------
# The trainer wraps the model together with its own configuration.
trainer_config = utils.load_config(trainer_config_path)
cvae_trainer = CVAETrainer(trainer_config, target_model)

joint_embedding_size: 602
cond_embedding_size: 675
recog_input_size: 1305
gen_input_size: 875
dec_input_size: 905


In [10]:
# Run the trainer's experiment over the train / valid / test loaders and keep
# whatever it returns for later inspection.
output_reports = cvae_trainer.experiment(
    train_loader,
    valid_loader,
    test_loader,
)