In [1]:
# dataset
import os
import torch

from starry.utils.config import Configuration
from starry.utils.dataset_factory import loadDataset


torch.set_printoptions(profile="full")

DATA_DIR = os.getenv('DATA_DIR')

config = Configuration.create('configs/paraff-score-test.yaml', volatile=True)
data, = loadDataset(config, data_dir=DATA_DIR, splits='9/10')

it = iter(data)
batch = next(it)

for k in batch:
	print(k, batch[k].shape)


Encoding measures: 100%|██████████| 42/42 [00:02<00:00, 15.62it/s]
Load paragraphs: 100%|██████████| 9/9 [00:00<00:00, 18315.74it/s]

ph_id torch.Size([2, 256])
ph_f_num torch.Size([2, 256])
ph_b_num torch.Size([2, 256])
ph_summary torch.Size([2, 256, 256])
ph_body_mask torch.Size([2, 256])
ph_next_mask torch.Size([2, 256])
input_ids torch.Size([2, 512])
output_ids torch.Size([2, 512])
body_mask torch.Size([2, 512])
position torch.Size([2, 512])





In [2]:
# save csv
batch = next(it)

ph_mask = batch['ph_id'] != 0
ph_mask[1:] = False
w_mask = batch['input_ids'] != 0
w_mask[1:] = False

ph = '\n'.join([
	','.join(map(str, batch['ph_id'][ph_mask].tolist())),
	','.join(map(str, batch['ph_f_num'][ph_mask].tolist())),
	','.join(map(str, batch['ph_b_num'][ph_mask].tolist())),
	','.join(map(str, batch['ph_summary'][ph_mask].mean(dim=-1).tolist())),
	','.join(map(str, batch['ph_body_mask'][ph_mask].tolist())),
	','.join(map(str, batch['ph_next_mask'][ph_mask].tolist())),
])
with open('./test/phases.csv', 'w') as phases:
    phases.write(ph)

def id2word (id):
    return data.dataset.measure.tokens[id]

w = '\n'.join([
	','.join(map(id2word, batch['input_ids'][w_mask].tolist())),
	','.join(map(id2word, batch['output_ids'][w_mask].tolist())),
	','.join(map(str, batch['body_mask'][w_mask].tolist())),
	','.join(map(str, batch['position'][w_mask].tolist())),
])
with open('./test/words.csv', 'w') as phases:
    phases.write(w)


In [1]:
# model
import os

from starry.utils.config import Configuration
from starry.utils.dataset_factory import loadDataset
from starry.utils.model_factory import loadModel


DATA_DIR = os.getenv('DATA_DIR')

config = Configuration.create('configs/paraff-score-test.yaml', volatile=True)
train, val = loadDataset(config, data_dir=DATA_DIR, device='cpu')
model = loadModel(config['model'], postfix='Loss')

it = iter(train)

batch = next(it)
loss, metric = model(batch)

loss, metric


Encoding measures: 100%|██████████| 42/42 [00:05<00:00,  8.31it/s]
Load paragraphs: 100%|██████████| 136/136 [00:00<00:00, 24476.52it/s]
Load paragraphs: 100%|██████████| 9/9 [00:00<00:00, 19103.61it/s]


(tensor(4.3299, grad_fn=<AddBackward0>),
 {'acc': 0.35576921701431274, 'latent_l2': 0.9999991655349731})

In [2]:
model.eval()

batch = next(it)
loss, metric = model(batch)

loss, metric


(tensor(4.4203, grad_fn=<AddBackward0>),
 {'acc': 0.14705882966518402,
  'latent_l2': 0.9999989867210388,
  'error': 0.8529411554336548,
  'error_zero_latent': 0.7941176295280457,
  'error_no_primer': 0.6470588445663452,
  'error_zero_latent_no_primer': 0.7058823108673096})