In [2]:
# dataset
import os
import torch

from starry.utils.config import Configuration
from starry.utils.dataset_factory import loadDataset


# Use torch's "full" print profile for any tensors printed in this notebook.
torch.set_printoptions(profile="full")

# Dataset root is taken from the environment; the lookup yields None when DATA_DIR is unset.
DATA_DIR = os.environ.get('DATA_DIR')

config = Configuration.create('configs/paraff-score-test.yaml', volatile=True)
# Single-element unpacking: this raises unless loadDataset yields exactly one item.
[data] = loadDataset(config, data_dir=DATA_DIR, splits='9/10')

# Keep the iterator in `it`; the CSV export cell pulls its batch from it.
it = iter(data)
batch = next(it)

# One line per batch field: its key followed by the shape of the stored value.
for key in batch:
	field = batch[key]
	print(key, field.shape)


Encoding measures: 100%|██████████| 168/168 [00:00<00:00, 24949.30it/s]
Load paragraphs: 100%|██████████| 9/9 [00:00<00:00, 5318.22it/s]

ph_id torch.Size([2, 256])
ph_f_num torch.Size([2, 256])
ph_b_num torch.Size([2, 256])
ph_summary torch.Size([2, 256, 256])
ph_body_mask torch.Size([2, 256])
ph_next_mask torch.Size([2, 256])
input_ids torch.Size([2, 512])
output_ids torch.Size([2, 512])
body_mask torch.Size([2, 512])
position torch.Size([2, 512])





In [9]:
# save csv
# Export the phase-level fields of one sample to ./test/phases.csv, one CSV row per field.
# `it` is not defined in this cell; it must already exist in the kernel.
import os

batch = next(it)

# Keep only phase slots whose id is non-zero, and only those in row 0 of the batch.
ph_mask = batch['ph_id'] != 0
ph_mask[1:] = False
# Same row-0 restriction for the token-level fields; w_mask is consumed by the words export.
w_mask = batch['input_ids'] >= 0
w_mask[1:] = False

# The rows of the file follow the order of this list.
ph_fields = [
	batch['ph_id'][ph_mask],
	batch['ph_f_num'][ph_mask],
	batch['ph_b_num'][ph_mask],
	batch['ph_summary'][ph_mask].mean(dim=-1),	# mean over the last axis of the selected entries
	batch['ph_body_mask'][ph_mask],
	batch['ph_next_mask'][ph_mask],
]
ph = '\n'.join(','.join(map(str, field.tolist())) for field in ph_fields)

# Make sure the output directory exists so the write does not fail with FileNotFoundError.
os.makedirs('./test', exist_ok=True)
with open('./test/phases.csv', 'w') as phases:
	phases.write(ph)

def id2word(id):
    """Return the entry of `data.dataset.measure.tokens` at index `id`.

    `data` is a notebook-level variable created by the dataset cell, so this
    helper only works after that cell has run.
    """
    tokens = data.dataset.measure.tokens
    return tokens[id]

# Token-level export to ./test/words.csv, one CSV row per field, restricted by w_mask.
# input_ids and output_ids are converted through id2word; body_mask and position through str.
w_rows = [
	map(id2word, batch['input_ids'][w_mask].tolist()),
	map(id2word, batch['output_ids'][w_mask].tolist()),
	map(str, batch['body_mask'][w_mask].tolist()),
	map(str, batch['position'][w_mask].tolist()),
]
w = '\n'.join(','.join(row) for row in w_rows)
# The handle is named after the file it writes.
with open('./test/words.csv', 'w') as words:
	words.write(w)


In [3]:
# model
import os

from starry.utils.config import Configuration
from starry.utils.dataset_factory import loadDataset
from starry.utils.model_factory import loadModel


# Dataset root is taken from the environment; the lookup yields None when DATA_DIR is unset.
DATA_DIR = os.environ.get('DATA_DIR')

# Build the two data splits and the model from the same test configuration.
config = Configuration.create('configs/paraff-score-test.yaml', volatile=True)
splits = loadDataset(config, data_dir=DATA_DIR, device='cpu')
train, val = splits
model = loadModel(config['model'], postfix='Loss')

# Keep the iterator in `it`; the following eval cell draws its batch from it.
it = iter(train)

# One forward pass on the first batch of `train`; the call unpacks into (loss, metric).
batch = next(it)
outputs = model(batch)
loss, metric = outputs

# Last expression of the cell, so the notebook displays the pair.
(loss, metric)


Load paragraphs: 100%|██████████| 136/136 [00:00<00:00, 15815.28it/s]
Load paragraphs: 100%|██████████| 9/9 [00:00<00:00, 7476.48it/s]


(tensor(9.2181, grad_fn=<AddBackward0>),
 {'acc': 0.0, 'latent_l2': 0.9999990463256836})

In [2]:
# `model` and `it` are not defined in this cell; they must already exist in the kernel.
model.eval()

# Run the model on the next batch from the existing iterator.
batch = next(it)
outputs = model(batch)
loss, metric = outputs

# Last expression of the cell, so the notebook displays the pair.
(loss, metric)


(tensor(4.7760, grad_fn=<AddBackward0>),
 {'acc': 0.0,
  'latent_l2': 0.9999991655349731,
  'error': 1.0,
  'error_zero_latent': 1.0,
  'error_no_primer': 1.0,
  'error_zero_latent_no_primer': 1.0})

In [1]:
# SeqDecoderBase
import os

from starry.utils.config import Configuration
from starry.utils.dataset_factory import loadDataset
from starry.utils.model_factory import loadModel


# Dataset root is taken from the environment; the lookup yields None when DATA_DIR is unset.
DATA_DIR = os.environ.get('DATA_DIR')

# Decoder test configuration: data splits and model are both built from it.
config = Configuration.create('configs/paraff-score-decoder-test.yaml', volatile=True)
splits = loadDataset(config, data_dir=DATA_DIR, device='cpu')
train, val = splits
# `model` is reused by the checkpoint-saving cell that follows.
model = loadModel(config['model'], postfix='Loss')

it = iter(train)

# One forward pass on the first batch of `train`; the call unpacks into (loss, metric).
batch = next(it)
outputs = model(batch)
loss, metric = outputs

# Last expression of the cell, so the notebook displays the pair.
(loss, metric)


Load paragraphs: 100%|██████████| 136/136 [00:00<00:00, 23851.20it/s]
Load paragraphs: 100%|██████████| 9/9 [00:00<00:00, 17689.19it/s]


(tensor(4.6464, grad_fn=<NllLossBackward>), {'acc': 0.0})

In [4]:
import torch

# Re-create the decoder test configuration (here without volatile=True) to resolve the output path.
# `model` is not defined in this cell; it must already exist in the kernel.
config = Configuration.create('configs/paraff-score-decoder-test.yaml')

# Only the state dict of `model.deducer` is stored, under the 'model' key.
checkpoint = {'model': model.deducer.state_dict()}
# NOTE(review): the filename is spelled 'untraied' (presumably 'untrained'); kept unchanged
# in case other files refer to it by this name. Confirm before renaming.
torch.save(checkpoint, config.localPath('untraied.chkpt'))


In [1]:
# PhaseGen - lora decoder
import os

from starry.utils.config import Configuration
from starry.utils.dataset_factory import loadDataset
from starry.utils.model_factory import loadModel


# Dataset root is taken from the environment; the lookup yields None when DATA_DIR is unset.
DATA_DIR = os.environ.get('DATA_DIR')

# phaselora test configuration: data splits and model are both built from it.
config = Configuration.create('configs/paraff-score-phaselora-test.yaml', volatile=True)
splits = loadDataset(config, data_dir=DATA_DIR, device='cpu')
train, val = splits
model = loadModel(config['model'], postfix='Loss')

it = iter(train)

# One forward pass on the first batch of `train`; the call unpacks into (loss, metric).
batch = next(it)
outputs = model(batch)
loss, metric = outputs

# Last expression of the cell, so the notebook displays the pair.
(loss, metric)


  from .autonotebook import tqdm as notebook_tqdm
Encoding measures: 100%|██████████| 168/168 [00:00<00:00, 23701.42it/s]
Load paragraphs: 100%|██████████| 136/136 [00:00<00:00, 16195.60it/s]
Load paragraphs: 100%|██████████| 9/9 [00:00<00:00, 8536.58it/s]


(tensor(4.7918, grad_fn=<AddBackward0>),
 {'acc': 0.0, 'latent_l2': 0.9999989867210388})

In [1]:
# dataset with timewise graph
import os
import torch

from starry.utils.config import Configuration
from starry.utils.dataset_factory import loadDataset


# Use torch's "full" print profile for any tensors printed in this notebook.
torch.set_printoptions(profile="full")

# Dataset root is taken from the environment; the lookup yields None when DATA_DIR is unset.
DATA_DIR = os.environ.get('DATA_DIR')

print('.')	# workaround print bug

config = Configuration.create('configs/paraff-graph-test.yaml', volatile=True)
# Single-element unpacking: this raises unless loadDataset yields exactly one item.
[data] = loadDataset(config, data_dir=DATA_DIR, splits='9/10')

it = iter(data)
batch = next(it)

# One line per batch field: its key followed by the shape of the stored value.
for key in batch:
	field = batch[key]
	print(key, field.shape)


  from .autonotebook import tqdm as notebook_tqdm


.


Load paragraphs: 100%|██████████| 9/9 [00:00<00:00, 9784.53it/s]

ph_id torch.Size([2, 256])
ph_f_num torch.Size([2, 256])
ph_b_num torch.Size([2, 256])
ph_summary torch.Size([2, 256, 256])
ph_body_mask torch.Size([2, 256])
ph_next_mask torch.Size([2, 256])
input_ids torch.Size([2, 512])
output_ids torch.Size([2, 512])
body_mask torch.Size([2, 512])
position torch.Size([2, 512])
tg_id torch.Size([2, 512])
tg_staff torch.Size([2, 512])
tg_x torch.Size([2, 512])
tg_y torch.Size([2, 512])
tg_sy1 torch.Size([2, 512])
tg_sy2 torch.Size([2, 512])
tg_confidence torch.Size([2, 512])





In [1]:
# GraphParaffEncoder
import os

from starry.utils.config import Configuration
from starry.utils.dataset_factory import loadDataset
from starry.utils.model_factory import loadModel


# Dataset root is taken from the environment; the lookup yields None when DATA_DIR is unset.
DATA_DIR = os.environ.get('DATA_DIR')

# Graph test configuration: data splits and model are both built from it.
config = Configuration.create('configs/paraff-graph-test.yaml', volatile=True)
splits = loadDataset(config, data_dir=DATA_DIR, device='cpu')
train, val = splits
model = loadModel(config['model'], postfix='Loss')

# Keep the iterator in `it`; the following eval cell draws its batch from it.
it = iter(train)

# One forward pass on the first batch of `train`; the call unpacks into (loss, metric).
batch = next(it)
outputs = model(batch)
loss, metric = outputs

# Last expression of the cell, so the notebook displays the pair.
(loss, metric)


  from .autonotebook import tqdm as notebook_tqdm
Load paragraphs: 100%|██████████| 136/136 [00:00<00:00, 19079.05it/s]
Load paragraphs: 100%|██████████| 9/9 [00:00<00:00, 10163.90it/s]


(tensor(3.4621, grad_fn=<NllLossBackward0>), {'acc': 0.321739137172699})

In [2]:
# Call model.eval() and then run the model on the next batch drawn from `it`.
# `model` and `it` are not defined in this cell; they must already exist in the kernel.
model.eval()
# The call's return value is the cell's last expression, so the notebook displays it.
model(next(it))

(tensor(4.7169, grad_fn=<NllLossBackward0>),
 {'acc': 0.022346368059515953,
  'error': 0.9776536226272583,
  'error_zero_latent': 0.9776536226272583,
  'error_no_primer': 0.9776536226272583,
  'error_zero_latent_no_primer': 0.9776536226272583})

In [1]:
# GraphParaffTranslator
import os

from starry.utils.config import Configuration
from starry.utils.dataset_factory import loadDataset
from starry.utils.model_factory import loadModel


# Dataset root is taken from the environment; the lookup yields None when DATA_DIR is unset.
DATA_DIR = os.environ.get('DATA_DIR')

# graph_trans test configuration: data splits and model are both built from it.
config = Configuration.create('configs/paraff-graph_trans-test.yaml', volatile=True)
splits = loadDataset(config, data_dir=DATA_DIR, device='cpu')
train, val = splits
model = loadModel(config['model'], postfix='Loss')

it = iter(train)

# One forward pass on the first batch of `train`; the call unpacks into (loss, metric).
batch = next(it)
outputs = model(batch)
loss, metric = outputs

# Last expression of the cell, so the notebook displays the pair.
(loss, metric)


  from .autonotebook import tqdm as notebook_tqdm
Load paragraphs: 100%|██████████| 208/208 [00:00<00:00, 10095.30it/s]
Load paragraphs: 100%|██████████| 33/33 [00:00<00:00, 17795.32it/s]


(tensor(4.7787, grad_fn=<NllLossBackward0>), {'acc': 0.0})

In [1]:
# GraphParaffTranslator with position
import os

from starry.utils.config import Configuration
from starry.utils.dataset_factory import loadDataset
from starry.utils.model_factory import loadModel


# Dataset root is taken from the environment; the lookup yields None when DATA_DIR is unset.
DATA_DIR = os.environ.get('DATA_DIR')

# graph_trans-pos test configuration: data splits and model are both built from it.
config = Configuration.create('configs/paraff-graph_trans-pos-test.yaml', volatile=True)
splits = loadDataset(config, data_dir=DATA_DIR, device='cpu')
train, val = splits
model = loadModel(config['model'], postfix='Loss')

it = iter(train)

# One forward pass on the first batch of `train`; the call unpacks into (loss, metric).
batch = next(it)
outputs = model(batch)
loss, metric = outputs

# Last expression of the cell, so the notebook displays the pair.
(loss, metric)


Load paragraphs: 100%|██████████| 208/208 [00:00<00:00, 7101.76it/s]
Load paragraphs: 100%|██████████| 33/33 [00:00<00:00, 6532.88it/s]


(tensor(4.8810, grad_fn=<NllLossBackward0>), {'acc': 0.0})

In [2]:
# GraphParaffSummaryEncoder
import os

from starry.utils.config import Configuration
from starry.utils.dataset_factory import loadDataset
from starry.utils.model_factory import loadModel


# Dataset root is taken from the environment; the lookup yields None when DATA_DIR is unset.
DATA_DIR = os.environ.get('DATA_DIR')

# graph_sum test configuration: data splits and model are both built from it.
config = Configuration.create('configs/paraff-graph_sum-test.yaml', volatile=True)
splits = loadDataset(config, data_dir=DATA_DIR, device='cpu')
train, val = splits
model = loadModel(config['model'], postfix='Loss')

# Keep the iterator in `it`; the following eval cell draws its batch from it.
it = iter(train)

# One forward pass on the first batch of `train`; the call unpacks into (loss, metric).
batch = next(it)
outputs = model(batch)
loss, metric = outputs

# Last expression of the cell, so the notebook displays the pair.
(loss, metric)


Encoding measures: 100%|██████████| 504/504 [00:00<00:00, 27431.89it/s]
Load paragraphs: 100%|██████████| 208/208 [00:00<00:00, 18019.15it/s]
Load paragraphs: 100%|██████████| 33/33 [00:00<00:00, 12120.14it/s]


(tensor(2.0004, grad_fn=<MseLossBackward0>), {'acc': 0.32967033982276917})

In [2]:
# `model` and `it` are not defined in this cell; they must already exist in the kernel.
model.eval()

# Run the model on the next batch from the existing iterator.
batch = next(it)
outputs = model(batch)
loss, metric = outputs

# Last expression of the cell, so the notebook displays the pair.
(loss, metric)


(tensor(4.8664, grad_fn=<NllLossBackward0>),
 {'acc': 0.0,
  'acc_boundary': 0.0,
  'error': 1.0,
  'error_zero_latent': 1.0,
  'error_no_primer': 1.0,
  'error_zero_latent_no_primer': 1.0})