# Trie inference

In [None]:
#| default_exp 00-nar-trie-inference-benchmarking

In [None]:
# Enable IPython's autoreload extension in mode 2 so edited modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
#| hide
# Bring in nbdev's showdoc helpers, then run nbdev's export step.
from nbdev.showdoc import *
import nbdev
nbdev.nbdev_export()

In [None]:
#| export
import os, pandas as pd, warnings
from tqdm.auto import tqdm

from xcai.basics import *
from xcai.models.MMM00X import BT0002, RT0005

comet_ml is installed but `COMET_API_KEY` is not set.


In [None]:
# Suppress Python warnings for the rest of the session.
warnings.filterwarnings('ignore')
# Set WANDB_MODE to 'disabled' so Weights & Biases logging stays off.
os.environ['WANDB_MODE'] = 'disabled'

## Benchmarking

In [None]:
#| export
# Set WANDB_MODE to 'disabled' so Weights & Biases logging stays off when this
# exported cell runs on its own.
os.environ['WANDB_MODE'] = 'disabled'

# Dataset block built from the 'data' configuration with the roberta-base tokenizer.
block = XCBlock.from_cfg('data', valid_pct=0.001, tokz='roberta-base')

# Evaluation arguments. The output directory keeps its original cluster path as
# the default but can be redirected with XC_NLG_OUTPUT_DIR, so the benchmark is
# not tied to one machine's filesystem layout.
args = XCLearningArguments(
    output_dir=os.environ.get(
        'XC_NLG_OUTPUT_DIR',
        '/scratch/scai/phd/aiz218323/Projects/xc_nlg/outputs/default',
    ),
    generation_length_penalty=1.5,
    per_device_eval_batch_size=64,
    evaluation_strategy='steps',
    label_names=['lbl2data_idx'],
)

# Checkpoint to benchmark; XC_NLG_MODEL_DIR overrides the default location.
mname = os.environ.get(
    'XC_NLG_MODEL_DIR',
    '/home/scai/phd/aiz218323/Projects/XC_NLG/code/models/roberta-base_LM-NAR_LF-WikiSeeAlso-320K/checkpoint-174000',
)
# NOTE(review): ig_tok is 1 here but 0 where the same checkpoint is loaded in
# the Inference section; confirm which value is intended.
model = RT0005.from_pretrained(mname, tn_targ=10_000, ig_tok=1)

# Build the trie from the block.
trie = XCTrie.from_block(block)

In [None]:
#| export
# Metric built from the block's label count and the test split's label
# filterer, with the training label data passed as `prop`.
metric = PrecRecl(
    block.n_lbl,
    block.test.data_lbl_filterer,
    prop=block.train.dset.data.data_lbl,
    pk=10,
    rk=10,
    rep_pk=[1, 3, 5, 10],
    rep_rk=[10],
)

# Learner wired to the model, arguments, trie, collator and metric.
learn = XCLearner(model=model, args=args, trie=trie, data_collator=block.collator, compute_metrics=metric)

# Evaluate on the block's test dataset and print the returned metrics.
metrics = learn.evaluate(block.test.dset)
print(metrics)

## Inference

In [None]:
# Rebind `block` to the 'data_meta' configuration and time the load.
%time block = XCBlock.from_cfg('data_meta', valid_pct=0.001, tokz='roberta-base')

CPU times: user 9min 59s, sys: 26.2 s, total: 10min 26s
Wall time: 3min 30s


In [None]:
# Checkpoint to evaluate. The original cluster path stays as the default;
# set XC_NLG_MODEL_DIR to load the checkpoint from another location.
mname = os.environ.get(
    'XC_NLG_MODEL_DIR',
    '/home/scai/phd/aiz218323/Projects/XC_NLG/code/models/roberta-base_LM-NAR_LF-WikiSeeAlso-320K/checkpoint-174000',
)
model = RT0005.from_pretrained(mname, tn_targ=10_000, ig_tok=0)

If you want to use `RobertaLMHeadModel` as a standalone, add `is_decoder=True.`
Some weights of RT0005 were not initialized from the model checkpoint at /home/scai/phd/aiz218323/Projects/XC_NLG/code/models/roberta-base_LM-NAR_LF-WikiSeeAlso-320K/checkpoint-174000 and are newly initialized: ['loss_fn.o']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Draw a 2000-example sample of the test dataset with a fixed seed.
test_dset = block.test.dset.sample(n=2000, seed=50)

In [None]:
# Metric for the sampled test set: label count and filterer come from the
# sample, `prop` from the training label data.
metric = PrecRecl(
    test_dset.n_lbl,
    test_dset.data.data_lbl_filterer,
    prop=block.train.dset.data.data_lbl,
    pk=10,
    rk=200,
    rep_pk=[1, 3, 5, 10],
    rep_rk=[10, 100, 200],
)

In [None]:
# Evaluation settings for the inference runs. The output directory keeps its
# original cluster path as the default but can be redirected with
# XC_NLG_OUTPUT_DIR, so the cell is not tied to one machine's filesystem.
args = XCLearningArguments(
    output_dir=os.environ.get(
        'XC_NLG_OUTPUT_DIR',
        '/scratch/scai/phd/aiz218323/Projects/xc_nlg/outputs/default',
    ),
    generation_max_info=1,
    generation_length_penalty=0,
    per_device_eval_batch_size=64,
    evaluation_strategy='steps',
    label_names=['lbl2data_idx'],
)

In [None]:
# Learner created without a trie; the trie is assigned to `learn.tbs.trie`
# in a later cell.
learn = XCLearner(model=model, args=args, data_collator=block.collator, compute_metrics=metric)

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [None]:
# Build the trie from the current block.
trie = XCTrie.from_block(block)

  0%|          | 0/312330 [00:00<?, ?it/s]

In [None]:
# Hand the trie to the learner's `tbs` attribute
# (presumably its trie-based search component; confirm).
learn.tbs.trie = trie

### `roberta-base`

In [None]:
# Run prediction on the sampled test set and show the resulting metrics.
o = learn.predict(test_dset)
display_metric(o.metrics)

Unnamed: 0,P@1,P@3,P@5,P@10,N@1,N@3,N@5,N@10,PSP@1,PSP@3,PSP@5,PSP@10,PSN@1,PSN@3,PSN@5,PSN@10,R@10,R@100,R@200,loss,runtime,samples_per_second,steps_per_second
0,17.0,9.75,6.57,3.285,17.0,15.4143,15.2326,14.9327,11.0851,10.0787,9.5993,8.704,11.0851,11.0837,11.2432,11.177,15.11,15.11,15.11,17.7134,51.1166,39.126,0.313


In [None]:
# Run prediction on the sampled test set and show the resulting metrics.
# NOTE(review): this cell is identical to the previous one, yet the recorded
# outputs differ (P@1 17.0 vs 14.25). Presumably some learner or generation
# setting changed between the two runs; confirm and record which.
o = learn.predict(test_dset)
display_metric(o.metrics)

Unnamed: 0,P@1,P@3,P@5,P@10,N@1,N@3,N@5,N@10,PSP@1,PSP@3,PSP@5,PSP@10,PSN@1,PSN@3,PSN@5,PSN@10,R@10,R@100,R@200,loss,runtime,samples_per_second,steps_per_second
0,14.25,8.8167,6.37,3.185,14.25,13.9093,14.251,13.9665,8.907,8.9605,9.2579,8.3942,8.907,9.7857,10.3569,10.2906,14.8061,14.8061,14.8061,17.7134,50.4917,39.61,0.317


## Trie augmentation

In [None]:
# Evaluation settings for the trie-augmentation runs (200 beams). The output
# directory keeps its original cluster path as the default but can be
# redirected with XC_NLG_OUTPUT_DIR.
args = XCLearningArguments(
    output_dir=os.environ.get(
        'XC_NLG_OUTPUT_DIR',
        '/scratch/scai/phd/aiz218323/Projects/xc_nlg/outputs/default',
    ),
    generation_num_beams=200,
    generation_length_penalty=1.5,
    per_device_eval_batch_size=64,
    evaluation_strategy='steps',
    label_names=['lbl2data_idx'],
)

In [None]:
# Fresh learner for the current arguments; no trie is passed at construction.
learn = XCLearner(model=model, args=args, data_collator=block.collator, compute_metrics=metric)

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [None]:
# Build the trie once and bind it to both `trie` and the learner's `tbs`.
trie = learn.tbs.trie = XCTrie.from_block(block)

  0%|          | 0/312330 [00:00<?, ?it/s]

In [None]:
# Evaluate on the sampled test set and show the returned metrics.
metrics = learn.evaluate(test_dset)
display_metric(metrics)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Unnamed: 0,P@1,P@3,P@5,P@10,N@1,N@3,N@5,N@10,PSP@1,PSP@3,PSP@5,PSP@10,PSN@1,PSN@3,PSN@5,PSN@10,R@10,R@100,R@200,loss,runtime,samples_per_second,steps_per_second
0,14.55,8.1667,5.68,3.435,14.55,13.2213,13.3973,14.2175,9.117,8.158,8.0412,8.9522,9.117,9.1901,9.596,10.3974,16.0702,23.6574,27.3446,17.7134,1624.731,1.231,0.01


In [None]:
# Rebuild the trie with the 'hlk' metadata included and bind it to both
# `trie` and the learner's `tbs`.
trie = learn.tbs.trie = XCTrie.from_block(block, meta=['hlk'])

  0%|          | 0/312330 [00:00<?, ?it/s]

  0%|          | 0/2458399 [00:00<?, ?it/s]

In [None]:
# Assign the current trie to the learner (the preceding cell already did so)
# and set the beam count to 30 on both `learn.tbs` and `learn.args`.
learn.tbs.trie = trie
learn.tbs.n_bm = learn.args.generation_num_beams = 30

In [None]:
# Run prediction on the sampled test set and show the resulting metrics.
# NOTE(review): the recorded runtime (~1628 s) is close to the 200-beam
# evaluation (~1625 s) and far above the 10- and 20-beam runs that follow
# (~129 s, ~274 s). The saved output may predate the 30-beam setting; confirm.
o = learn.predict(test_dset)
display_metric(o.metrics)

Unnamed: 0,P@1,P@3,P@5,P@10,N@1,N@3,N@5,N@10,PSP@1,PSP@3,PSP@5,PSP@10,PSN@1,PSN@3,PSN@5,PSN@10,R@10,R@100,R@200,loss,runtime,samples_per_second,steps_per_second
0,7.75,4.3667,3.13,1.94,7.75,7.3079,7.5596,8.1686,5.2484,4.9572,4.9311,5.4388,5.2484,5.4932,5.732,6.2214,9.8002,17.9712,21.1916,17.7134,1628.4336,1.228,0.01


In [None]:
# Use 10 beams, keeping `learn.tbs` and `learn.args` in agreement.
learn.tbs.n_bm = learn.args.generation_num_beams = 10

In [None]:
# Run prediction with the current beam setting and show the metrics.
o = learn.predict(test_dset)
display_metric(o.metrics)

Unnamed: 0,P@1,P@3,P@5,P@10,N@1,N@3,N@5,N@10,PSP@1,PSP@3,PSP@5,PSP@10,PSN@1,PSN@3,PSN@5,PSN@10,R@10,R@100,R@200,loss,runtime,samples_per_second,steps_per_second
0,8.45,4.7,3.46,2.225,8.45,8.0573,8.4391,9.1355,5.6088,5.1432,5.3652,6.253,5.6088,5.8519,6.2406,6.8491,11.0407,18.3343,20.1675,17.7134,128.889,15.517,0.124


In [None]:
# Use 20 beams, keeping `learn.tbs` and `learn.args` in agreement.
learn.tbs.n_bm = learn.args.generation_num_beams = 20

In [None]:
# Run prediction with the current beam setting and show the metrics.
o = learn.predict(test_dset)
display_metric(o.metrics)

Unnamed: 0,P@1,P@3,P@5,P@10,N@1,N@3,N@5,N@10,PSP@1,PSP@3,PSP@5,PSP@10,PSN@1,PSN@3,PSN@5,PSN@10,R@10,R@100,R@200,loss,runtime,samples_per_second,steps_per_second
0,8.25,4.4,3.2,2.055,8.25,7.5829,7.8761,8.565,5.772,4.9317,5.008,5.8421,5.772,5.7204,6.0027,6.5879,10.301,18.3349,21.2567,17.7134,273.7713,7.305,0.058


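## Zero-shot

Evaluates the stock `bert-base-uncased` checkpoint, loaded without task-specific fine-tuning, on a 2,000-example sample of the test set.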

In [None]:
# Rebind `block` to the 'data' configuration, now with the bert-base-uncased tokenizer.
block = XCBlock.from_cfg('data', valid_pct=0.001, tokz='bert-base-uncased')

In [None]:
# Evaluation settings for the zero-shot run. The output directory keeps its
# original cluster path as the default but can be redirected with
# XC_NLG_OUTPUT_DIR.
args = XCLearningArguments(
    output_dir=os.environ.get(
        'XC_NLG_OUTPUT_DIR',
        '/scratch/scai/phd/aiz218323/Projects/xc_nlg/outputs/default',
    ),
    generation_length_penalty=1.5,
    per_device_eval_batch_size=64,
    evaluation_strategy='steps',
    label_names=['lbl2data_idx'],
)

In [None]:
# Load BT0002 from the stock 'bert-base-uncased' checkpoint.
model = BT0002.from_pretrained('bert-base-uncased', tn_targ=10_000, ig_tok=0)

If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`
Some weights of BT0002 were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['loss_fn.o']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Draw a 2000-example sample of the test dataset with a fixed seed.
test_dset = block.test.dset.sample(n=2000, seed=50)

In [None]:
# Metric for the sampled test set: label count and filterer come from the
# sample, `prop` from the training label data.
metric = PrecRecl(
    test_dset.n_lbl,
    test_dset.data.data_lbl_filterer,
    prop=block.train.dset.data.data_lbl,
    pk=10,
    rk=200,
    rep_pk=[1, 3, 5, 10],
    rep_rk=[10, 100, 200],
)

In [None]:
# Build the trie from the current block.
trie = XCTrie.from_block(block)

  0%|          | 0/312330 [00:00<?, ?it/s]

In [None]:
# Learner wired to the model, arguments, trie, collator and metric.
learn = XCLearner(model=model, args=args, trie=trie, data_collator=block.collator, compute_metrics=metric)

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variabl

In [None]:
# Run prediction on the sampled test set and show the resulting metrics.
o = learn.predict(test_dset)
display_metric(o.metrics)

Unnamed: 0,P@1,P@3,P@5,P@10,N@1,N@3,N@5,N@10,PSP@1,PSP@3,PSP@5,PSP@10,PSN@1,PSN@3,PSN@5,PSN@10,R@10,R@100,R@200,loss,runtime,samples_per_second,steps_per_second
0,3.85,2.35,1.8,0.905,3.85,3.6577,3.8229,3.7607,4.0204,3.8023,4.0358,3.6699,4.0204,4.1757,4.419,4.4,4.123,4.123,4.123,15.7025,109.7217,18.228,0.146
