In [1]:
import sys
from pathlib import Path
main_path = Path().absolute().parent
data_path = main_path / 'data'
setting_path = main_path / 'setting_files'

sys.path.append(str(main_path / 'src'))
from collections import defaultdict

import torch
import yaml
import json
import pytorch_lightning as pl
from pytorch_lightning import seed_everything
from nlu_utils import NLUTokenizer
from nlu_models import NLUModel
from nlu_utils import NLUDataModule

with (setting_path / 'train_settings.yml').open('r') as file:
    settings = yaml.load(file, Loader=yaml.FullLoader)

data_module_settings = settings['data_module']
model_settings = settings['model']
trainer_settings = settings['trainer']

data_module = NLUDataModule(
    train_path=data_path / data_module_settings['train_file'], 
    valid_path=data_path / data_module_settings['valid_file'],
    test_path=data_path / data_module_settings['test_file'],
    labels_path=data_path / data_module_settings['labels_file'],
    batch_size=data_module_settings['batch_size'], 
    max_len=data_module_settings['max_len'],
    num_workers=data_module_settings['num_workers'],
    seed=settings['seed']
)

# if trainer_settings['deterministic']:
#     seed_everything(seed=settings['seed'], workers=True)
# deterministic = trainer_settings['deterministic']

# trainer = pl.Trainer(
#     gpus=trainer_settings['n_gpus'], 
#     max_epochs=trainer_settings['n_epochs'], 
#     num_sanity_val_steps=trainer_settings['num_sanity_val_steps'],
#     log_every_n_steps=trainer_settings['log_every_n_steps'],
#     deterministic=deterministic,
# )
tokenizer = NLUTokenizer()

In [2]:
# model_idx = 8
# model_dict = dict(enumerate([
#     ('ce_l7_tk3_0', 'epoch=11-step=167-val_loss=1.74.ckpt'),     # 0
#     ('ce_l7_tk3_1', 'epoch=8-step=125-val_loss=1.57.ckpt'),      # 1
#     ('ce_l7_tk3_3', 'epoch=6-step=97-val_loss=1.82.ckpt'),       # 2
#     ('focal_l7_tk3_0', 'epoch=17-step=251-val_loss=0.43.ckpt'),  # 3
#     ('focal_l7_tk3_1', 'epoch=19-step=279-val_loss=0.35.ckpt'),  # 4
#     ('focal_l7_tk3_3', 'epoch=18-step=265-val_loss=0.44.ckpt'),  # 5
#     ('focal2_l7_tk3_0', 'epoch=18-step=265-val_loss=0.25.ckpt'), # 6
#     ('focal2_l7_tk3_1', 'epoch=19-step=279-val_loss=0.17.ckpt'), # 7
#     ('focal2_l7_tk3_3', 'epoch=18-step=265-val_loss=0.30.ckpt'), # 8
# ]))

# s = model_dict[model_idx]
s = ('ce_l7_tk3_3', 'best_model.ckpt')
print(s)
checkpoint_path = str(main_path / 'logs' / 'nlu_simple' / s[0] / 'checkpoints' / s[1])
model = NLUModel.load_from_checkpoint(checkpoint_path)


('ce_l7_tk3_3', 'best_model.ckpt')


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [162]:
from spacy.training import biluo_tags_to_spans, iob_to_biluo
from tokenizations import get_alignments 
from spacy.training import biluo_tags_to_offsets
import pandas as pd

with (data_path / 'labels_simple.json').open('r', encoding='utf-8') as file:
    ls = json.load(file)
tags2id = ls['tags']
intent2id = ls['intent']
id2tags = {v: k for k, v in ls['tags'].items()}
id2intent = {v: k for k, v in ls['intent'].items()}

from sparql import SparqlHandler, convert_to_string

sparql = SparqlHandler(data_path / 'AccountRDF.xml')
bs_role = sparql.get_role_dict('BS')
is_role = sparql.get_role_dict('IS')

In [288]:
data = {
    0: 'What was the profit in the last year?',
    1: "if the noncurrent assets reduced by 62 percent in this fiscal year, what will be the effect to assets?",
    2: "What will be our revenue in 4th quarter?",
    3: "What is going to happen to the operating income when the cost of sales increases by 84% at this year?"
}
text = data[0]
bert_encodes = tokenizer(
    text, 
    add_special_tokens=True, 
    truncation=True, 
    max_length=64,
    return_tensors='pt'
)
# o = model(**bert_encodes)
o = model.predict(**bert_encodes)

In [289]:
o

{'attn': None, 'tags': [0, 5, 5, 5, 10, 5, 5, 14, 15, 5, 0], 'intent': [3]}

In [290]:
intent = list(map(id2intent.get, o['intent']))[0]
tags = list(map(id2tags.get, o['tags']))

bert_tkns = [s.lstrip('##') for s in tokenizer.bert_tokenize(text)]
spacy_tkns = tokenizer.spacy_tokenize(text)
biluo_tags = iob_to_biluo(tags[1:-1])

In [302]:
a2b, b2a = get_alignments(bert_tkns, spacy_tkns)
mapped_tags = []
for i, tkn in enumerate(spacy_tkns):
    mapped_tkn_ids = b2a[i]
    ts = [biluo_tags[j] for j in mapped_tkn_ids]
    mapped_tags.append(ts[0])
doc = tokenizer.spacy_nlp(text)
# doc.ents = biluo_tags_to_spans(doc, mapped_tags)

In [303]:
bert_ents = [('O', '') if t == 'O' else t.split('-') for t in mapped_tags]

In [297]:
bert_ents = [('O', '') if t == 'O' else t.split('-') for t in mapped_tags]
spacy_ents = iob_to_biluo([x.ent_iob_ if x.ent_iob_ == 'O' else f'{x.ent_iob_}-{x.ent_type_}' for x in doc])
spacy_ents = [x.replace('DATE', 'TIME') for x in spacy_ents]
spacy_ents = [('O', '') if t == 'O' else t.split('-') for t in spacy_ents]
for i, (b_ent, s_ent) in enumerate(zip(bert_ents, spacy_ents)):
    # stop words
    if spacy_tkns[i] in ['the', 'a', 'an', 'to', ]:
        continue
    if b_ent[1] == '' and s_ent[1] != '':
        bert_ents[i] = s_ent
    elif b_ent[1] == s_ent[1] and b_ent[0] != s_ent[0]:
        bert_ents[i] = s_ent
print(list(map(lambda x: x[0] if x[0] == 'O' else '-'.join(x), bert_ents)))
print(list(map(lambda x: x[0] if x[0] == 'O' else '-'.join(x), spacy_ents)))

['O', 'O', 'O', 'U-IS', 'O', 'O', 'I-TIME', 'L-TIME', 'O']
['O', 'O', 'O', 'O', 'O', 'B-TIME', 'I-TIME', 'L-TIME', 'O']


In [304]:
bert_ents

[('O', ''),
 ('O', ''),
 ('O', ''),
 ['U', 'IS'],
 ('O', ''),
 ('O', ''),
 ['B', 'TIME'],
 ['L', 'TIME'],
 ('O', '')]

In [301]:
spacy_ents

[('O', ''),
 ('O', ''),
 ('O', ''),
 ('O', ''),
 ('O', ''),
 ['B', 'TIME'],
 ['I', 'TIME'],
 ['L', 'TIME'],
 ('O', '')]

In [299]:
ensembled_tags = list(map(lambda x: x[0] if x[0] == 'O' else '-'.join(x), bert_ents))
ensembled_tags

['O', 'O', 'O', 'U-IS', 'O', 'O', 'I-TIME', 'L-TIME', 'O']

In [286]:
a2b, b2a = get_alignments(bert_tkns, spacy_tkns)
mapped_tags = []
for i, tkn in enumerate(spacy_tkns):
    mapped_tkn_ids = b2a[i]
    ts = [biluo_tags[j] for j in mapped_tkn_ids]
    mapped_tags.append(ts[0])
doc = tokenizer.spacy_nlp(text)

bert_ents = [('O', '') if t == 'O' else t.split('-') for t in mapped_tags]
spacy_ents = iob_to_biluo([x.ent_iob_ if x.ent_iob_ == 'O' else f'{x.ent_iob_}-{x.ent_type_}' for x in doc])
spacy_ents = [x.replace('DATE', 'TIME') for x in spacy_ents]
spacy_ents = [('O', '') if t == 'O' else t.split('-') for t in spacy_ents]
for i, (b_ent, s_ent) in enumerate(zip(bert_ents, spacy_ents)):
    if b_ent[1] == '' and s_ent[1] != '':
        bert_ents[i] = s_ent
    elif b_ent[1] == s_ent[1] and b_ent[0] != s_ent[0]:
        bert_ents[i] = s_ent
print(list(map(lambda x: x[0] if x[0] == 'O' else '-'.join(x), bert_ents)))
print(list(map(lambda x: x[0] if x[0] == 'O' else '-'.join(x), spacy_ents)))
ensembled_tags = list(map(lambda x: x[0] if x[0] == 'O' else '-'.join(x), bert_ents))

doc.ents = biluo_tags_to_spans(doc, ensembled_tags)

entities = []
for s, e, ent in biluo_tags_to_offsets(doc, ensembled_tags):
    lemma = ' '.join([x.lemma_ for x in tokenizer.spacy_nlp(text[s:e])])
    entities.append((lemma, ent.upper()))

nlu_results = defaultdict()
nlu_results['tags'] = defaultdict()
nlu_results['intent'] = None if intent == 'None' else intent
accounts = []
for word, tag in entities:
    if tag in ['BS', 'IS']:
        accounts.append((word, tag))
    else:
        nlu_results['tags'][tag] = word

nlu_results['tags']['ACCOUNTS'] = accounts
nlu_results

['O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-IS', 'L-IS', 'O', 'O', 'B-IS', 'I-IS', 'L-IS', 'U-APPLY', 'O', 'B-PERCENT', 'L-PERCENT', 'O', 'B-TIME', 'L-TIME', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PERCENT', 'L-PERCENT', 'O', 'B-TIME', 'L-TIME', 'O']


defaultdict(None,
            {'tags': defaultdict(None,
                         {'APPLY': 'increase',
                          'PERCENT': '84 %',
                          'TIME': 'this year',
                          'ACCOUNTS': [('operate income', 'IS'),
                           ('cost of sale', 'IS')]}),
             'intent': 'IF.fact'})

after nlu module

In [234]:
tokenizer.spacy_nlp()

{('1st', 'REL'),
 ('2nd', 'REL'),
 ('3rd', 'REL'),
 ('4th', 'REL'),
 ('calendar', 'PAST'),
 ('come', 'FUTURE'),
 ('current', 'FUTURE'),
 ('final', 'REL'),
 ('financial', 'PAST'),
 ('first', 'REL'),
 ('fiscal', 'PAST'),
 ('follow', 'FUTURE'),
 ('fourth', 'REL'),
 ('future', 'FUTURE'),
 ('last', 'PAST'),
 ('late', 'REL'),
 ('next', 'FUTURE'),
 ('past', 'PAST'),
 ('precede', 'PAST'),
 ('present', 'FUTURE'),
 ('previous', 'PAST'),
 ('recent', 'PAST'),
 ('second', 'REL'),
 ('tax', 'PAST'),
 ('third', 'REL'),
 ('upcoming', 'FUTURE')}

In [159]:
df = pd.read_csv(data_path / 'AccountWords.csv', encoding='utf-8')
words_dict = defaultdict(set)
for typ in ['year', 'quarter', 'words']:
    df_temp = df.loc[:, [typ, f'{typ}_tag', f'{typ}_desc']]
    df_temp = df_temp.loc[~df_temp[typ].isna(), :]
    for _, (w, t, desc) in df_temp.iterrows():
        if typ in ['year', 'quarter']:
            words_dict['TIME'].add((tokenizer.spacy_lemma(w), desc))
        else:
            words_dict['APPLY'].add((tokenizer.spacy_lemma(w), desc))

In [160]:
nlu_results

defaultdict(None,
            {'tags': defaultdict(None,
                         {'APPLY': 'reduce',
                          'PERCENT': '62 percent',
                          'TIME': 'fiscal year',
                          'ACCOUNTS': [('noncurrent asset', 'BS'),
                           ('asset', 'BS')]}),
             'intent': 'IF.fact'})

In [209]:
tags = nlu_results['tags']
tags = {'APPLY': 'increase', 'PERCENT': '10 %', 'ACCOUNTS': [('operate income', 'IS'), ('cost of sale', 'IS')]}

In [210]:
for acc_name, knowlegde in tags.get('ACCOUNTS'):
    acc = eng2acc[acc_name]
    print(acc, knowlegde)

OperatingIncome IS
CostOfSales IS


In [211]:
accs = tags.get('ACCOUNTS')
a_acc_name, a_knowledge = accs[0]
a_acc = eng2acc[a_acc_name]
b_acc_name, b_knowledge = accs[1]
b_acc = eng2acc[b_acc_name]
a_acc, b_acc

('OperatingIncome', 'CostOfSales')

In [212]:
def get_target_subject_accounts(role_dict, a_acc, b_acc, a_knowledge, b_knowledge):
    def check(role_dict, acc, trg_acc):
        if trg_acc in role_dict[acc]:
            return True
        
        for sub_acc in role_dict[acc]:
            o = check(role_dict, sub_acc, trg_acc)
            if o:
                return o
        else:
            return False

    target_account, subject_account = None, None
    if check(role_dict, a_acc, b_acc):
        target_account = f'{a_knowledge}.{a_acc}'
        subject_account = f'{b_knowledge}.{b_acc}'

    if check(role_dict, b_acc, a_acc):
        target_account = f'{b_knowledge}.{b_acc}'
        subject_account = f'{a_knowledge}.{a_acc}'

    return target_account, subject_account

In [232]:
check(role_dict, a_acc, b_acc)

NameError: name 'role_dict' is not defined

In [229]:
a_acc

'OperatingIncome'

In [230]:
check(is_role, a_acc, b_acc)

True

In [231]:
check(is_role, b_acc, a_acc)

False

In [214]:
get_target_subject_accounts(is_role, a_acc, b_acc, a_knowledge, b_knowledge)

(None, None)

In [215]:
is_role['OperatingIncome']

defaultdict(list,
            {'GrossProfit': ['Revenue', 'CostOfSales'],
             'CostOfSalesRatio': ['Revenue', 'CostOfSales'],
             'SellingGeneralAdministrativeRatio': ['Revenue',
              'SellingGeneralAdministrativeExpenses'],
             'ProfitRatio': ['Revenue', 'Profit'],
             'OperatingIncome': ['GrossProfit',
              'SellingGeneralAdministrativeExpenses'],
             'ProfitBeforeTax': ['OperatingIncome',
              'FinanceIncome',
              'FinancialExpenses'],
             'Profit': ['ProfitBeforeTax', 'IncomeTaxExpense'],
             'IncomeTaxRatio': ['ProfitBeforeTax', 'IncomeTaxExpense'],
             'IncomeStatement': ['Profit'],
             'SalesAndSellingGeneralAdministrativeRatio': ['CostOfSalesRatio',
              'SellingGeneralAdministrativeRatio'],
             'Ratios': ['CostOfSalesRatio',
              'SellingGeneralAdministrativeRatio',
              'SalesAndSellingGeneralAdministrativeRatio',
          

In [198]:
apply = tags.get('APPLY')
percent = tags.get('PERCENT')
apply, percent

('reduce', '62 percent')

In [207]:
['APPLY', 'IS', 'BS', 'PERCENT', 'TIME']

{100: '[UNK]',
 102: '[SEP]',
 0: '[PAD]',
 101: '[CLS]',
 103: '[MASK]',
 5: 'O',
 6: 'B-APPLY',
 7: 'I-APPLY',
 8: 'B-BS',
 9: 'I-BS',
 10: 'B-IS',
 11: 'I-IS',
 12: 'B-PERCENT',
 13: 'I-PERCENT',
 14: 'B-TIME',
 15: 'I-TIME'}

In [205]:
desc = dict(words_dict['APPLY']).get(apply)
if desc == 'UP':
    sign = 1
elif desc == 'DOWN':
    sign = -1
else:
    pass

number = 1 + sign*int(percent.split(' ')[0]) / 100
number

0.38

In [200]:
desc = dict(words_dict['APPLY']).get(apply)
desc

'DOWN'

In [173]:
bs_role['TradeReceivableTurnoverPeriod']

['TradeAndOtherCurrentReceivables', 'Revenue']

In [129]:
from datetime import datetime as dt
today = dt.now()

In [147]:
date_keyword = nlu_results['tags'].get('TIME')
quarter_dict = {
    1: ('01.01', '03.31'), 2: ('04.01', '06.30'), 3: ('07.01', '09.30'), 4: ('10.01', '12.31')
}

In [148]:
k = date_keyword.split(' ', 1)[0]
dict(words_dict['TIME']).get(k)

'REL'

In [149]:
date_keyword

'4th quarter'

In [132]:
def _get_current_quarter():
    for q, (s, e) in quarter_dict.items():
        s_time = dt.strptime(f'{today.year}.{s} 00:00:01', '%Y.%m.%d %H:%M:%S')
        e_time = dt.strptime(f'{today.year}.{e} 23:59:59', '%Y.%m.%d %H:%M:%S')
        if s_time <= today <= e_time:
            break
    return q

In [150]:
recalculate_acc_year = False
if 'year' in date_keyword:
    k = date_keyword.split(' ', 1)[0]
    desc = dict(words_dict['TIME']).get(k)
    if desc == 'PAST':
        ref_year = today.year - 1
        recalculate_acc_year = True
    elif desc == 'FUTURE':
        ref_year = today.year + 1
    else:
        ref_year = today.year
    ref_Q = None
elif 'quarter' in date_keyword:
    k = date_keyword.split(' ', 1)[0]
    if k in ['tax', 'fiscal', 'financial', 'calendar']:
        # raise error usually don't say these words
        error = True
        # return error, 'Need more specific time information'
    desc = dict(words_dict['TIME']).get(k)
    cur_Q = _get_current_quarter()
    if desc == 'PAST':
        recalculate_acc_year = True
        if cur_Q == 1:
            ref_year = today.year - 1
            ref_Q = 4
        else:
            ref_year = today.year
            ref_Q = cur_Q - 1
    elif desc == 'FUTURE':
        if cur_Q == 4:
            ref_year = today.year + 1
            ref_Q = 1
        else:
            ref_year = today.year
            ref_Q = cur_Q + 1
    else:
        # REL case
        for kq, ws in enumerate([['first', '1st'], ['second', '2nd'], ['third', '3rd'], ['fourth', '4th', 'final']], 1):
            if k in ws:
                break
        # user's talking quarter = kq
        ref_Q = kq
        ref_year = today.year
else:
    error = True

In [151]:
ref_year, ref_Q

(2022, 4)

In [117]:
quarter_dict[ref_Q][-1]

'12.31'

In [152]:
if recalculate_acc_year:
    if today >= dt.strptime(f'{today.year}.04.01', '%Y.%m.%d'):
        # still no report
        # true : acc_year = 2020  2021.12.08 >= 2021.04.01 
        # false: acc_year = 2019  2021.01.08 >= 2021.04.01 
        account_year = ref_year
    else:
        account_year = ref_year - 1
    
else:
    account_year = ref_year
account_quarter = ref_Q

In [154]:
account_year, account_quarter

(2022, 4)

In [29]:
if date_keyword is None:
    ref_year = today.year
else:
    today_year 

{('change', 'DOWN'),
 ('decline', 'DOWN'),
 ('decrease', 'DOWN'),
 ('drop', 'DOWN'),
 ('fall', 'DOWN'),
 ('grow', 'UP'),
 ('increase', 'UP'),
 ('reduce', 'DOWN'),
 ('rise', 'UP'),
 ('shrink', 'DOWN')}

In [20]:
import pandas as pd

In [96]:
df_account = pd.read_csv(data_path / 'AccountName.csv', encoding='utf-8')
ACC_DICT = defaultdict(dict)
eng2acc = defaultdict()
for _, row in df_account.iterrows():
    acc = row['acc']
    eng = row['acc_name_eng']
    kor = row['acc_name_kor']
    group = row['group']
    ACC_DICT[acc]['kor_name'] = kor
    ACC_DICT[acc]['eng_name'] = eng
    ACC_DICT[acc]['group'] = group
    
    eng_lemma = tokenizer.spacy_lemma(eng.lower())
    eng2acc[eng_lemma] = acc

In [100]:
eng2acc[acc_name]

'CostOfSalesRatio'

In [23]:
ACC_DICT['AssetsAbstract']

{'kor_name': '자산총계', 'eng_name': 'Assets', 'group': 'BS-Value-1'}

In [19]:
bs_role

defaultdict(list,
            {'AssetsAbstract': ['CurrentAssets', 'NoncurrentAssets'],
             'CurrentAssets': ['CashAndCashEquivalents',
              'TradeAndOtherCurrentReceivables',
              'PrepaidExpenses',
              'Inventories'],
             'TradeReceivableTurnoverPeriod': ['TradeAndOtherCurrentReceivables',
              'Revenue'],
             'PrepaidExpensesTurnoverPeriod': ['PrepaidExpenses', 'Revenue'],
             'InventoriesTurnoverPeriod': ['Inventories', 'Revenue'],
             'NoncurrentAssets': ['PropertyPlantAndEquipment',
              'IntangibleAssets'],
             'BalanceSheet': ['AssetsAbstract', 'LiabilitiesAndEquities'],
             'LiabilitiesAbstract': ['CurrentLiabilities',
              'NoncurrentLiabilities'],
             'CurrentLiabilities': ['TradeAndOtherCurrentPayables',
              'ShortTermBorrowings',
              'AdvancesCustomers'],
             'TradePayablesTurnoverPeriod': ['TradeAndOtherCurrentPayables

In [None]:
def get_role_dict(knowledge):
    knowledge_query = sparql.get_predefined_knowledge(knowledge=knowledge)
    sparql_results = sparql.query(knowledge_query)
    role_dict = defaultdict(list)
    for s, p, o in sparql_results:
        s, p, o = map(convert_to_string, [s, p, o])
        if s == 'CalendarOneYear' or o == 'CalendarOneYear':
            continue
        if s not in role_dict[o]:
            role_dict[o].append(s)
        
    return role_dict

In [27]:
doc = tokenizer.spacy_nlp(text)
for x in doc:
    print(x, x.ent_type_)

if 
the 
noncurrent 
assets 
reduced 
by 
62 PERCENT
percent PERCENT
in 
this DATE
fiscal DATE
year DATE
, 
what 
will 
be 
the 
effect 
to 
assets 
? 


In [71]:
import json
import os
import uuid

from IPython.core.display import display, HTML, Javascript

from bertviz.util import format_special_chars, format_attention, num_layers


def head_view(
        attention=None,
        tokens=None,
        sentence_b_start=None,
        prettify_tokens=True,
        layer=None,
        heads=None,
        encoder_attention=None,
        decoder_attention=None,
        cross_attention=None,
        encoder_tokens=None,
        decoder_tokens=None,
        include_layers = None
):
    """Render head view
        Args:
            For self-attention models:
                attention: list of ``torch.FloatTensor``(one for each layer) of shape
                    ``(batch_size(must be 1), num_heads, sequence_length, sequence_length)``
                tokens: list of tokens
                sentence_b_start: index of first wordpiece in sentence B if input text is sentence pair (optional)
            For encoder-decoder models:
                encoder_attention: list of ``torch.FloatTensor``(one for each layer) of shape
                    ``(batch_size(must be 1), num_heads, encoder_sequence_length, encoder_sequence_length)``
                decoder_attention: list of ``torch.FloatTensor``(one for each layer) of shape
                    ``(batch_size(must be 1), num_heads, decoder_sequence_length, decoder_sequence_length)``
                cross_attention: list of ``torch.FloatTensor``(one for each layer) of shape
                    ``(batch_size(must be 1), num_heads, decoder_sequence_length, encoder_sequence_length)``
                encoder_tokens: list of tokens for encoder input
                decoder_tokens: list of tokens for decoder input
            For all models:
                prettify_tokens: indicates whether to remove special characters in wordpieces, e.g. Ġ
                layer: index (zero-based) of initial selected layer in visualization. Defaults to layer 0.
                heads: Indices (zero-based) of initial selected heads in visualization. Defaults to all heads.
                include_layers: Indices (zero-based) of layers to include in visualization. Defaults to all layers.
                    Note: filtering layers may improve responsiveness of the visualization for long inputs.
    """

    attn_data = []
    if attention is not None:
        if tokens is None:
            raise ValueError("'tokens' is required")
        if encoder_attention is not None or decoder_attention is not None or cross_attention is not None \
                or encoder_tokens is not None or decoder_tokens is not None:
            raise ValueError("If you specify 'attention' you may not specify any encoder-decoder arguments. This"
                             " argument is only for self-attention models.")
        if include_layers is None:
            include_layers = list(range(num_layers(attention)))
        attention = format_attention(attention, include_layers)
        if sentence_b_start is None:
            attn_data.append(
                {
                    'name': None,
                    'attn': attention.tolist(),
                    'left_text': tokens,
                    'right_text': tokens
                }
            )
        else:
            slice_a = slice(0, sentence_b_start)  # Positions corresponding to sentence A in input
            slice_b = slice(sentence_b_start, len(tokens))  # Position corresponding to sentence B in input
            attn_data.append(
                {
                    'name': 'All',
                    'attn': attention.tolist(),
                    'left_text': tokens,
                    'right_text': tokens
                }
            )
            attn_data.append(
                {
                    'name': 'Sentence A -> Sentence A',
                    'attn': attention[:, :, slice_a, slice_a].tolist(),
                    'left_text': tokens[slice_a],
                    'right_text': tokens[slice_a]
                }
            )
            attn_data.append(
                {
                    'name': 'Sentence B -> Sentence B',
                    'attn': attention[:, :, slice_b, slice_b].tolist(),
                    'left_text': tokens[slice_b],
                    'right_text': tokens[slice_b]
                }
            )
            attn_data.append(
                {
                    'name': 'Sentence A -> Sentence B',
                    'attn': attention[:, :, slice_a, slice_b].tolist(),
                    'left_text': tokens[slice_a],
                    'right_text': tokens[slice_b]
                }
            )
            attn_data.append(
                {
                    'name': 'Sentence B -> Sentence A',
                    'attn': attention[:, :, slice_b, slice_a].tolist(),
                    'left_text': tokens[slice_b],
                    'right_text': tokens[slice_a]
                }
            )
    elif encoder_attention is not None or decoder_attention is not None or cross_attention is not None:
        if encoder_attention is not None:
            if encoder_tokens is None:
                raise ValueError("'encoder_tokens' required if 'encoder_attention' is not None")
            if include_layers is None:
                include_layers = list(range(num_layers(encoder_attention)))
            encoder_attention = format_attention(encoder_attention, include_layers)
            attn_data.append(
                {
                    'name': 'Encoder',
                    'attn': encoder_attention.tolist(),
                    'left_text': encoder_tokens,
                    'right_text': encoder_tokens
                }
            )
        if decoder_attention is not None:
            if decoder_tokens is None:
                raise ValueError("'decoder_tokens' required if 'decoder_attention' is not None")
            if include_layers is None:
                include_layers = list(range(num_layers(decoder_attention)))
            decoder_attention = format_attention(decoder_attention, include_layers)
            attn_data.append(
                {
                    'name': 'Decoder',
                    'attn': decoder_attention.tolist(),
                    'left_text': decoder_tokens,
                    'right_text': decoder_tokens
                }
            )
        if cross_attention is not None:
            if encoder_tokens is None:
                raise ValueError("'encoder_tokens' required if 'cross_attention' is not None")
            if decoder_tokens is None:
                raise ValueError("'decoder_tokens' required if 'cross_attention' is not None")
            if include_layers is None:
                include_layers = list(range(num_layers(cross_attention)))
            cross_attention = format_attention(cross_attention, include_layers)
            attn_data.append(
                {
                    'name': 'Cross',
                    'attn': cross_attention.tolist(),
                    'left_text': decoder_tokens,
                    'right_text': encoder_tokens
                }
            )
    else:
        raise ValueError("You must specify at least one attention argument.")

    if layer is not None and layer not in include_layers:
        raise ValueError(f"Layer {layer} is not in include_layers: {include_layers}")

    # Generate unique div id to enable multiple visualizations in one notebook
    vis_id = 'bertviz-%s'%(uuid.uuid4().hex)

    # Compose html
    if len(attn_data) > 1:
        options = '\n'.join(
            f'<option value="{i}">{attn_data[i]["name"]}</option>'
            for i, d in enumerate(attn_data)
        )
        select_html = f'Attention: <select id="filter">{options}</select>'
    else:
        select_html = ""
    vis_html = f"""      
        <div id='%s'>
            <span style="user-select:none">
                Layer: <select id="layer"></select>
                {select_html}
            </span>
            <div id='vis'></div>
        </div>
    """%(vis_id)

    for d in attn_data:
        attn_seq_len_left = len(d['attn'][0][0])
        if attn_seq_len_left != len(d['left_text']):
            raise ValueError(
                f"Attention has {attn_seq_len_left} positions, while number of tokens is {len(d['left_text'])} "
                f"for tokens: {' '.join(d['left_text'])}"
            )
        attn_seq_len_right = len(d['attn'][0][0][0])
        if attn_seq_len_right != len(d['right_text']):
            raise ValueError(
                f"Attention has {attn_seq_len_right} positions, while number of tokens is {len(d['right_text'])} "
                f"for tokens: {' '.join(d['right_text'])}"
            )
        if prettify_tokens:
            d['left_text'] = format_special_chars(d['left_text'])
            d['right_text'] = format_special_chars(d['right_text'])
    params = {
        'attention': attn_data,
        'default_filter': "0",
        'root_div_id': vis_id,
        'layer': layer,
        'heads': heads,
        'include_layers': include_layers
    }
    # return vis_html, params
    # require.js must be imported for Colab or JupyterLab:
    display(HTML('<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js"></script>'))
    display(HTML(vis_html))
    
    # __location__ = os.path.realpath(
    #     os.path.join(os.getcwd(), os.path.dirname(__file__)))
    vis_js = open(os.path.join(str(Path('.').absolute()), 'head_view.js')).read().replace("PYTHON_PARAMS", json.dumps(params))
    display(Javascript(vis_js))

    return vis_js, vis_html

In [81]:
html_file = """<!DOCTYPE html>
<html lang="en" dir="ltr">
    <head>
        <meta charset="utf-8">
        <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js"></script>
    </head>
    <body>
        {}
        <button onclick="btn_alert('안녕하세요!')"> 인사하세요! </button>

        <script type="text/javascript" src="./data/vis.js"></script> 

    </body>
</html>""".format(vis_html)

In [82]:
with Path('vis.html').open('w', encoding='utf-8') as file:
    file.write(html_file)

In [None]:
from nbconvert import HTMLExporter

def save_notebook():
    display(
        Javascript("IPython.notebook.save_notebook()"),
        include=['application/javascript']
    )

def output_HTML(read_file, output_file):
    import codecs
    import nbformat
    exporter = HTMLExporter()
    # read_file is '.ipynb', output_file is '.html'
    output_notebook = nbformat.read(read_file, as_version=4)
    output, resources = exporter.from_notebook_node(output_notebook)
    codecs.open(output_file, 'w', encoding='utf-8').write(output)

In [44]:
tokens = tokenizer.bert.convert_ids_to_tokens(bert_encodes['input_ids'].tolist()[0])
attn = o['attn']

In [68]:
import site; 
print(site.getsitepackages())

['C:\\Users\\simon\\miniconda3\\envs\\venv', 'C:\\Users\\simon\\miniconda3\\envs\\venv\\lib\\site-packages']


In [83]:
from bertviz import model_view

In [73]:
vis_js, vis_html = head_view(attn, tokens)

<IPython.core.display.Javascript object>

In [70]:
with Path('vis.js').open('w', encoding='utf-8') as file:
    file.write(vis_js)

In [67]:
(vis_js)

'/**\n * @fileoverview Transformer Visualization D3 javascript code.\n *\n *\n *  Based on: https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/visualization/attention.js\n *\n * Change log:\n *\n * 12/19/18  Jesse Vig   Assorted cleanup. Changed orientation of attention matrices.\n * 12/29/20  Jesse Vig   Significant refactor.\n * 12/31/20  Jesse Vig   Support multiple visualizations in single notebook.\n * 02/06/21  Jesse Vig   Move require config from separate jupyter notebook step\n * 05/03/21  Jesse Vig   Adjust height of visualization dynamically\n * 07/25/21  Jesse Vig   Support layer filtering\n **/\n\nrequire.config({\n  paths: {\n      d3: \'//cdnjs.cloudflare.com/ajax/libs/d3/5.7.0/d3.min\',\n    jquery: \'//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min\',\n  }\n});\n\nrequirejs([\'jquery\', \'d3\'], function ($, d3) {\n\n    const params = {"attention": [{"name": null, "attn": [[[[0.05257847532629967, 0.03847360238432884, 0.05454699695110321, 0.027