In [2]:
import os

# Expose only GPU index 2 to CUDA. This cell runs before torch is imported so the
# restriction is in place by the time CUDA is initialised.
os.environ.update({'CUDA_VISIBLE_DEVICES': '2'})

In [3]:
import yaml

# Path (relative to the notebook's working directory) of the YAML file that holds
# the dataset and model parameters read by the cells below.
config_path = "Configs/config.yml"

# Open the file in a context manager so the handle is closed as soon as parsing
# finishes, instead of being left for the garbage collector.
with open(config_path) as config_file:
    config = yaml.safe_load(config_file)

In [4]:
from phonemize import phonemize
import phonemizer
import torch

# Shared espeak backend for language code 'ms', configured to keep punctuation
# and stress marks in its output.
global_phonemizer = phonemizer.backend.EspeakBackend(
    language='ms',
    preserve_punctuation=True,
    with_stress=True,
)

`openai-whisper` is not available, native whisper processor is not available, will use huggingface processor instead.


In [5]:
from transformers import AutoTokenizer
# Load the tokenizer whose name/path is given under dataset_params.tokenizer in the
# YAML config; it is handed to `phonemize` together with the espeak backend.
tokenizer = AutoTokenizer.from_pretrained(config['dataset_params']['tokenizer'])

In [20]:
# Example sentence to run through the model.
text = 'saya suka makan nasi ayam dan nasi itik.'
# `phonemize` (local module) returns a mapping; only its 'phonemes' entry is used here.
o = phonemize(text, global_phonemizer, tokenizer)
# Join the phoneme strings with single spaces into one string for the text cleaner.
phoneme = ' '.join(o['phonemes'])
phoneme

'sˈajə sˈukə mˈakan nˈasi ˈajam dan nˈasi ˈiteʔ .'

In [18]:
from text_utils import TextCleaner

# Callable that converts a phoneme string into a list of integer ids (used in the next cell).
# NOTE(review): the number printed below appears to be emitted while constructing
# TextCleaner (presumably the size of its symbol table) — confirm in text_utils.
textcleaner = TextCleaner()

177


In [33]:
# Encode the space-joined phoneme string as a list of integer ids, one per input position.
input_phonemes = textcleaner(phoneme)
input_phonemes

[61,
 156,
 43,
 52,
 83,
 16,
 61,
 156,
 63,
 53,
 83,
 16,
 55,
 156,
 43,
 53,
 43,
 56,
 16,
 56,
 156,
 43,
 61,
 51,
 16,
 156,
 43,
 52,
 43,
 55,
 16,
 46,
 43,
 56,
 16,
 56,
 156,
 43,
 61,
 51,
 16,
 156,
 51,
 62,
 47,
 148,
 16,
 4]

In [23]:
# One-time download of the pretrained checkpoint ('step_130000.t7') that is loaded
# further down; uncomment the next line to fetch it into the working directory.
# !wget https://huggingface.co/mesolitica/PL-BERT-MS/resolve/main/step_130000.t7

In [24]:
from transformers import AlbertConfig, AlbertModel
from model import MultiTaskModel
import pickle

# Load the pickled token map whose path is given under dataset_params.token_maps.
# Its values are dicts carrying at least a 'token' id (see how num_vocab is derived below).
# SECURITY: pickle.load executes arbitrary code embedded in the file — only use a
# token-map file from a source you trust.
with open(config['dataset_params']['token_maps'], 'rb') as handle:
    token_maps = pickle.load(handle)

In [25]:
# ALBERT configuration built directly from the `model_params` section of the YAML config.
albert_base_configuration = AlbertConfig(**config['model_params'])

# Wrap a freshly initialised ALBERT encoder in the multi-task model.
#   num_vocab   - one more than the largest 'token' id found in token_maps
#   num_tokens  - model_params.vocab_size
#   hidden_size - model_params.hidden_size
bert = MultiTaskModel(
    AlbertModel(albert_base_configuration),
    num_vocab=max(entry['token'] for entry in token_maps.values()) + 1,
    num_tokens=config['model_params']['vocab_size'],
    hidden_size=config['model_params']['hidden_size'],
)

In [26]:
# Move the model to the GPU. Only one device is visible because CUDA_VISIBLE_DEVICES
# was set at the top of the notebook, so this resolves to 'cuda:0'.
bert = bert.to('cuda')

In [27]:
# Read the downloaded checkpoint onto the CPU first (map_location='cpu').
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
checkpoint = torch.load('step_130000.t7', map_location='cpu')
# The model weights are stored under the 'net' key of the checkpoint.
state_dict = checkpoint['net']

In [28]:
from collections import OrderedDict

# Parameter names in the checkpoint carry a 7-character wrapper prefix
# (presumably 'module.', as added by DataParallel/DistributedDataParallel — confirm
# against the training script). Strip it only when it is actually present, so a
# checkpoint saved without the wrapper does not get its key names truncated.
prefix = 'module.'

new_state_dict = OrderedDict()
for k, v in state_dict.items():
    name = k[len(prefix):] if k.startswith(prefix) else k
    new_state_dict[name] = v

# strict=False tolerates missing/unexpected keys, so always inspect the result
# displayed below: anything other than "All keys matched successfully" means
# part of the model kept its random initialisation.
bert.load_state_dict(new_state_dict, strict=False)

<All keys matched successfully>

In [29]:
from utils import length_to_mask
# Device for the input tensors created below. The model itself was moved with the
# literal 'cuda' in an earlier cell, so keep the two in sync if this is changed.
device = 'cuda'

In [34]:
# Sanity check: number of encoded input positions for the example sentence.
len(input_phonemes)

48

In [40]:
# Preview of the model input: the id list as a tensor with a leading batch
# dimension of 1 (added by indexing with None), moved to the target device.
torch.tensor(input_phonemes)[None].to(device)

tensor([[ 61, 156,  43,  52,  83,  16,  61, 156,  63,  53,  83,  16,  55, 156,
          43,  53,  43,  56,  16,  56, 156,  43,  61,  51,  16, 156,  43,  52,
          43,  55,  16,  46,  43,  56,  16,  56, 156,  43,  61,  51,  16, 156,
          51,  62,  47, 148,  16,   4]], device='cuda:0')

In [39]:
# Preview of the mask for a single sequence of this length: a boolean row that is
# all False here, i.e. no position is flagged (there is no padding in a batch of one).
length_to_mask(torch.tensor([len(input_phonemes)]))

tensor([[False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False]])

In [41]:
# Model input: phoneme ids with a leading batch dimension of 1, on the target device.
phonemes = torch.tensor(input_phonemes).unsqueeze(0).to(device)
# Boolean mask built from the sequence length, on the same device as the input.
text_mask = length_to_mask(torch.tensor([len(input_phonemes)])).to(device)

In [42]:
# Inference only: put the model in eval mode so dropout is disabled and the
# predictions are deterministic, and skip autograd bookkeeping to save memory.
bert.eval()
with torch.no_grad():
    # text_mask is False at real positions, so its inverse (as ints) is the usual
    # attention mask with 1 for every position to attend to.
    tokens_pred, words_pred = bert(phonemes, attention_mask=(~text_mask).int())

In [43]:
# First model output: one score vector per input position
# (batch of 1 x 48 positions x 178 scores, per the output below).
tokens_pred.shape

torch.Size([1, 48, 178])

In [44]:
# For every input position of the first (only) batch item, take the highest-scoring
# word index and look it up in token_maps to show the predicted word.
# NOTE(review): the predicted index is used as a token_maps *key*, while num_vocab was
# derived from the 'token' *values* — confirm that keys and 'token' ids coincide.
[token_maps[int(i)] for i in words_pred.argmax(-1)[0]]

[{'word': 'saya', 'token': 759},
 {'word': 'saya', 'token': 759},
 {'word': 'saya', 'token': 759},
 {'word': 'saya', 'token': 759},
 {'word': 'saya', 'token': 759},
 {'word': '[SEP]', 'token': 2},
 {'word': 'suka', 'token': 1750},
 {'word': 'suka', 'token': 1750},
 {'word': 'suka', 'token': 1750},
 {'word': 'suka', 'token': 1750},
 {'word': 'suka', 'token': 1750},
 {'word': '[SEP]', 'token': 2},
 {'word': 'makan', 'token': 1186},
 {'word': 'makan', 'token': 1186},
 {'word': 'makan', 'token': 1186},
 {'word': 'makan', 'token': 1186},
 {'word': 'makan', 'token': 1186},
 {'word': 'makan', 'token': 1186},
 {'word': '[SEP]', 'token': 2},
 {'word': 'nasi', 'token': 1555},
 {'word': 'nasi', 'token': 1555},
 {'word': 'nasi', 'token': 1555},
 {'word': 'nasi', 'token': 1555},
 {'word': 'nasi', 'token': 1555},
 {'word': '[SEP]', 'token': 2},
 {'word': 'ayam', 'token': 1269},
 {'word': 'ayam', 'token': 1269},
 {'word': 'ayam', 'token': 1269},
 {'word': 'ayam', 'token': 1269},
 {'word': 'ayam', 'to