# MR Playground

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

## Data dependencies

In [2]:
# Name of the checkpoint directory under ../data/models/; the hash check and
# the model-loading cell below both build their paths from this value.
model_ckpt = "morphen-alpha-220703-0805"

```
../data/models/morphen-alpha-220703-0805/pytorch_model.bin 0bcf54
```

In [3]:
from hashlib import sha1
from pathlib import Path
# Print a short SHA-1 fingerprint (first 6 hex digits) for every data
# dependency, so the loaded checkpoint can be checked against the expected
# value recorded in this section.
paths = [Path(f"../data/models/{model_ckpt}/pytorch_model.bin")]
for path_x in paths:
    digest = sha1(path_x.read_bytes()).hexdigest()
    print(path_x, digest[:6])

../data/models/morphen-alpha-220703-0805/pytorch_model.bin 0bcf54


## Loading model

In [4]:
# Pick the compute device: use CUDA only when it is actually available and
# the reported device name does not contain "GeForce"; otherwise use the CPU.
#
# NOTE: `torch.cuda.is_available` must be *called*. The bare function object
# is always truthy, so the previous condition never short-circuited and
# `get_device_name()` raised on machines without a usable CUDA device.
if torch.cuda.is_available() and "GeForce" not in torch.cuda.get_device_name():
    device = "cuda"
else:
    device = "cpu"
# The tokenizer is loaded under the name "bert-base-chinese" (fast
# implementation requested); the classifier is loaded from the local
# checkpoint directory selected by `model_ckpt` and moved to `device`.
tokenizer = AutoTokenizer.from_pretrained(
    "bert-base-chinese",
    use_fast=True,
)
model = AutoModelForSequenceClassification.from_pretrained(
    f"../data/models/{model_ckpt}"
)
model = model.to(device)


In [7]:
def mr(words):
    """Print the predicted label for each given word.

    Args:
        words: Either a single string or an iterable of strings. A bare
            string is treated as a one-element list.

    Each word is tokenized on its own, passed through the notebook-level
    ``model`` with gradient tracking disabled, and the index of the largest
    logit is mapped to a label name via ``model.config.id2label``. One
    ``word: label`` line is printed per input. Returns ``None``.
    """
    batch = [words] if isinstance(words, str) else words
    with torch.no_grad():
        for item in batch:
            # Tokenizer output is moved to the same device as the model.
            encoded = tokenizer(item, return_tensors="pt").to(device)
            logits = model(**encoded).logits
            label_id = logits.argmax(-1).item()
            print("{}: {}".format(item, model.config.id2label[label_id]))
    

## Playground

In [15]:
# Whitespace-separated sample words to classify in one go.
sample_words = "睡覺 充實 連動 出遊 停車 遛貓".split()
mr(sample_words)

睡覺: VO
充實: VR
連動: AV
出遊: VO
停車: VO
遛貓: VO
