In [1]:
import malaya

## List available deep learning POS models

In [2]:
# List the names of the deep learning POS models; each name can be passed to
# malaya.pos.deep_model(...) as done in the "Load deep learning models" section.
malaya.pos.available_deep_model()

['concat', 'bahdanau', 'luong', 'entity-network', 'attention']

## Describe supported POS tags

In [3]:
# Display the supported POS tags together with a short description of each.
# NOTE(review): the listing shows 'ADX' for the auxiliary verb tag, while the CRF
# transition output further down uses 'AUX' — likely a typo in the library's
# description text; confirm upstream.
malaya.describe_pos()

ADJ - Adjective, kata sifat
ADP - Adposition
ADV - Adverb, kata keterangan
ADX - Auxiliary verb, kata kerja tambahan
CCONJ - Coordinating conjuction, kata hubung
DET - Determiner, kata penentu
NOUN - Noun, kata nama
NUM - Number, nombor
PART - Particle
PRON - Pronoun, kata ganti
PROPN - Proper noun, kata ganti nama khas
SCONJ - Subordinating conjunction
SYM - Symbol
VERB - Verb, kata kerja
X - Other


## Load CRF Model

In [4]:
# Load the CRF POS tagger. The same `crf` object is reused below for prediction,
# feature/transition inspection, and as a member of the voting stack.
crf = malaya.pos.crf()

In [5]:
# Sample news passage used as the input for every tagger in this notebook
# (CRF, each deep learning model, and the voting stack).
string = 'KUALA LUMPUR: Sempena sambutan Aidilfitri minggu depan, Perdana Menteri Tun Dr Mahathir Mohamad dan Menteri Pengangkutan Anthony Loke Siew Fook menitipkan pesanan khas kepada orang ramai yang mahu pulang ke kampung halaman masing-masing. Dalam video pendek terbitan Jabatan Keselamatan Jalan Raya (JKJR) itu, Dr Mahathir menasihati mereka supaya berhenti berehat dan tidur sebentar  sekiranya mengantuk ketika memandu.'

In [6]:
# Tag the sample passage with the CRF model. The displayed result is a list of
# (token, tag) tuples; note the tokens come back lowercased and without the
# punctuation present in the input.
crf.predict(string)

[('kuala', 'PROPN'),
 ('lumpur', 'PROPN'),
 ('sempena', 'PROPN'),
 ('sambutan', 'NOUN'),
 ('aidilfitri', 'NOUN'),
 ('minggu', 'NOUN'),
 ('depan', 'ADJ'),
 ('perdana', 'PROPN'),
 ('menteri', 'PROPN'),
 ('tun', 'PROPN'),
 ('dr', 'PROPN'),
 ('mahathir', 'PROPN'),
 ('mohamad', 'PROPN'),
 ('dan', 'CCONJ'),
 ('menteri', 'VERB'),
 ('pengangkutan', 'PROPN'),
 ('anthony', 'PROPN'),
 ('loke', 'PROPN'),
 ('siew', 'PROPN'),
 ('fook', 'PROPN'),
 ('menitipkan', 'VERB'),
 ('pesanan', 'NOUN'),
 ('khas', 'ADJ'),
 ('kepada', 'ADP'),
 ('orang', 'NOUN'),
 ('ramai', 'ADJ'),
 ('yang', 'PRON'),
 ('mahu', 'ADV'),
 ('pulang', 'VERB'),
 ('ke', 'ADP'),
 ('kampung', 'NOUN'),
 ('halaman', 'NOUN'),
 ('masing-masing', 'NOUN'),
 ('dalam', 'ADP'),
 ('video', 'NOUN'),
 ('pendek', 'ADJ'),
 ('terbitan', 'NOUN'),
 ('jabatan', 'NOUN'),
 ('keselamatan', 'PROPN'),
 ('jalan', 'PROPN'),
 ('raya', 'PROPN'),
 ('jkjr', 'PROPN'),
 ('itu', 'DET'),
 ('dr', 'PROPN'),
 ('mahathir', 'PROPN'),
 ('menasihati', 'VERB'),
 ('mereka', 'PRON'

## Print important features of the CRF model

In [7]:
# Print the 10 most positive and 10 most negative feature weights of the CRF,
# each shown as: weight, tag, feature.
crf.print_features(10)

Top-10 positive:
16.307872 DET      word:tersebut
15.868179 DET      word:para
15.590679 VERB     word:percaya
15.520492 ADP      word:dari
15.296975 DET      word:berbagai
14.691924 ADJ      word:menakjubkan
14.609917 ADJ      word:menyejukkan
14.503045 PRON     word:kapan
14.319357 DET      word:ini
14.267956 ADV      word:pernah

Top-10 negative:
-7.217718 PROPN    word:bunga
-7.258999 VERB     word:memuaskan
-7.498110 ADP      prev_word:pernah
-7.523901 ADV      next_word-suffix-3:nai
-7.874955 NOUN     prev_word-prefix-3:arw
-7.921689 NOUN     suffix-2:ke
-8.049832 ADJ      prev_word:sunda
-8.210202 PROPN    prefix-3:ora
-8.524420 NUM      prev_word:perang
-10.346546 CCONJ    prev_word-suffix-3:rja


## Print important transitions of the CRF model

In [8]:
# Print the 10 most likely and 10 least likely tag-to-tag transitions of the CRF,
# each shown with its weight.
crf.print_transitions(10)

Top-10 likely transitions:
PROPN  -> PROPN   5.529614
DET    -> DET     4.492123
NOUN   -> NOUN    2.600533
ADJ    -> ADJ     2.276762
CCONJ  -> CCONJ   1.888801
CCONJ  -> SCONJ   1.855106
NOUN   -> ADJ     1.729610
SCONJ  -> CCONJ   1.598273
NUM    -> NUM     1.475505
ADV    -> VERB    1.442607

Top-10 unlikely transitions:
SCONJ  -> AUX     -3.559017
X      -> SCONJ   -3.566058
SYM    -> ADJ     -3.720358
PART   -> ADP     -3.744172
X      -> CCONJ   -4.270577
PART   -> PART    -4.543812
ADV    -> X       -4.809254
ADP    -> SCONJ   -5.157816
ADP    -> CCONJ   -5.455725
ADP    -> SYM     -6.841944


## Load deep learning models

In [9]:
# Load each available deep learning POS model in turn and tag the same sample
# passage, so the predictions of the architectures can be compared side by side.
for model_name in malaya.pos.available_deep_model():
    print('Testing %s model' % model_name)
    model = malaya.pos.deep_model(model_name)
    # Trailing blank line separates the output of consecutive models.
    print(model.predict(string), end='\n\n')

Testing concat model
[('kuala', 'PROPN'), ('lumpur', 'PROPN'), ('sempena', 'PROPN'), ('sambutan', 'NOUN'), ('aidilfitri', 'PROPN'), ('minggu', 'NOUN'), ('depan', 'ADJ'), ('perdana', 'ADJ'), ('menteri', 'NOUN'), ('tun', 'PROPN'), ('dr', 'PROPN'), ('mahathir', 'PROPN'), ('mohamad', 'PROPN'), ('dan', 'CCONJ'), ('menteri', 'NOUN'), ('pengangkutan', 'PROPN'), ('anthony', 'NOUN'), ('loke', 'NOUN'), ('siew', 'PROPN'), ('fook', 'PROPN'), ('menitipkan', 'PROPN'), ('pesanan', 'ADV'), ('khas', 'ADJ'), ('kepada', 'ADP'), ('orang', 'NOUN'), ('ramai', 'ADJ'), ('yang', 'PRON'), ('mahu', 'ADV'), ('pulang', 'VERB'), ('ke', 'ADP'), ('kampung', 'NOUN'), ('halaman', 'NOUN'), ('masing-masing', 'NOUN'), ('dalam', 'ADP'), ('video', 'NOUN'), ('pendek', 'ADJ'), ('terbitan', 'NOUN'), ('jabatan', 'NOUN'), ('keselamatan', 'NOUN'), ('jalan', 'PROPN'), ('raya', 'PROPN'), ('jkjr', 'PROPN'), ('itu', 'DET'), ('dr', 'PROPN'), ('mahathir', 'PROPN'), ('menasihati', 'NOUN'), ('mereka', 'PRON'), ('supaya', 'SCONJ'), ('berh

## Voting stack model

In [10]:
# Load individual deep learning taggers to combine via malaya.stack.voting_stack.
entity_network = malaya.pos.deep_model('entity-network')
bahdanau = malaya.pos.deep_model('bahdanau')
# NOTE(review): `luong` is loaded here but is not passed to voting_stack below —
# confirm whether it should be part of the stack or is only needed later.
luong = malaya.pos.deep_model('luong')
# Stack members: entity-network, bahdanau, and the CRF model loaded earlier.
malaya.stack.voting_stack([entity_network, bahdanau, crf], string)

[('kuala', 'PROPN'),
 ('lumpur', 'PROPN'),
 ('sempena', 'PROPN'),
 ('sambutan', 'NOUN'),
 ('aidilfitri', 'PROPN'),
 ('minggu', 'PROPN'),
 ('depan', 'ADJ'),
 ('perdana', 'PROPN'),
 ('menteri', 'PROPN'),
 ('tun', 'PROPN'),
 ('dr', 'PROPN'),
 ('mahathir', 'PROPN'),
 ('mohamad', 'PROPN'),
 ('dan', 'CCONJ'),
 ('menteri', 'NOUN'),
 ('pengangkutan', 'PROPN'),
 ('anthony', 'PROPN'),
 ('loke', 'PROPN'),
 ('siew', 'PROPN'),
 ('fook', 'NOUN'),
 ('menitipkan', 'PROPN'),
 ('pesanan', 'NOUN'),
 ('khas', 'ADJ'),
 ('kepada', 'ADP'),
 ('orang', 'NOUN'),
 ('ramai', 'ADJ'),
 ('yang', 'PRON'),
 ('mahu', 'ADV'),
 ('pulang', 'VERB'),
 ('ke', 'ADP'),
 ('kampung', 'NOUN'),
 ('halaman', 'NOUN'),
 ('masing-masing', 'NOUN'),
 ('dalam', 'ADP'),
 ('video', 'NOUN'),
 ('pendek', 'ADJ'),
 ('terbitan', 'NOUN'),
 ('jabatan', 'NOUN'),
 ('keselamatan', 'PROPN'),
 ('jalan', 'PROPN'),
 ('raya', 'PROPN'),
 ('jkjr', 'PROPN'),
 ('itu', 'DET'),
 ('dr', 'PROPN'),
 ('mahathir', 'PROPN'),
 ('menasihati', 'VERB'),
 ('mereka', 'PRO