## Loading Cadence

In [2]:
import sys
# Append the relative "../src" directory to the import search path
# (presumably where the `casa` package imported in the next cell lives —
# confirm against the repo layout). The guard keeps re-runs of this cell
# from adding the same entry twice.
if sys.path.count("../src") == 0:
    sys.path.append("../src")

In [3]:
import casa
from casa import Cadence, Crystal, MTBert, Cadet

In [4]:
# Build the `cadence` object from cadence/config.json, located under the
# directory returned by casa.get_data_path(). All later cells use `cadence`.
cadence = Cadence.load(casa.get_data_path()/"cadence/config.json")

In [5]:
# Bare expression: displays the repr of the `cadet` attribute of `cadence`.
cadence.cadet

<Cadet: FastText(9851, 200), Seeds(79)>

In [7]:
# Print the number of entries in the crystal component's `cxlist` and
# `eval_onto` collections, in that order, after a fixed label.
print("crystal data: ", *(len(part) for part in (cadence.crystal.cxlist, cadence.crystal.eval_onto)))

crystal data:  21 470


In [8]:
# Bare expression: displays the repr of the `mt_bert` attribute of `cadence`.
cadence.mt_bert

<MTBert(cpu): E:\LangOn\casa-cht\data\cadence\..\mtbert>

## A Cadence Sample

In [9]:
from casa.cadence.resolvers import CadenceSimpleResolver, CadenceBertOnlyResolver
from casa.cadence import visualize_tokens

In [10]:
# Analyze one sentence with the "simple" strategy and pass the result to
# visualize_tokens. Because the call is the cell's last expression, its
# return value is displayed in addition to whatever visualize_tokens prints.
# NOTE(review): the meaning of the second argument (2) is not visible in
# this notebook — confirm against visualize_tokens' signature.
visualize_tokens(cadence.analyze("中華其實收訊很好", strategy="simple"), 2)

[34m中[0m[34m華[0m其實[32m收[0m[32m訊[0m[36m很[0m[36m好[0m


{'text': '中華其實收訊很好',
 'tag_idx': array([90, 90, -1, -1, 91, 91,  0,  0], dtype=int64),
 'pn_prob': array([[0.90088224, 0.0991177 ],
        [0.84744275, 0.15255727],
        [0.81264174, 0.18735825],
        [0.85360914, 0.1463909 ],
        [0.935124  , 0.06487605],
        [0.9688764 , 0.03112356],
        [0.7666599 , 0.23334004],
        [0.96850425, 0.03149577]], dtype=float32)}

In [11]:
# Analyze a sentence with the "simple" strategy; the bare expression
# displays the repr of the returned object.
cadence.analyze("我家附近的遠傳好囉嗦", strategy="simple")

<CadenceOutput: [('遠傳電信', '[其他]客服', 'Negative')]>

In [12]:
# Keep the "simple"-strategy result in `out` and display its `aspects`
# attribute.
# NOTE(review): the trailing fields of each aspect row look like labels for
# which component produced the aspect and the polarity — confirm.
out = cadence.analyze("我家附近的遠傳好囉嗦", strategy="simple")
out.aspects

[['遠傳電信', '[其他]客服', 'Negative', 'crystal', 'crystal']]

In [13]:
# Analyze the sentence with the "bertonly" strategy (rebinding `out`) and
# display its `aspects` attribute.
out = cadence.analyze("我家附近的遠傳好囉嗦", strategy="bertonly")
out.aspects

[['遠傳電信', None, 'Negative', 'cadet', 'mtbert']]

In [14]:
# Render the "simple"-strategy analysis with visualize_tokens. Binding the
# return value to `visdata` keeps it from being echoed as the cell output.
visdata = visualize_tokens(cadence.analyze("我家附近的遠傳好囉嗦", strategy="simple"))

我家附近的[34m遠[0m[34m傳[0m[31m好[0m[31m囉[0m[31m嗦[0m


In [15]:
# Render another sentence analyzed with the "simple" strategy. `visdata`
# captures visualize_tokens' return value so it is not echoed.
visdata = visualize_tokens(cadence.analyze("遠傳最近是在哈囉", strategy="simple"))

[34m遠[0m[34m傳[0m[31m最[0m[31m近[0m[31m是[0m[31m在[0m[31m哈[0m[31m囉[0m


In [16]:
# Experimental: analyze with the "multiple" resolver strategy. The bare
# expression displays the repr of the returned object.
cadence.analyze("中華超划算，而且最快", strategy="multiple")

<CadenceOutput: [('中華電信', '[資費]續約攜碼', 'Positive'), ('中華電信', '[通訊]網速', 'Positive')]>

In [17]:
# Analyze a sentence with the "simple" strategy (rebinding `out`) and
# display the `entities` attribute of the result.
out = cadence.analyze("亞太網路超差，中華就很好", strategy="simple")
out.entities

['亞太電信', '中華電信']

In [18]:
# Analyze the sentence with the "multiple" strategy; the bare expression
# displays the repr of the returned object.
cadence.analyze("亞太網路超差，中華就很好", strategy="multiple")

<CadenceOutput: [('亞太電信', '[通訊]涵蓋', 'Negative'), ('中華電信', '[通訊]涵蓋', 'Positive')]>

In [19]:
# Analyze with the "multiple" strategy, print the result, then render it
# with visualize_tokens. `out` is rebound here and read again by the next
# cell; `visdata` captures visualize_tokens' return value so it is not echoed.
# NOTE(review): the meaning of the second argument (2) to visualize_tokens
# is not visible in this notebook — confirm against its signature.
out = cadence.analyze("亞太網路超差，中華收訊就很好", strategy="multiple")
print(out)
visdata = visualize_tokens(out, 2)

<CadenceOutput: [('亞太電信', '[通訊]涵蓋', 'Negative'), ('中華電信', '[通訊]涵蓋', 'Positive')]>
[34m亞[0m[34m太[0m[32m網[0m[32m路[0m[31m超[0m[31m差[0m，[34m中[0m[34m華[0m[32m收[0m[32m訊[0m就[36m很[0m[36m好[0m


In [20]:
# Bare expression: displays the `mt_bert` attribute of `out`, the result
# bound by the preceding "multiple"-strategy analysis cell.
out.mt_bert

{'text': '亞太網路超差，中華收訊就很好',
 'opn_logits': array([[-2.1152952 , -5.1527815 ,  6.963649  ],
        [ 5.3408756 , -3.7958367 , -2.7251062 ],
        [ 6.260398  , -4.006509  , -3.7350326 ],
        [ 4.2582    , -4.640467  , -1.4815805 ],
        [ 4.186642  , -4.1895447 , -1.3312267 ],
        [-0.24147686, -6.247472  ,  4.9183874 ],
        [-0.0094707 , -5.615987  ,  3.8122473 ],
        [ 5.953059  , -5.7677774 , -2.0524197 ],
        [ 6.537443  , -3.9425964 , -3.9490232 ],
        [ 6.835691  , -4.5912123 , -3.7815132 ],
        [ 4.1830726 , -4.937192  , -0.99616814],
        [ 5.1136556 , -4.1199703 , -2.191852  ],
        [ 2.006753  , -4.1519065 ,  0.8197124 ],
        [ 2.8441455 , -5.0043325 ,  1.7750754 ],
        [ 2.9105272 , -3.622441  ,  1.280484  ],
        [-2.1153047 , -5.152773  ,  6.9636497 ]], dtype=float32),
 'seq_polarity': 'Negative',
 'seq_probs': array([0.00368624, 0.00232451, 0.9939892 ], dtype=float32),
 'spans': ['超差'],
 'span_idxs': [[4, 5]],
 'span_pols':