In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell executes, so edits to local source files take effect immediately.
%load_ext autoreload
%autoreload 2
import sys
# Put ../src (relative to the kernel's working directory) on the import path;
# presumably this is where the local `mesh` module lives — confirm.
sys.path.append("../src")

In [3]:
import mesh
import pandas as pd

In [21]:
# Folder holding the sense-mapping input and the CSV files written further down.
sense_dir = mesh.get_data_dir() / "sense_data"
mesh.ensure_dir(sense_dir)  # presumably creates the folder when absent — confirm in mesh
sense_map_path = sense_dir / "sense_map_bn_pwn.csv"
# index_col=0: the first CSV column becomes the row index.
sense_map_frame = pd.read_csv(sense_map_path, index_col=0)

In [30]:
from CwnGraph import CwnBase
# Single CwnBase instance; find_cwn_senses further down reads this global.
cwn = CwnBase()
# Exploratory peek: senses of the third lemma entry whose form is exactly 聊.
liao_entries = cwn.find_lemma("^聊$")
liao_entries[2].senses

[<CwnSense[06785101](聊): 姓。>]

In [76]:
# Anchored character class: a word that is exactly one of 說, 談, 講, 聊.
TARGETS = "^[說談講聊]$"
# na=False treats missing `word` values as non-matches. Without it the mask
# carries NaN for those rows and boolean indexing through .loc raises.
sub_frame = sense_map_frame.loc[
    sense_map_frame.word.str.match(TARGETS, na=False), :
]

In [77]:
# Collapse to one row per PWN synset (GroupBy.first keeps the first non-null
# value of each column), order the rows by word, and turn pwn_synset back
# into an ordinary column with a fresh integer index.
sub_frame = (
    sub_frame
    .groupby("pwn_synset")
    .first()
    .sort_values("word")
    .reset_index()
)

In [78]:
from nltk.corpus import wordnet as wn
def _lookup_synset(syn_id):
    """Resolve a synset id string to a WordNet synset object.

    The last character of ``syn_id`` is used as the part-of-speech tag and
    everything before it is parsed as the integer offset.
    """
    pos, offset = syn_id[-1], int(syn_id[:-1])
    return wn.synset_from_pos_and_offset(pos, offset)

def get_definition(syn_id):
    """Return the WordNet definition text of the synset named by ``syn_id``."""
    return _lookup_synset(syn_id).definition()

def get_examples(syn_id):
    """Return the synset's example sentences as one newline-separated string.

    A synset without examples yields the empty string.
    """
    return "\n".join(_lookup_synset(syn_id).examples())

In [82]:
# Add the WordNet definition and the newline-joined examples for each row's
# synset id, in row order.
sub_frame = sub_frame.assign(
    definition=list(map(get_definition, sub_frame.pwn_synset)),
    examples=list(map(get_examples, sub_frame.pwn_synset)),
)

In [105]:
# Name the index so it is exported under a "serial" header.
sub_frame.index.name = "serial"
pwn_out_path = sense_dir / "pwn_definitions_about_talk.csv"
sub_frame.to_csv(pwn_out_path, encoding="UTF-8")

# CWN Senses

In [86]:
import re
from itertools import chain

def find_cwn_senses(lemma):
    """Return every sense of the CWN lemma entries whose form equals ``lemma``.

    ``cwn.find_lemma`` is queried with an anchored regular expression. The
    lemma is escaped first so that regex metacharacters in it are matched
    literally instead of widening the match or making the pattern invalid.

    Args:
        lemma: The word form to look up in the global ``cwn`` instance.

    Returns:
        A flat list with the ``senses`` of all matching lemma entries, in the
        order the entries are returned. The list is empty when nothing
        matches or when the lookup fails.
    """
    try:
        pattern = f"^{re.escape(str(lemma))}$"
        entries = cwn.find_lemma(pattern)
        return list(chain.from_iterable(entry.senses for entry in entries))
    except Exception as ex:
        # Deliberate best-effort: report the offending lemma and keep going so
        # one failing lookup does not abort a batch of queries.
        print(lemma)
        print(ex)
        return []

In [98]:
# One flat record per CWN sense of each target character.
# NOTE(review): 聊 is part of TARGETS for the PWN table but is not iterated
# here — confirm the omission is intentional.
data = [
    {
        "word": char,
        "sense_pos": sense.pos,
        "sense_id": sense.id,
        "sense_def": sense.definition,
        "sense_examples": "\n".join(sense.all_examples()),
    }
    for char in "說談講"
    for sense in find_cwn_senses(char)
]

In [99]:
# Build the table from the list of per-sense dicts: one row per sense, one
# column per dict key.
cwn_frame = pd.DataFrame.from_records(data)

In [102]:
# Name the index so it appears under a "serial" header when displayed or exported.
cwn_frame.index.name = "serial"

In [103]:
# Preview the first rows (head() defaults to five) as a sanity check.
cwn_frame.head()

serial,word,sense_pos,sense_id,sense_def,sense_examples
0,說,VE,5212401,以口語媒介傳達訊息。,她<說>那麼你自由自在的飛吧！\n我<說>：「把傘打開吧。」她才不甘心地開了傘。\n為了我們...
1,說,VE,5212402,以文字媒介引述或陳述訊息。,埃及中東新聞社引述穆巴拉克的話<說>：我們現在不會宣布新的停火方案。\n曾經有篇報導<說>，...
2,說,VC,5212403,使用後述語言。,您的中國話<說>得不錯嘛！\n我以後要跟他們<說>中國話，不<說>英文。\n他頗具語言天才，...
3,說,VE,5212404,以前述對象作為談論的觀點。,在赫塞的創作歷史上來<說>，這部重要的作品算是前期邁入後期中的轉型之作。\n身為整個臺灣活動...
4,說,VC,5212405,指涉後述對象。,車爾庫笑道：「老蘇，你的兒子很有眼光啊！」蘇魯克道：「你<說>蘇普麼？」\n第十三課，賢德的...


In [104]:
# Write the CWN sense table to the sense-data folder.
cwn_out_path = sense_dir / "cwn_senses_about_talk.csv"
cwn_frame.to_csv(cwn_out_path, encoding="UTF-8")