In [1]:
from multivec import BilingualModel, MonolingualModel

In [2]:
model = BilingualModel(b'/mnt/c/NLP/collo/un16m.bin')

In [3]:
model.dimension

200

In [4]:
en_model = model.src_model
zh_model = model.trg_model

In [5]:
w = '資產' # '獲益'
enlist = model.src_closest(w.encode(), n=10)
[e.decode() for (e, d) in enlist]

['assets',
 'asset',
 'Assets',
 'Asset',
 'property',
 'Written-off',
 'off-balance-sheet',
 'proceeds',
 'disposals',
 'estate']

In [6]:
w = 'sensibility'
zhlist = model.trg_closest(w.encode(), n=20)
print([z.decode() for (z, d) in zhlist])

['敏鋭', '氣質', '好奇心', '社會意識', '認知', '特質', '敏感性', '道德觀念', '自我認識', '理解力', '敏感度', '激發起', '思想道德', '感性', '覺悟', '政治覺悟', '解構', '感知', '積極向上', '表達能力']


In [7]:
def analogy(w1, w2, w3):
    '''
    Solves problems of the type:
    w1 : w2 :: w3 : __
    '''
    closest_words = []
    try:
        w1v = model.src_model.word_vec(w1.encode())
        w2v = model.src_model.word_vec(w2.encode())
        w3v = model.trg_model.word_vec(w3.encode())
        w4v = w3v + (w2v - w1v)
        closest_words = [w.decode() for (w, d) in model.trg_model.closest_to_vec(w4v, n=15)]
        closest_words = [w for w in closest_words if w not in [w1, w2, w3]]
    except:
        pass
    if len(closest_words) == 0:
        print(':-(')
    else:
        print('{} : {} :: {} : {}'.format(w1, w2, w3, closest_words[0]))
        print(closest_words)

In [8]:
w1 = 'male'
w2 = 'king'
w3 = '女性'; 
analogy(w1, w2, w3)

male : king :: 女性 : 婦女
['婦女', '國王', '王后', '殿下', '女議員', '托亞', '婦女兒童', '貝絲梅', '婦女組織', '陛下', '聖職', '君主', '王國', '女王']


In [9]:
def analogy2(w1, w2, w3):
    '''
    Solves problems of the type:
    w1 : w2 :: w3 : __
    '''
    closest_words = []
    try:
        w1v = model.trg_model.word_vec(w1.encode())
        w2v = model.trg_model.word_vec(w2.encode())
        w3v = model.src_model.word_vec(w3.encode())
        w4v = w3v + (w2v - w1v)
        closest_words = [w.decode() for (w, d) in model.src_model.closest_to_vec(w4v, n=15)]
        closest_words = [w for w in closest_words if w not in [w1, w2, w3]]
    except:
        pass
    if len(closest_words) == 0:
        print(':-(')
    else:
        print('{} : {} :: {} : {}'.format(w1, w2, w3, closest_words[0]))
        print(closest_words)

In [10]:
# adj-modifier [JJ] (w2) + noun [NO] (w1)
w1 = '疾病'
w2 = '先天'
w3 = 'disease'
# adv-modifier (w2) + verb (w1)
w1 = '氣候'
w2 = '正常'
w3 = 'phenomenon'

w1 = '男'
w2 = '國王'
w3 = 'woman'

# base=N, collocate=V
w1 = '根基'# '根本'
w2 = '鞏固' #'動搖'
w3 = 'foundations'

# base=N, collocate=V
w1 = '秘密'# '根本'
w2 = '發現' #'動搖'
w3 = 'secrets'

# base=N, collocate=V
w1 = '人選'# '根本'
w2 = '推薦' #'動搖'
w3 = 'candidate'

# verb [V] (w2) + direct object [DO] (w1)
w1 = '犯罪'
w2 = '打擊'
w3 = 'crime'; 

# verb [V] (w2) + direct object [DO] (w1)
w1 = '耕耘'
w2 = '默默'
w3 = 'work'; 

# verb [V] (w2) + direct object [DO] (w1)
w1 = '名譽'
w2 = '損害'
w3 = 'reputation'; 

w1 = '隱私'
w2 = '侵犯'
w3 = 'privacy'; 

w1v = zh_model.word_vec(w1.encode())
w2v = zh_model.word_vec(w2.encode())
w3v = en_model.word_vec(w3.encode())
w4v = w3v + (w2v - w1v)
closest_words = [w.decode() for (w, d) in en_model.closest_to_vec(w4v, n=25)]
print(' | '.join(closest_words))

violations | abuses | violation | infringement | breaches | infringements | violated | grave | violating | VIOLATIONS | Violations | violate | flagrant | breach | acts | denial | violates | mistreatment | alleged | killings | atrocities | aggressions | blatant | allegations | arbitrary


In [11]:
 en_model.closest_to_vec(w4v, n=25)

[(b'violations', 0.7513533234596252),
 (b'abuses', 0.6667124629020691),
 (b'violation', 0.6163552403450012),
 (b'infringement', 0.5891551375389099),
 (b'breaches', 0.5829546451568604),
 (b'infringements', 0.5725703239440918),
 (b'violated', 0.5636051893234253),
 (b'grave', 0.5541034936904907),
 (b'violating', 0.534368634223938),
 (b'VIOLATIONS', 0.5289359092712402),
 (b'Violations', 0.5284678936004639),
 (b'violate', 0.5158103704452515),
 (b'flagrant', 0.5001519918441772),
 (b'breach', 0.49738314747810364),
 (b'acts', 0.49735140800476074),
 (b'denial', 0.4968011975288391),
 (b'violates', 0.48997315764427185),
 (b'mistreatment', 0.48696815967559814),
 (b'alleged', 0.48206767439842224),
 (b'killings', 0.4805483818054199),
 (b'atrocities', 0.47416362166404724),
 (b'aggressions', 0.47053834795951843),
 (b'blatant', 0.4594906270503998),
 (b'allegations', 0.4562802314758301),
 (b'arbitrary', 0.45615482330322266)]

In [12]:
def collocate(w1, w2, w3):
    '''
    Given:
        Chinese base w1 and Chinese collocate w2
    Find:
        candidates for collocate to English base w3
    '''
    closest_words = []
    try:
        w1v = model.trg_model.word_vec(w1.encode())
        w2v = model.trg_model.word_vec(w2.encode())
        w3v = model.src_model.word_vec(w3.encode())
        w4v = w3v + (w2v - w1v)
        closest_words = [w.decode() for (w, d) in model.src_model.closest_to_vec(w4v, n=15)]
        closest_words = [w for w in closest_words if w not in [w1, w2, w3]]
    except:
        pass
    if len(closest_words) == 0:
        print(':-(')
    else:
        print('{} : {} :: {} : {}'.format(w1, w2, w3, closest_words[0]))
        print(closest_words)

In [13]:
# adj-modifier [JJ] (w2) + noun [NO] (w1)
w1 = '貧窮'
w2 = ''
w1 = '疾病'
w2 = '治療'
w3 = 'disease'
w3 = 'poverty'


# verb [V] (w2) + direct object [DO] (w1)
w1 = '犯罪'
w2 = '打擊'
w3 = 'crime'; 

w1 = '因素'
w2 = '確定'
w3 = 'factors'

collocate(w1, w2, w3)

因素 : 確定 :: factors : parameters
['parameters', 'criteria', 'variables', 'determine', 'determining', 'scenarios', 'benchmarks', 'indicators', 'identifying', 'metrics', 'methodologies', 'baselines', 'identify', 'thresholds']


In [14]:
def collocate2(w1, w2, w3):
    '''
    Given:
        Chinese base w1 and Chinese collocate w2
    Find:
        candidates for collocate to English base w3
    '''
    closest_words = []
    try:
        w1v = model.src_model.word_vec(w1.encode())
        w2v = model.src_model.word_vec(w2.encode())
        w3v = model.trg_model.word_vec(w3.encode())
        w4v = w3v + (w2v - w1v)
        closest_words = [w.decode() for (w, d) in model.trg_model.closest_to_vec(w4v, n=15)]
        closest_words = [w for w in closest_words if w not in [w1, w2, w3]]
    except:
        pass
    if len(closest_words) == 0:
        print(':-(')
    else:
        print('{} : {} :: {} : {}'.format(w1, w2, w3, closest_words[0]))
        print(closest_words)

In [15]:
w1 = 'victim'
w2 = 'prominent'
w3 = '犧牲者'

w1 = 'victim'
w2 = 'abuse'
w3 = '受害者'

collocate2(w1, w2, w3)

victim : abuse :: 受害者 : 虐待
['虐待', '性虐待', '剝削', '性暴力', '濫用', '性侵犯', '暴力', '欺凌', '凌辱', '凌虐', '暴力行為', '虐待老人', '性', '家庭暴力']


In [16]:
import numpy as np
from scipy.spatial.distance import cosine as cos_dist
cos_dist(model.src_model.word_vec('disease'.encode()), 
         model.trg_model.word_vec('疾病'.encode()))

0.09458018600005236

In [17]:
w1 = '策略' 
w2 = '嚴厲'  
w3 = 'policy'
analogy(w1, w2, w3)

:-(


In [6]:
w3 = 'execution'
w1 = 'rank' 
w2 = 'prioritize'  
w1 = 'decision' 
w2 = 'light'  
w3 = '決定'
analogy(w1, w2, w3)

NameError: name 'analogy' is not defined

In [19]:
#en_model.sent_vec(b'Hong Kong') - en_model.word_vec(b'Hong') - en_model.word_vec(b'Kong')


In [20]:
def en2zh(txt, n=5):
    retval = []
    for (x,s) in model.trg_closest(txt.encode(), n):
        retval.append(x.decode())
    return retval

def zh2en(txt, n=5):
    retval = []
    for (x,s) in model.src_closest(txt.encode(), n):
        retval.append(x.decode())
    return retval

In [21]:
def enSynonyms(txt, n=5):
    retval = []
    for (x,s) in model.src_model.closest(txt.encode(), n):
        retval.append(x.decode())
    return retval

def zhSynonyms(txt, n=5):
    retval = []
    for (x,s) in model.trg_model.closest(txt.encode(), n):
        retval.append(x.decode())
    return retval


In [32]:
words = '''
embryonic hazy predictably combined better vast shocking consequences results investigation
approach bother approached gnawing troubling journey conversion dampen extinguish learn
arrogant attention long-awaited knowledge seeker create beautiful scientific  uncovered
intention purpose principal stories accounts describe provide development power exploring
wilderness farming simplistic  discovery pioneers bunch recipient honour awarded connotation
exuded pioneered front-runner understanding focus limelight obstacles still honored importance
created talents proficiency complex acquainted inaugurated understanding ideas recognized
praised difficult problematic recognized obscure coaching repurpose revive community top summit
zenith pinnacle people appearance emergence subdue convincing specific fear apprehension affairs
ordinary primeval citizens regulate constrain regulate operate development propose burgeoning proposal
conjecture speculate theory amplified studies regular phenomena phenomenon natural relevant
produce versatile utilitarian practical useful discovery conclude qualitative revolve debated
atmosphere cordial environment reality demand baffled advocate championed untouched repulsive
disturbing left improving enhance visually figure character specific perfect granted position
self-serving selfish contribution amazing employ use textual  development propose introduce ideas
recognize reflect flawless perfection decline dominant dominating stability simplicity already 
practices also aware varies create remarkable completely living residential order contraption 
invention apparatus ignore improvement enrolment cut dissected escape avoid evasion rising 
cover describe myth fantastic unsuccessful erroneous encompass cover harmony compatible 
unperturbed firmly uproot unchallenged far-reaching solid halt trouble contents valid complex 
intricate involved complicated system elevate humble tools mundane practical legend elevate 
orderly regularity condone accept integration short ephemeral contribution contribute 
strides splendour change legacy learn wholesale sweeping belief stationary remained adhered 
seafood difficult attempt widespread attacking power bold allegedly superior ethos atmosphere 
certainly aggressive active flatly just detained kept retained withheld appropriated 
expropriated forfeited pocket confiscated losses aggressive hooked indulging indulge 
intriguing amassed meticulously effective functioning treasure wealthy levied exploitation 
double flourishing foster uprising upheavals principal invasion autonomy future grew scale 
scattered joining large community marine maritime superior structure arrangement emerged 
alliance allied led dissatisfied upset angry dissatisfied angered destroyed obliterated 
devastated established However concede annihilated dissatisfaction carefree unrestrained 
unbridled hunting infiltrate interfere pretend benefits brash mistreated conflict 
rebellion spurned slight treated maltreated decisive definitive power 
occasion dispersed conflicts disagreement disharmony astute shrewd fatally dispersed 
scattered drifting drift forth between robust reasoning stationary immobile pulled eternal 
ephemeral detailed detail decay immortal mortal appropriate unchanging constant immutable 
smooth regulated sizable mortality purged thinking revived pinnacle summit illegality 
violation people bulge inflate layer abode terrestrial supreme allegations far-fetched 
bothered tame tamed centered predictable abundant regular movement motion birth structure 
sedge synthesize create fashioned connected amalgamated forcefully holes migration 
migrated settled immigrated beginnings replete integration consolidation integrate mingling 
astute randomly propagate labelled marked conflict  war prosper describe kick-starting 
revitalize  fruitless complete comprehensive celestial harbinger upheaval turbulent 
disastrous woe calamitous fateful scrutinize resurrect difficult estimate bumpy 
unhappy thinking understanding understand expensive gathered limited development 
explore exploration similar money costly experiment costly broken occasionally still 
instantly orderly realm mysterious operated reversed about-face adhered stable 
declining weakening precarious decline disarray learn disappeared foundation jump
spread generous funded invasion dying waning rivalry declining stagnation changes 
integrated condemnation promotion spread shackles definite deemed wrong contention 
far-fetched assertion passionate stored freedom publishing ideas endowed proud revered 
beloved boasted mutual strategic unchallenged stranglehold backbone foundation 
pillars unshakable antiquated specially specifically orderliness reputation 
untouchable unassailable error theory concerned deliberately offended displeased 
conspired understand hideous terrify terrified shudder tort confusing Sadly resulting 
propose moderate significant description combination enlightened inspired poor 
affluent veiled enemies ineffective zone debate struggle spilled accessible attack 
misconduct iniquity offence issues censor review merit veracity implied student 
banned indeed finally unexpectedly experience appreciate 
'''
words = words.strip().split()
' | '.join(enSynonyms(words[-1], n=25))

'appreciated | welcome | grateful | appreciates | commend | convey | thank | like | acknowledge | know | applaud | appreciative | attach | inform | thanks | thanked | hope | understand | hoped | express | compliment | trusted | expect | lend | offer'

In [23]:
words = '''
一窩蜂
'''
words = words.strip().split()
' | '.join(zhSynonyms(words[-1], n=25))

'投機者 | 投機性 | 多國公司 | 掠奪性 | 爭先恐後 | 寡頭 | 擠走 | 投機 | 拉動 | 自然而然 | 拋售 | 有權有勢 | 接二連三 | 爭相 | 從眾 | 套利 | 壓低 | 出人意料 | 非正統 | 不擇手段 | 擠出 | 市場化 | 壟斷市場 | 投資商 | 貪得無厭'

In [31]:
words = {0:'壓倒性', 1:'打擊', 2:'犯罪', 3:'報到', 4:'别有用心',
         5:'紛亂',   6:'凌駕', 7:'穩健', 8:'款項', 9:'封建',
         10:'基礎', 11:'變遷', 12:'動態', 13:'鞭策', 14:'督促',
         15:'永恆', 16:'法則', 17:'亙古不變', 18:'穩健', 19:'維護',
         20:'檢舉', 21:'天才', 22:'繼續', 23:'推廣', 24:'宣導',
         25:'崗位', 26:'思維', 27:'幹部', 28:'持續', 29:'永續', 30:'焚燒',
         31:'阻絕', 32:'斷絕', 33:'體現', 34:'用途', 35:'著作', 36:'針對',
         37:'結合', 38:'一窩蜂', 39:'精神', 40:'投入', 41:'盡責', 42:'熱潮',
         43:'宣導', 44:'深邃', 45:'得罪', 46:'佩服', 47:'停課', 48:'雛形',
         49:'用盡', 50:'雛形', 51:'鉅細靡遺', 52:'倘佯', 53:'高昂', 54:'激情', 55:'範例',
         56:'翻版', 57:'雷同', 58:'預料之中', 59:'攻訐', 60:'置身事外', 61:'位高權重', 62:'道理',
         63:'合情合理', 64:'禍害', 65:'蹂躪', 66:'窒礙難行', 67:'趕出', 68:'趕出', 69:'低調', 70:'興高采烈',
         71:'瘋狂', 72:'迫切', 73:'急不可耐', 74:'貫徹', 75:'例行公事', 76:'求助', 77:'重建', 78:'契機', 
         79:'源自', 80:'移植', 81:'嚴厲',
         82:'釋懷', 83:'輪流', 84:'進退兩難', 85:'', 86:'', 87:'', 88:'', 89:'', 90:'', 91:'', 92:'', 93:'', 94:'', 95:'', 96:'', 97:'', 98:'', 99:''
        }
words = '''無力感 成效 不彰 毫無例外 破產 敗壞 名聲 死灰復燃 據聞 範疇 函授 原則上 融入 進退維谷 厲害 結合
整合 初步 快速 考量 策略 排他性 合情合理 犧牲者 受害者 認同 同情 確認 無計可施 阻撓 騷擾 驅趕 震撼 曠野
嘆為觀止 燦爛 結晶 定理 發展 得罪 養家 奧秘 縈繞 公然 犀利 僵持不下 朝氣 勃勃 然而 困擾 運作 作用力 從此
課題 嚴肅 未婚 流離失所 轉換 實質 滋生 思想 澆熄 艱深 難懂 深奧 無措 不知所措 作風 桀驁不遜 桀傲不遜 桀敖不馴
大家 瞭解 揭露 注意 結果 求知 接力 創建 凸顯 崁入 嵌入 初衷  觀點 真理 闡述 命題 發展 感受 愛恨 情仇 力量
星象 探索 時序 整理 震撼 曠野 佩服 神奇 交互 看到 知名 開創 補強 補救 落差 接軌 審定 努力 複雜 認識
棘手 下水 促成 標題 輔導 驗證 認證 鼎盛 險惡 叛變 叛離 帷幄 運籌 版圖 重疊 高山 安定 定居 部落 頂端 平民
畏懼 籌碼 洪荒 人民 主張 眾人 規律 現象 發現 製作 規律 循環 反對者 實用 熱門 舒適 獨尊 明顯 合理性 
留下 佐證 總結 嚴苛 貢獻 不遑多讓 傑出 色彩 重力 力矩 精彩  提出 喜愛 偏見 最少 自然 此外 另外 另一方面
案例 名額 細膩 涵蓋 說成 描寫 結果 妄想 契合 瓦解 看似 完整 修正 思考 困擾 關係 嚴謹 內容 成立 說明 
推翻 傳說 重新 建立 烏雲 密布 認同 統合 思想 短暫 華麗 貫穿 停留 主張 地位 啟示 乾脆 扣留 沉迷 變遷 
主體 農產品 外侮 大規模 壯大 努力 加盟 大型 加工 前所未見 令人 失望 高階 榮景 豐收 消滅 重創 確立 
盟主 幫主 血氣方剛 歸順 急行軍 雄厚 推理 涵蓋 聚集 天象 恆久 完美  破碎 層 大地 攸關 隨機 立論 相異 
情仇 愛恨 透視 動能 動量 檢視 苦境 困境 本輪 神奇 可觀 重視 發現 教會學校 蕭條 萎縮 剽悍 傳播 傳授 
束縛 必然性 必然 說明 反駁 觀察 細膩 風氣 上游 艱深 艱難 水車 彰顯 知名 知名度 鬆綁 研究員 根基 特別 
立論 井然有序 發現 名譽 公開 耕耘 刻意 在意 打造 美中不足 證實 提出 描述 影射 愚蠢 果然 探索 發掘 
'''
words = words.strip().split()
' | '.join(zh2en(words[-1], 35))

'untapped | tap | tapped | harness | potentialities | unlocking | exhumation | unlock | exhumations | unexploited | Exhumation | tapping | talents | harnessing | exhume | excavation | potential | Tapping | exhumed | potentials | gravesites | harnessed | exhuming | high-potential | unleash | unexplored | discover | uncover | graves | Excavations | dig | Exhumations | creativity | discovering | creative'

In [5]:
words = {0:'shirk', 1:'credibility', 2:'legitimacy', 3:'cynical', 4:'permeate',
         5:'tergiversate', 6:'eternal', 7:'crusade', 8:'splinter', 9:'whining',
         10:'product', 11:'economic', 12:'aspect', 13:"effect", 14:"context", 15:"outreach",
         16:'circulation', 17:'reflection', 18:'thaw', 19:'undermine', 20:'plaguing',
         21:'streak', 22:'thought-provoking', 23:'encourage', 24:'dawn', 25:'prominence',
         26:'thousands', 27:'leaders', 28:'introduced', 29:'measure', 30:'advocate', 
         31:'attempted', 32:'mimeograph', 33:'flurry', 34:'subtlety', 35:'aesthetics',
         36:'centerpiece', 37:'aloof', 38:'influence', 39:'rhetorical', 40:'promise',
         41:'articulate', 42:'painstakingly', 43:'bureaucratic', 44:'haze', 45:'afoul', 
         46:'restricted', 47:'inevitable', 48:'emotions', 49:'charged', 50:'addendum',
         51:'opening', 52:'copy', 53:'abuse', 54:'resembling', 55:'circumstances', 56:'predictable',
         57:'enslaved' , 58:'unsurprising', 59:'abuse', 60:'mock', 61:'reminiscent', 62:'victim', 
         63:'restate', 64:'provocative', 66:'irony', 67:'shrewd', 68:'properly', 
         69:'omnipotent', 70:'apparent', 71:'sense', 72:'considered',
         73:'afflicted', 74:'hyperinflation', 75:'ossified', 76:'practical', 77:'promotion', 
         78:'initiative', 79:'symptom', 80:'stakes', 81:'discredit', 
         82:'pragmatism', 83:'discredited', 84:'credential', 85:'struggles', 86:'remove', 
         87:'supplementary', 88:'sector', 89:'oblivious', 90:'publicized',
         91:'uncooperative', 92:'docile', 93:'reception', 94:'cheerful', 95:'eager', 
         96:'denounce', 97:'follow', 98:'initiative', 99:'afterthought', 100:'dutifully',
         101:'executive', 102:'rationalise', 103:'petition', 104:'denied', 105:'casually',
         106:'flourished', 107:'guidelines', 108:'material', 109:'significant', 110:'gains', 
         111:'arguments', 112:'escorts', 113:'obliterated', 114:'contrast', 115:'parity', 
         116:'unsettling', 117:'shifting', 118:'acknowledged', 119:'debt', 120:'permanently', 
         121:'details', 122:'plagued', 123:'stall', 124:'', 125:'', 126:'', 127:'', 128:'', 129:'', 130:'', 131:'', 132:'', 133:'', 134:'', 135:'', 136:'', 137:'', 138:'', 139:'', 140:'', 141:'', 142:'', 143:'', 144:'', 145:'', 146:'', 147:'', 148:'', 149:''
        }
words = '''set-up errant marshal dishonesty inefficiencies malaise problems broached
permissible sentiment untouched languishing potent undercut blanket condemnation dismay heartland
discredited lenient ruffle succinctly renewed cautious lull presumed manufactured
range floatingcomplexities manifold defy aspects facets areas entail revamped institute form terse
favored apprenticeship correspondence part-time syphoned emerging publicized ingress torture
vexing mediator increasing claimed freewheel stalled consulted minimal representation electoral
barely alarming slight resentment violence youthful uncontrollable wing impotent calculated
bearings censured censored disconsolate aloud loudly insecurities blanket pamphlet zany
exemplary spotting spot force hairdresser quietly combine preliminary better rapid timely survive
mind shared propaganda slate undeterred ignorant publicized ingenious mockery valid invalidated
indoctrination actions burgeoning inspirational inspire insensitivity incisive over-sympathy sympathetic
nonsense espouse allege checked gesture managers grim cautious pruned manageable better-educated 
membership forge assertive hedge  qualifications caution endorsement stakes patronage 
dismissive reassuring rogue restructure perquisites contacts visionary service civil desperate 
service sensibility address deal tenor until surreptitious strains allegedly prey unabashedly
materialistic approaches insatiable retrenchment compulsory bitterness graft associates 
disaffected youthful steadily mired  inextricably publicized delays evasions intransigence 
sullen hostility disillusioned exasperation exasperated receptive shunted paralyzed appeared
boisterous potent endlessly engrossing muzzled pleas stymied impede loving fierce cajole
restraint divided intransigence lobbied checked awash speakers faltering veteran anguished
bedraggled overwhelmed treat mowed dutifully actions unspectacular activists summoned 
vowed idiosyncratic crackdown significance conclusion humorous amusing incessant  strains
seriously harassed random advancement development forgive diffuse spread hostility 
covert overt corrosive intention reference allusion consolidated formidable consolidate
views lapse oversight grudgingly acquiescence disruptive lumbering blunter threats
definitive speculation collateral exacerbated militated permitted flagrant breathtaking
inflexible scenarios remarkable prestige disputed contention deflected abrogation 
ventures borderland pronouncements accommodation edged rhetoric branded blatant
looming protracted exploitation shadowing fined resentencing adulation booming 
sweatshops sprawling academics attempts anchored adequate spacious satisfactory sarcastic 
manifested clarity venture-capital emerging apocalypse defunct tortuous excoriated
said satirist freshness irreverent stalwart acerbic approached bothering henceforth
handiwork interchange reality elucidate reformulate regurgitation provide admirable
farming topography complex obscure pinnacle primeval manifestations incontrovertible 
encapsulate rising refute splendour legacy joining redemption spurned immobile stationary 
mindset contention ardent legacy unearthed candidate drawbacks portfolio jarring 
mentality mindset surmise solid censure healthcare offer share community access attendee 
connection connections remiss demise esoteric twist futuristic niche tap banned precursor
forerunner distinguished 
'''
words = words.strip().split()
w = words[-1]; print(w)
' | '.join(en2zh(w, n=50))

distinguished


NameError: name 'en2zh' is not defined

In [26]:
new_entries = [f"{x}:''" for x in range(82,100)]
', '.join(new_entries)
    

"82:'', 83:'', 84:'', 85:'', 86:'', 87:'', 88:'', 89:'', 90:'', 91:'', 92:'', 93:'', 94:'', 95:'', 96:'', 97:'', 98:'', 99:''"

In [27]:
words = {0:'推廣', 1:'抱怨', 2:'作品', 3:'財務', 4:'憑藉', 5:'深邃', 6:'得罪', 7:'限閲', 8:'高昂', 9:'情緒', 10:'反映', 
         11:'翻版', 12:'情況', 13:'預知', 14:'侵害', 15:'侵害', 16:'不出所料', 17:'置身事外', 18:'表面', 19:'位高權重', 20:'道理', 21:'馴服', 22:'拖延'}
words = '''
馴服 拖延 營私舞弊 心態 面向 視角 面向 因素 多方 探討 努力 虐待 總括 厲害 初步 包圍 正當 嘲諷
發芽 萌芽 蓬勃 犧牲者 犧牲者 同情 認同 消息 人數 刪減 呼籲 大家 別無選擇 應付  解決 破產 罷工
無計可施  強而有力  引人入勝 請求 訴求 物色 走下坡 放緩 停滯 變化 控制 驅趕 騷擾 表面 嘆為 觀止
原諒 局部性 蔓延 散佈 公然 公開 彙整 失算 失誤 設計 盡心 令人驚嘆 受挫 阻止 犀利 僵持不下 蓬勃
夠用 足夠 萬劫 不復 問題 轉換 提供 
'''
words = words.strip().split()
' | '.join(zhSynonyms(words[-1], 25))

'提供援助 | 獲得 | 提供支援 | 提供數據 | 獲取 | 索取 | 提供方便 | 給予 | 提供情報 | 配備 | 發放 | 供給 | 掌握 | 傳遞 | 尋求 | 輸送 | 劃撥 | 索要 | 傳達 | 傳授 | 提供者 | 供應 | 求助 | 取用 | 派發'

\begin{aligned}
    \mathbb E e^{i t \bar X_n }
    & = \mathbb E \exp \left\{ i \frac{t}{n} \sum_{j=1}^n X_j \right\}
    \\
    & = \mathbb E \prod_{j=1}^n \exp \left\{ i \frac{t}{n} X_j \right\}
    \\
    & = \prod_{j=1}^n \mathbb E \exp \left\{ i \frac{t}{n} X_j \right\}
    = [\phi(t/n)]^n
\end{aligned}

$$
\mathbb R^{n}, x+y=z, \mathbb P(A \subset B)
$$

In [28]:
ord('ℝ'), ord('𝜙')

(8477, 120601)