In [59]:
# LOCAL IMPORTS

import importlib
import romanizer
import db

# Re-execute both local modules so that edits made on disk since their first
# import take effect without restarting the kernel.
# NOTE(review): the original comment read "Reload romanizer from korean" —
# presumably the romanizer module was adapted from a Korean variant; confirm.
importlib.reload(romanizer)
# As the cell's last bare expression, this call's return value (the reloaded
# db module object) is echoed as the cell output.
importlib.reload(db)

<module 'db' from 'd:\\vocab-db\\db.py'>

In [60]:
# MODEL LOADING

import spacy
import ginza

# Build the GiNZA Japanese pipeline; spacy.load resolves "ja_ginza" as an
# already-installed pipeline package.
nlp = spacy.load("ja_ginza")

# Language code passed to db.connect() in the database cell.
language = 'ja'

In [61]:
# DATABASE

import os
from dotenv import load_dotenv
from psycopg2.extensions import connection

# Read the project's .env file into the process environment. override=True
# lets values from the file replace variables that are already set.
load_dotenv(override=True)

# Open the database connection for the configured language code.
# NOTE(review): presumably db.connect reads its credentials from the
# environment populated above, which is why load_dotenv runs first — confirm
# in db.py.
conn: connection = db.connect(language)

Connection successful!


In [None]:
# TABLE CREATION

# Presumably creates the vocabulary tables over the open connection.
# NOTE(review): this cell has no execution count (In [None]), i.e. it was not
# run in the saved session. Whether db.create_tables is safe to re-run against
# an existing schema is not visible here — confirm in db.py before Run All.
db.create_tables(conn)

In [62]:
# PHRASE TOKENIZATION
phrase = "それはいい考えですね"

# Run the sentence through the loaded pipeline to obtain its tokens.
doc = nlp(phrase)

# Dump every token as one multi-line record: surface form with its
# romanization, then the lemma, coarse/fine POS tags, dependency head,
# dependency relation and trailing whitespace, closed by a "---" separator.
for token in doc:
    print(
        f"Text: {token.text} | {romanizer.romanize(token.text)}",
        f"  Lemma: {token.lemma_}",
        f"  POS: {token.pos_}",
        f"  Fine POS: {token.tag_}",
        f"  Head: {token.head.text}",
        f"  DepRel: {token.dep_}",
        f"  SpaceAfter: {token.whitespace_!r}",
        "---",
        sep="\n",
    )

Text: それ | sore
  Lemma: それ
  POS: PRON
  Fine POS: 代名詞
  Head: 考え
  DepRel: nsubj
  SpaceAfter: ''
---
Text: は | ha
  Lemma: は
  POS: ADP
  Fine POS: 助詞-係助詞
  Head: それ
  DepRel: case
  SpaceAfter: ''
---
Text: いい | ii
  Lemma: いい
  POS: ADJ
  Fine POS: 形容詞-非自立可能
  Head: 考え
  DepRel: acl
  SpaceAfter: ''
---
Text: 考え | kangae
  Lemma: 考え
  POS: NOUN
  Fine POS: 名詞-普通名詞-一般
  Head: 考え
  DepRel: ROOT
  SpaceAfter: ''
---
Text: です | desu
  Lemma: です
  POS: AUX
  Fine POS: 助動詞
  Head: 考え
  DepRel: cop
  SpaceAfter: ''
---
Text: ね | ne
  Lemma: ね
  POS: PART
  Fine POS: 助詞-終助詞
  Head: 考え
  DepRel: mark
  SpaceAfter: ''
---


In [63]:
from color import blue, bold, cyan, green, purple, red, yellow

# Per-token report: for every token of `doc`, print its romanization, its
# part-of-speech labels, the etymology of each kanji it contains, its morpheme
# breakdown and its translation, all looked up through `db` on `conn`.

# Romanize the whole phrase once and reuse the result for both the banner
# line and the word split.
romanized_phrase = romanizer.romanize(phrase)
print(blue("Phrase: "), phrase, '|', red(romanized_phrase))
words = romanized_phrase.split(' ')

# Start the accumulator empty on every run of this cell. Previously it was
# never initialised here, so the cell only worked when the name was left over
# in the kernel and raised NameError after a restart.
current_word = ''

for token in doc:
    # token.text is the surface form as it appears in the phrase.
    romanized_token = romanizer.romanize(token.text)
    print(bold(cyan("Text: ")), token.text, '|', blue(romanized_token))
    current_word += romanized_token

    upos = db.get_upos(conn, token.pos_)
    xpos = db.get_xpos(conn, [token.tag_])
    print(green("Part of Speech: "), upos, '-', xpos)

    print(yellow("Kanjis: "))
    for char in token.text:
        if romanizer.is_kanji(char):
            meaning = db.get_etymology(conn, char)
            print(f"    '{char}' => {bold(yellow(meaning))}")

    print(red("Morphemes: "))
    morphemes_info = db.get_morphemes(conn, [token.text], [token.tag_])

    # Track the translation explicitly instead of reading the loop variable
    # after the loop: when the zip yields nothing, that variable is either
    # unbound or still holds the previous token's record.
    translation = None
    for morpheme, tag, xpos_label, info in zip([token.text], [token.tag_], xpos, morphemes_info):
        print(f"    '{morpheme}': '{tag} => {xpos_label.title()}'")
        translation = info['translation']

    # The translation comes from the morpheme record; the separate
    # db.get_translation lookup was left disabled by the author.
    if translation is None:
        translation = "(no translation found)"
    print(purple("Translation: "), translation)
    print("─"*30, '\n')


[94mPhrase: [0m それはいい考えですね | [91msorehaii kangae desune[0m
[1m[96mText: [0m[0m それ | [94msore[0m
[92mPart of Speech: [0m Pronoun - ['pronoun']
[93mKanjis: [0m
[91mMorphemes: [0m
    'それ': '代名詞 => Pronoun'
[95mTranslation: [0m that
────────────────────────────── 

[1m[96mText: [0m[0m は | [94mha[0m
[92mPart of Speech: [0m Adposition - ['binding particle']
[93mKanjis: [0m
[91mMorphemes: [0m
    'は': '助詞-係助詞 => Binding Particle'
[95mTranslation: [0m (topic marker)
────────────────────────────── 

[1m[96mText: [0m[0m いい | [94mii[0m
[92mPart of Speech: [0m Adjective - ['dependent adjective']
[93mKanjis: [0m
[91mMorphemes: [0m
    'いい': '形容詞-非自立可能 => Dependent Adjective'
[95mTranslation: [0m good
────────────────────────────── 

[1m[96mText: [0m[0m 考え | [94mkangae[0m
[92mPart of Speech: [0m Noun - ['general common noun']
[93mKanjis: [0m
    '考' => [1m[93mconsider, think over[0m[0m
[91mMorphemes: [0m
    '考え': '名詞-普通名詞-一般 => General 