In [None]:
# LOCAL IMPORTS

import importlib
import romanizer
import db

# Re-execute the local helper modules so that edits made to their source files
# are picked up without restarting the kernel.
importlib.reload(romanizer)     # Reload romanizer from korean
importlib.reload(db)

In [40]:
# MODEL LOADING

import spacy
import ginza

# Language code; it is passed to db.connect() in the DATABASE cell.
language = 'ja'
# Load the "ja_ginza" pipeline by name; `nlp` is called on the phrase later to produce a Doc.
nlp = spacy.load("ja_ginza") # Initialize neural pipeline

In [None]:
# DATABASE

import os
from dotenv import load_dotenv
from psycopg2.extensions import connection

# Read the .env file into the process environment; override=True lets values
# from the file replace variables that are already set.
load_dotenv(override=True)

# Open the database connection for the selected language (annotated as a psycopg2 connection).
# NOTE(review): presumably db.connect reads its settings from the environment loaded above — confirm in db.py.
conn: connection = db.connect(language)

In [None]:
# TABLE CREATION

# Set up the tables through the open connection.
# NOTE(review): whether this is safe to re-run against an existing database depends on db.create_tables — confirm.
db.create_tables(conn)

In [None]:
# PHRASE TOKENIZATION
phrase = "それはいい考えですね"

# Run the phrase through the pipeline; `doc` is reused by the report cell.
doc = nlp(phrase)

# Dump the raw analysis of every token: one field per line, then a separator.
for token in doc:
    print(
        f"Text: {token.text} | {romanizer.romanize(token.text)}",
        f"  Lemma: {token.lemma_}",
        f"  POS: {token.pos_}",
        f"  Fine POS: {token.tag_}",
        f"  Head: {token.head.text}",
        f"  DepRel: {token.dep_}",
        f"  SpaceAfter: {token.whitespace_!r}",
        "---",
        sep="\n",
    )

In [None]:
from color import blue, bold, cyan, green, purple, red, yellow

# Colourised per-token report: romanization, part of speech, kanji etymology,
# morphemes and translation, all looked up through `db` on the open connection.

# Romanize the whole phrase once and reuse it for the header line and the word list.
phrase_romanized = romanizer.romanize(phrase)
print(blue("Phrase: "), phrase, '|', red(phrase_romanized))
words = phrase_romanized.split(' ')

# Accumulates the romanization of every token. It has to start empty here:
# without this initialisation the `+=` below raises NameError on a fresh kernel,
# and re-running the cell would keep appending to the previous run's value.
current_word = ""

for token in doc:
    # Romanize each token once; the value is both printed and accumulated.
    token_romanized = romanizer.romanize(token.text)
    print(bold(cyan("Text: ")), token.text, '|', blue(token_romanized))
    current_word += token_romanized

    # Look up the coarse (UPOS) and fine-grained (XPOS) part-of-speech entries.
    upos = db.get_upos(conn, token.pos_)
    xpos = db.get_xpos(conn, [token.tag_])
    print(green("Part of Speech: "), upos, '-', xpos)

    # Only kanji characters get an etymology lookup; other characters are skipped.
    print(yellow("Kanjis: "))
    for char in token.text:
        if romanizer.is_kanji(char):
            meaning = db.get_etymology(conn, char)
            print(f"    '{char}' => {bold(yellow(meaning))}")

    print(red("Morphemes: "))
    morphemes_info = db.get_morphemes(conn, [token.text], [token.tag_])
    # zip() stops at its shortest input and the first two lists hold one element,
    # so at most one morpheme row is printed per token.
    for morpheme, tag, xpos_label, info in zip([token.text], [token.tag_], xpos, morphemes_info):
        print(f"    '{morpheme}': '{tag} => {xpos_label.title()}'")

    translation = db.get_translation(conn, token.text, token.pos_, morphemes_info)
    print(purple("Translation: "), translation)
    print("─"*30, '\n')
