In [None]:
# PACKAGE INSTALLATION

%pip install jupyter stanza supabase dotenv

In [24]:
# LOCAL IMPORTS
# Project-local helper modules:
#   romanizer - romanization helpers (used below via romanizer.hangul)
#   db        - Supabase lookups (getUpos, getXpos, getMorphemes, getTranslation)

import importlib
import romanizer
import db

# Re-execute both modules so edits to their source files are picked up without
# restarting the kernel. The value of the last reload is what this cell echoes.
importlib.reload(romanizer)     # Pick up the latest romanizer code
importlib.reload(db)

<module 'db' from '/home/boddah/Documents/Stanza/db.py'>

In [5]:
# DATABASE
# Build the Supabase client used by the `db` helpers in later cells.

import os
from typing import Optional

from dotenv import load_dotenv
from supabase import create_client, Client

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# os.environ.get returns None when the variable is unset, hence Optional.
url: Optional[str] = os.environ.get("SUPABASE_URL")
key: Optional[str] = os.environ.get("SUPABASE_KEY")

# Fail here, naming exactly what is missing, instead of handing None to create_client.
missing_vars = [
    name
    for name, value in (("SUPABASE_URL", url), ("SUPABASE_KEY", key))
    if not value
]
if missing_vars:
    raise RuntimeError(
        f"Missing environment variable(s): {', '.join(missing_vars)}. "
        "Define them in a .env file or in the shell before running this cell."
    )

supabase: Client = create_client(url, key)

In [None]:
# MODEL DOWNLOAD

import stanza

# Language code shared by the download step and the pipeline constructor.
language = 'ko'

stanza.download(lang=language)        # Fetch the model files for this language
nlp = stanza.Pipeline(lang=language)  # Build the neural pipeline used by later cells

In [None]:
# PHRASE TOKENIZATION
# Run the pipeline on a sample phrase and keep both the Document and its plain-dict form.
phrase = "무엇을 먹을 거예요?"

doc = nlp(phrase)
# NOTE(review): this name shadows the built-in `dict` for the rest of the kernel session.
# The display cell further down iterates over it (`for sentences in dict`), so any rename
# has to be made in both cells together.
dict = doc.to_dict()

# Show the parsed sentences (one list of token records per sentence).
print(doc.sentences)

[[
  {
    "id": 1,
    "text": "무엇을",
    "lemma": "무엇+을",
    "upos": "PRON",
    "xpos": "npd+jco",
    "head": 2,
    "deprel": "obj",
    "start_char": 0,
    "end_char": 3
  },
  {
    "id": 2,
    "text": "먹을",
    "lemma": "먹+ㄹ",
    "upos": "VERB",
    "xpos": "pvg+etm",
    "head": 3,
    "deprel": "ccomp",
    "start_char": 4,
    "end_char": 6
  },
  {
    "id": 3,
    "text": "거예요",
    "lemma": "거+이+에요",
    "upos": "VERB",
    "xpos": "nbn+jp+ef",
    "head": 0,
    "deprel": "root",
    "start_char": 7,
    "end_char": 10,
    "misc": "SpaceAfter=No"
  },
  {
    "id": 4,
    "text": "?",
    "lemma": "?",
    "upos": "PUNCT",
    "xpos": "sf",
    "head": 3,
    "deprel": "punct",
    "start_char": 10,
    "end_char": 11,
    "misc": "SpaceAfter=No"
  }
]]


In [76]:
from color import blue, bold, cyan, green, purple, red


# For every token of every sentence, print its romanization, part of speech,
# per-morpheme breakdown and translation.
# Iterates doc.to_dict() directly rather than a notebook-level variable, so this cell
# does not depend on a name that shadows the built-in `dict`.
for sentence in doc.to_dict():
    print("Phrase: ", phrase)
    for word in sentence:
        print(bold(cyan("Text: ")), word['text'], '|', blue(romanizer.hangul(word['text'])))

        # Human-readable label for the universal POS tag, looked up through the db helper.
        upos = db.getUpos(supabase, word['upos'])
        print(green("Part of Speech: "), upos)

        print(red("Morphemes: "))
        # 'lemma' and 'xpos' are '+'-joined, one entry per morpheme (e.g. "무엇+을" / "npd+jco").
        morphemes = word['lemma'].split('+')
        tags = word['xpos'].split('+')

        xpos = db.getXpos(supabase, tags)
        morphemes_info = db.getMorphemes(supabase, morphemes, tags)

        # zip stops at the shortest sequence, so a morpheme with no matching tag/label/info
        # is silently left out of the listing.
        for morpheme, tag, xpos_label, info in zip(morphemes, tags, xpos, morphemes_info):
            print(f"    '{morpheme}': '{tag} => {xpos_label.title()}' | {bold(red('Translation'))}: {info['translation']}")

        translation = db.getTranslation(supabase, word['text'], word['upos'], morphemes_info)
        print(purple("Translation: "), translation)
        print("─"*30, '\n')


Phrase:  무엇을 먹을 거예요?
[1m[96mText: [0m[0m 무엇을 | [94mmueoseul[0m
[92mPart of Speech: [0m Pronoun
[91mMorphemes: [0m
    '무엇': 'npd => Demonstrative Noun/Pronoun' | [1m[91mTranslation[0m[0m: what
    '을': 'jco => Object Case Particle' | [1m[91mTranslation[0m[0m: (object particle)
[95mTranslation: [0m what (object)
────────────────────────────── 

[1m[96mText: [0m[0m 먹을 | [94mmeokeul[0m
[92mPart of Speech: [0m Verb
[91mMorphemes: [0m
    '먹': 'pvg => General Verb Root' | [1m[91mTranslation[0m[0m: to eat
    'ㄹ': 'etm => Adnominal Suffix' | [1m[91mTranslation[0m[0m: -going to (future/intentional modifier)
[95mTranslation: [0m going to eat
────────────────────────────── 

[1m[96mText: [0m[0m 거예요 | [94mgeoyeyo[0m
[92mPart of Speech: [0m Verb
[91mMorphemes: [0m
    '거': 'nbn => Nominal Bound Noun' | [1m[91mTranslation[0m[0m: thing
    '이': 'jp => Subject Particle' | [1m[91mTranslation[0m[0m: (subject particle)
    '에요': 'ef => Final End