# Test hyphenation using Czech TeX hyphenation patterns

In [1]:
%load_ext dotenv
%dotenv
import os

# Resolve the project root from the WORKING_DIR environment variable
# (presumably supplied by the .env file loaded through %dotenv — confirm) and
# make it the current directory; later cells import from `src` and pass
# paths relative to this directory.
base_dir = os.getenv("WORKING_DIR")
if not base_dir:
    # os.getenv returns None for a missing variable, and os.chdir(None) would
    # fail with an opaque TypeError; raise an actionable error instead.
    raise RuntimeError(
        "WORKING_DIR is not set; define it in the environment or in the .env file."
    )
os.chdir(base_dir)

In [2]:
from src.data_loader_and_saver import JSONDataLoaderAndSaver

# JSON loader/saver for this notebook. Inputs are looked up under
# "src/syllabification" (presumably resolved relative to base_dir — confirm
# against JSONDataLoaderAndSaver).
data_loader = JSONDataLoaderAndSaver(
    base_dir,
    input_data_dir="src/syllabification",
)

In [3]:
# Load the words used as hyphenation test input. The path is given without an
# extension; the loader's log line reports resources/test_words.json.
test_words = data_loader.load_data("resources/test_words")

resources/test_words.json: loaded 26 records.


In [4]:
from src.syllabification.pat_data_loader import PatDataLoader

# Loader for the hyphenation pattern file. The second positional argument
# points at "src/syllabification/resources" (presumably the input directory
# relative to base_dir — confirm against PatDataLoader).
pat_data_loader = PatDataLoader(
    base_dir,
    "src/syllabification/resources",
)

In [5]:
# Read the hyphenation patterns from the csskhyphen.pat resource file.
patterns = pat_data_loader.load_data("csskhyphen.pat")
# Preview only the first ten patterns as a sanity check rather than dumping
# the whole collection into the notebook output.
patterns[:10]

['.ad3aw',
 '.ads4',
 '.af3r',
 '.ai4č',
 '.ak3ry',
 '.al3s',
 '.as3k',
 '.as3t',
 '.at3at.',
 '.bel3h']

In [7]:
from src.syllabification.czech_tex_hyphenator import CzechTexHyphenator

# Build the hyphenator from the loaded patterns; the test cells below reuse
# this single instance for every word.
tex_hyphenator = CzechTexHyphenator(patterns)

### Test hyphenation

In [8]:
# Raw pattern-based hyphenation: print the list of fragments produced for
# each test word, one word per output line.
for test_word in test_words:
    syllables = tex_hyphenator.hyphenate_word(test_word)
    print(syllables)

['nej', 'neob', 'ho', 's', 'po', 'da', 'řo', 'vá', 'va', 'tel', 'něj', 'ší', 'ho']
['hou', 's', 'ka']
['nej', 'atrak', 'tiv', 'něj', 'ší']
['nej', 'star', 'ší']
['nej', 'krá', 's', 'něj', 'ší']
['ne', 'mast', 'ný']
['ahoj']
['ital', 'šti', 'na']
['měk', 'ký']
['krk']
['ma', 'lí', 'ček']
['sna', 'ha']
['sna', 'cha']
['ame', 'tyst']
['hou', 'sen', 'ka']
['ko', 'ro', 'ze']
['hrad']
['skunk']
['mlok']
['šun', 'ka']
['sou', 'os', 't', 'ro', 'ví']
['rána']
['uvidím']
['vidím']
['nejv', 'ni', 'tř', 'něj', 'ším']
['tr', 'pa', 's', 'lík']


### Test hyphenation with heuristics

In [9]:
from src.syllabification.tex_heuristics import tex_heuristics

# Same words again, with tex_heuristics applied to the raw hyphenation.
# Judging by the recorded outputs, this merges stray single-consonant
# fragments into the preceding syllable (e.g. 'ho', 's' -> 'hos') — confirm
# against tex_heuristics.
for test_word in test_words:
    raw_syllables = tex_hyphenator.hyphenate_word(test_word)
    print(tex_heuristics(raw_syllables))

['nej', 'neob', 'hos', 'po', 'da', 'řo', 'vá', 'va', 'tel', 'něj', 'ší', 'ho']
['hous', 'ka']
['nej', 'atrak', 'tiv', 'něj', 'ší']
['nej', 'star', 'ší']
['nej', 'krás', 'něj', 'ší']
['ne', 'mast', 'ný']
['ahoj']
['ital', 'šti', 'na']
['měk', 'ký']
['krk']
['ma', 'lí', 'ček']
['sna', 'ha']
['sna', 'cha']
['ame', 'tyst']
['hou', 'sen', 'ka']
['ko', 'ro', 'ze']
['hrad']
['skunk']
['mlok']
['šun', 'ka']
['sou', 'ost', 'ro', 'ví']
['rána']
['uvidím']
['vidím']
['nejv', 'ni', 'tř', 'něj', 'ším']
['tr', 'pas', 'lík']
