# In [None]:
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

import os
import sys
from ckiptagger import data_utils, construct_dictionary, WS, POS, NER
from opencc import OpenCC

# Ask TensorFlow's native logging to stay quiet (level '3').
# NOTE(review): this is assigned after ckiptagger has already been imported
# above; TensorFlow usually needs the variable set before it is imported for
# it to take effect -- confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
#os.environ['AUTOGRAPH_VERBOSITY']='10'
# Base directory for CKIP resources; the models are loaded from TKIPDATA + "data".
TKIPDATA="./"
# Shared OpenCC converter built with the 't2s' configuration
# (Traditional -> Simplified Chinese); used when writing the .simp output.
cc = OpenCC('t2s')

def read_file(f):
    """Read everything from the open text stream *f* and return its lines.

    The whole stream is consumed in one read and split with
    ``str.splitlines``, so the returned strings carry no line terminators.
    """
    content = f.read()
    return content.splitlines()

def pos_analysis(lst, ws, pos, ner):
    """Segment the sentences in *lst* and tag the resulting words.

    *ws* is called with ``sentence_segmentation=True`` on *lst*; its result
    is then handed to *pos*.  *ner* is accepted for signature compatibility
    but is not used here.

    Returns a ``(word_sentence_list, pos_sentence_list)`` tuple.
    """
    segmented = ws(lst, sentence_segmentation=True)
    tagged = pos(segmented)
    return segmented, tagged

def print_pos(sentence_list, word_sentence_list,  pos_sentence_list, ofp, simpfp):
    """Emit the segmented words of every sentence.

    For each position in *sentence_list*, the matching entries of
    *word_sentence_list* and *pos_sentence_list* are looked up by index and
    forwarded, together with the two output streams, to
    ``print_word_pos_sentence``.
    """
    for idx, _ in enumerate(sentence_list):
        words = word_sentence_list[idx]
        tags = pos_sentence_list[idx]
        print_word_pos_sentence(sentence_list, words, tags, ofp, simpfp)

# Show results
def print_word_pos_sentence(sentence_list, word_sentence_list, pos_sentence_list, ofp, simpfp):
    """Write one sentence's words, space separated, to stdout and both files.

    *word_sentence_list* and *pos_sentence_list* are the parallel word and
    tag lists of a single sentence and must have the same length.  Words
    whose tag contains "CATEGORY" are skipped (presumably CKIP's punctuation
    tags such as COMMACATEGORY -- verify against the tag set).  Every other
    word is stripped of newlines, printed, written to *ofp* as-is, and
    written to *simpfp* after conversion through the module-level ``cc``
    converter.  *sentence_list* is not used.
    """
    assert len(word_sentence_list) == len(pos_sentence_list)
    for token, tag in zip(word_sentence_list, pos_sentence_list):
        if "CATEGORY" in tag:
            continue
        original = token.strip('\n')
        converted = cc.convert(original)
        print(original, end=" ")
        ofp.write(original + " ")
        simpfp.write(f"{converted} ")
            
def tkip_data():
    """Make sure the CKIP model data is available and load the three models.

    The models are read from the ``data`` directory under ``TKIPDATA``.  If
    that directory is missing, ``data_utils.download_data`` is asked to fetch
    it into ``TKIPDATA`` first.

    Returns:
        A ``(ws, pos, ner)`` tuple of the loaded WS, POS and NER objects.
    """
    data_dir = os.path.join(TKIPDATA, "data")

    # Check the directory the loaders actually read.  Testing TKIPDATA itself
    # ("./") is always true, so the download branch could never run.
    if not os.path.isdir(data_dir):
        print("Downloading CKIP data...")
        # Download data
        os.makedirs(TKIPDATA, exist_ok=True)
        data_utils.download_data(TKIPDATA)

    print("Loading CKIP data...")
    # Load model
    ws = WS(data_dir)
    pos = POS(data_dir)
    ner = NER(data_dir)

    return (ws, pos, ner)

# Load the CKIP models once; they are reused for every file processed below.
(ws, pos, ner) = tkip_data()

# Keep prompting for input files until the user submits a blank line or
# closes stdin, so that the model release at the bottom is reachable.
while True:
    try:
        txtfile = input("Input text file: ")
    except EOFError:
        break
    if not txtfile.strip():
        break

    # Output files sit next to the input and differ only by extension.
    base = os.path.splitext(txtfile)[0]
    posfile = base + '.pos'
    simpfile = base + '.simp'
    print("Input: ", txtfile, "Output: ", posfile, "簡體中文: ", simpfile)
    print()

    # Read the input first; an unreadable path is reported and the prompt is
    # shown again instead of aborting the whole session.
    try:
        with open(txtfile, encoding="utf8") as fp:
            lines = read_file(fp)
    except OSError as err:
        print(f"Cannot read {txtfile}: {err}")
        continue

    for line in lines:
        print(line)

    print()
    print("-----------------------------")
    (wl, pl) = pos_analysis(lines, ws, pos, ner)

    # Open the outputs only once analysis has succeeded, and let the context
    # manager close them even if writing fails part-way.
    with open(posfile, "w", encoding="utf8") as ofp, \
            open(simpfile, "w", encoding="utf8") as simpfp:
        print_pos(lines, wl, pl, ofp, simpfp)
    print()
    print("-----------DONE--------------")

# Release model
del ws
del pos
del ner

