In [1]:
import json
import re


def load_rhythm_list():

    with open("平水韵表.txt", encoding="UTF-8") as file:
        rhythm_lines = file.readlines()
    rhythm_dict = dict()
    for rhythm_line in rhythm_lines:
        rhythm_name = re.search(".*(?=[平上去入]声:)", rhythm_line).group() 
        rhythm_tune = re.search("[平上去入](?=声:)", rhythm_line).group() 
        rhythm_characters = re.sub(".*[平上去入]声:", "", rhythm_line)  
        for character in rhythm_characters:
            if character not in rhythm_dict:
                rhythm_dict[character] = list()
            rhythm_dict[character].append([rhythm_name, rhythm_tune])
    return rhythm_dict


RHYTHM_LIST = load_rhythm_list() 

In [2]:
def get_rhythm(character):

    rhythm_set = set()
    if character in RHYTHM_LIST:
        for rhythm_item in RHYTHM_LIST.get(character):
            rhythm_set.add(rhythm_item[0])
        if len(rhythm_set) == 1:
            return list(rhythm_set)[0]
        else:
            return "/".join(list(rhythm_set))
    else:
            return "Special Char"

In [3]:
def get_tone(character):
    """
    

    :param character: <str> 
    :return: <str> 
    """
    tone_set = set()
    if character in RHYTHM_LIST:
        for rhythm_item in RHYTHM_LIST.get(character):
            tone_set.add(re.sub("[上去入]", "Z", rhythm_item[1]))
        if len(tone_set) == 1:  # 
            if (list(tone_set)[0] == "平"):
                return "P"
            return list(tone_set)[0]
        else:
            return "*"
    else:
        return "*"

In [4]:
def inspect_sentence_tone(sentence_tone):
    """
    :return: <bool> , <bool> , <str>
    """
    if re.match("[PZ*]?[P*]?[PZ*][Z*][P*][P*][Z*]", sentence_tone):  # (Z)ZPPZ
        return True, "ZZPPZ", 
    elif re.match("[PZ*]?[Z*]?[PZ*][P*][P*][Z*][Z*]", sentence_tone):  # (P)PPZZ
        return True, "PPPZZ", 
    elif re.match("[PZ*]?[P*]?[PZ*][Z*][Z*][P*][P*]", sentence_tone):  # (Z)ZZPP
        return True, "ZZZPP", 
    elif re.match("[PZ*]?[Z*]?[P*][P*][Z*][Z*][P*]", sentence_tone):  # PPZZP
        return True, "PPZZP", 
    elif re.match("[PZ*]?[P*]?[PZ*][Z*][Z*][P*][Z*]", sentence_tone):  # (Z)ZZPZ
        return True, "ZZPPZ", 
    elif re.match("[PZ*]?[P*]?[PZ*][Z*][PZ*][Z*][Z*]", sentence_tone):  # (Z)Z(P)ZZ
        return True, "ZZPPZ", 
    elif re.match("[PZ*]?[Z*]?[P*][P*][Z*][PZ*][Z*]", sentence_tone):  # PPZ(P)Z
        return True, "PPPZZ",
    elif re.match("[PZ*]?[Z*]?[PZ*][Z*][P*][P*][P*]", sentence_tone):  # (Z)ZPPP
        return True, "ZZZPP", 
    elif re.match("[PZ*]?[Z*]?[Z*][P*][P*][Z*][P*]", sentence_tone):  # ZPPZP
        return True, "PPZZP", 
    elif re.match("[PZ*]?[Z*]?[P*][P*][P*][Z*][P*]", sentence_tone):  # PPPZP
        return True, "PPZZP", 
    else:
        return False, "", "拗句"

In [5]:
def is_tone_same(tone_1, tone_2):
    """
    """
    if (tone_1 == "Z" or tone_1 == "*") and (tone_2 == "Z" or tone_2 == "*"):
        return True
    elif (tone_1 == "P" or tone_1 == "*") and (tone_2 == "P" or tone_2 == "*"):
        return True
    else:
        return False

In [6]:
def is_tone_differ(tone_1, tone_2):
    """
    :param tone_1:
    :param tone_2:
    :return:
    """
    if (tone_1 == "Z" or tone_1 == "*") and (tone_2 == "P" or tone_2 == "*"):
        return True
    elif (tone_1 == "P" or tone_1 == "*") and (tone_2 == "Z" or tone_2 == "*"):
        return True
    else:
        return False

In [7]:
def inspect_corresponding(first_type, second_type):
    """
   

    :param first_type: <str> 
    :param second_type: <str> 
    :return: <bool>
    """
    if len(first_type) != len(second_type):
        return False
    return is_tone_differ(first_type[-2], second_type[-2]) and is_tone_differ(first_type[-1], second_type[-1])

In [8]:
def inspect_sticky(last_second_type, this_first_type):
    """
    

    :param last_second_type: <str> 
    :param this_first_type: <str> 
    :return: <bool>
    """
    if len(last_second_type) != len(this_first_type):
        return False
    return is_tone_same(last_second_type[-2], this_first_type[-2])

In [None]:
def poem_analyse(title, author, content):
    print(content)
    sentences = [sentence for sentence in re.split("[，。？！]", content) if sentence != ""]
    punctuations = re.findall("[，。？！]", content)
    # check if the poem follow number of characters.
    if len(sentences) != 4 and len(sentences) != 8:
        print("************** Bad Example ********************")
        print("The poem does not follow number of lines.")
        return False

    # ehck if the sentense follows length constrain
    if not all([len(sentence) == 5 or len(sentence) == 7 for sentence in sentences]):
        print("************** Bad Example ********************")
        print("The poem does not follow number of chars.")
        return False

    # check the Ping Ze evalue.
    sentence_tone_list = list()
    for sentence in sentences:
        sentence_tone_list.append("".join([get_tone(character) for character in sentence]))
    

    # 
    if not all([sentence_tone_list[i][-1] in ["P", "*"] for i in range(len(sentences)) if i % 2 == 1]):
#         for i in range(int(len(sentences) / 2)):
#             first_sentence = sentences[2 * i + 0]  
#             second_sentence = sentences[2 * i + 1]  
# #         output_sentence = first_sentence + punctuations[2 * i + 0] + second_sentence + punctuations[2 * i + 1]
#         print(output_sentence)
#         print("《" + title + "》", author, "The poem does not follow tonal pattern")
        print("************** Bad Example ********************")
        print("The poem does not follow tonal pattern")

        return False

    print("《" + title + "》", author)

    last_second_type = ""



    for i in range(int(len(sentences) / 2)):
        first_sentence = sentences[2 * i + 0]  
        second_sentence = sentences[2 * i + 1]  
        first_tone = sentence_tone_list[2 * i + 0]  
        second_tone = sentence_tone_list[2 * i + 1]  
        second_rhythm = "（" + get_rhythm(second_sentence[-1]) + "）"  
        first_correct, first_type = inspect_sentence_tone(first_tone)
        second_correct, second_type = inspect_sentence_tone(second_tone)
        other_analysis = ""
        if first_correct and second_correct:
            if not inspect_corresponding(first_type, second_type):  
                other_analysis += "【失对】"
            if last_second_type is not None and inspect_sticky(last_second_type, first_type):  
                other_analysis += "【失黏】"

        last_second_type = second_type

        output_sentence = first_sentence + punctuations[2 * i + 0] + second_sentence + punctuations[2 * i + 1]  
        output_analysis = first_tone + "　" + second_tone + second_rhythm  

        print(output_sentence)
        print(output_analysis)
#         print("**********************************")

    return True


if __name__ == "__main__":
    with open("test.json", encoding="UTF-8") as file:
        poem_json = json.loads(file.read())
    for poem_item in poem_json["data"]:

        if poem_analyse(poem_item["title"], poem_item["author"], poem_item["content"].replace("\n", "")):
            print("press enter to continue...")
            print("************** Good Example ********************")
            input()


四時運灰琯，一夕變冬春。送寒餘雪盡，迎歲早梅新。
《》 
四時運灰琯，一夕變冬春。
Z**PZ　ZZ*PP（十一真）
送寒餘雪盡，迎歲早梅新。
ZP*Z*　**ZPP（十一真）
press enter to continue...
************** Good Example ********************

四時運灰琯，一二三四五。送寒餘雪盡，上山打老虎。
************** Bad Example ********************
The poem does not follow tonal pattern
四時運灰琯，一夕變冬春夏秋冬。送寒餘雪盡，上山打老虎。
************** Bad Example ********************
The poem does not follow number of chars.
澄潭皎鏡石崔巍，萬壑千巖暗綠苔。林亭自有幽貞趣，況復秋深爽氣來。
《》 
澄潭皎鏡石崔巍，萬壑千巖暗綠苔。
PPZ*ZPP　ZZP*Z*P（十灰）
林亭自有幽貞趣，況復秋深爽氣來。
PPZZP**　*ZP*Z**（Special Char）
press enter to continue...
************** Good Example ********************

人皆苦炎熱，我愛夏日長。熏風自南來，殿閣生微涼。
《》 
人皆苦炎熱，我愛夏日長。
PPZP*　Z*ZZ*（Special Char）
熏風自南來，殿閣生微涼。
P*ZP*　Z*PP*（Special Char）
press enter to continue...
************** Good Example ********************


In [20]:
import pandas as pd

df = pd.read_fwf('../data/valid.txt', header=None)


In [21]:
df

Unnamed: 0,0
0,東岡凖擬太丘來，竹洗塵根石拂苔。好客風廊俱有分，只愁髙唱不容陪。
1,舊喜三周貪好客，爭如一叟促歸程。長亭日晚相望處，一片平湖泠浸星。
2,長道相思苦不開，故從百里泛舟來。如何不見稽山老，卻道心因興盡回。
3,老覺浮雲念不萌，君才豈合後諸生。從教罪我春秋法，不受詩人月旦評。
4,聲猶在耳鳥關關，過眼庭陰作午班。香篆靜中便細看，綠蘿新處得髙攀。
...,...
995,亭亭澗底松，幹凌雪霜孤。既無鸞鳳翔，鳥雀来喧呼。
996,一世競榮利，紛若空中花。蚤作田園計，人生會有涯。
997,身遊闤闠間，心在孤雲杪。譬如登太山，頗覺天下小。
998,外慕亦無已，反身良有餘。乘流如可遊，韞匵豈藏諸。


In [22]:
with open('../data/WY_valid.txt', 'w') as w:
# for index, row in df.iloc[0:len(df) - validateNum].iterrows():
    for index, row in df.iloc[:].iterrows():
        content = row.values[0]
        if (len(content) == 24):
            w.write(content+'\n')
#         print(len(content))
#         w.write(content+'\n')
        


In [23]:
with open('../data/QY_valid.txt', 'w') as w:
# for index, row in df.iloc[0:len(df) - validateNum].iterrows():
    for index, row in df.iloc[:].iterrows():
        content = row.values[0]
        if (len(content) == 32):
            w.write(content+'\n')