In [1]:
from chinese import ChineseAnalyzer
import pandas as pd
import simplejson
analyzer = ChineseAnalyzer()

# Lanterns

In [2]:
motifs = [["Greeting", "新年", "新年"],
["Prosperity", "金", "金"],
["Peace", "平", "平"],
["Luck", "福", "福"],
["Abundance", "丰", "丰"],
["Career", "事業", "商业"],
["Success", "成功", "成功"],
["Health", "健康", "健康"]]

lanterns = pd.DataFrame(data=motifs, columns=["english", "chinese_traditional", "chinese_simplified"])
lanterns.head()

Unnamed: 0,english,chinese_traditional,chinese_simplified
0,Greeting,新年,新年
1,Prosperity,金,金
2,Peace,平,平
3,Luck,福,福
4,Abundance,丰,丰


In [3]:
first_intonation = []
last_intonation = []
for index, row in lanterns.iterrows():
    result = analyzer.parse(row["chinese_simplified"], traditional=False)
    for token in result.tokens():
        if result[token][0].pinyin != None:
            first_intonation.append(int(result[token][0].pinyin[0][-1:]))
            last_intonation.append(int(result[token][0].pinyin[-1][-1:]))
        else:
            first_intonation.append(0)
            last_intonation.append(0)
            
lanterns["first_intonation"] = pd.Series(first_intonation)
lanterns["last_intonation"] = pd.Series(first_intonation)
lanterns.head()

Unnamed: 0,english,chinese_traditional,chinese_simplified,first_intonation,last_intonation
0,Greeting,新年,新年,1,1
1,Prosperity,金,金,1,1
2,Peace,平,平,2,2
3,Luck,福,福,2,2
4,Abundance,丰,丰,1,1


In [4]:
themes = list(lanterns.iloc[:,0])

# Links

In [5]:
df = pd.read_csv("table.csv")
df.columns= ["chinese_simplified", "prefix", "translation"] + themes
df.fillna(False, inplace=True)
df.replace("Y", True, inplace=True)
df["id"] = df.index
df.head()

Unnamed: 0,chinese_simplified,prefix,translation,Greeting,Prosperity,Peace,Luck,Abundance,Career,Success,Health,id
0,新年快乐,Wishing you,Happy New Year,True,False,False,False,False,False,False,False,0
1,春节快乐,Wishing you,Happy New Year,True,False,False,False,False,False,False,False,1
2,恭喜发财,Wishing you,Wealth and Prosperity,True,True,False,False,False,False,False,False,2
3,岁岁平安,Wishing you,Peace every year,False,False,True,False,False,False,False,False,3
4,恭贺新禧,Wishing you,Happy New Year,True,False,False,False,False,False,False,False,4


In [6]:
links = df[themes].stack().reset_index()
links.columns = ["tile", "lantern", "filter"]
links = links[links["filter"]][["tile", "lantern"]]
links.head()

Unnamed: 0,tile,lantern
0,0,Greeting
8,1,Greeting
16,2,Greeting
17,2,Prosperity
26,3,Peace


# Tiles

In [7]:
phrases = df["chinese_simplified"]

In [8]:
result = analyzer.parse("春節快樂")

In [9]:
tiles = []
for index, row in df.iterrows():
    
    # Get full translation
    chinese_traditional = ""
    definitions = {}
    result = analyzer.parse(row["chinese_simplified"], traditional=False)
    for token in result.tokens():
        chinese_traditional += result[token][0].match
        if result[token][0].definitions != None:
            definitions[token] = result[token][0].definitions
            
    # Get partial translation (first 2 and last 2 words)
    for phrase in [row["chinese_simplified"][:2], row["chinese_simplified"][2:]]:
        result = analyzer.parse(phrase, traditional=False)
        for token in result.tokens():
            if result[token][0].definitions != None:
                definitions[token] = result[token][0].definitions
    
    # Get pinyin spaced out
    pinyin = []
    for i in [0, 1, 2, 3]:
        result = analyzer.parse(row["chinese_simplified"][i])
        pinyin.append(result.pinyin(force=True).lower())

    phrase = row["chinese_simplified"]
    result = analyzer.parse(phrase, traditional=False)
    tiles.append(
        {
            "id": row["id"],
            "prefix": row["prefix"],
            "chinese_simplified": row["chinese_simplified"],
            "chinese_traditional": chinese_traditional,
            "translation": row["translation"],
            "pinyin": pinyin,
            "definitions": definitions,
            
        }
    )

In [10]:
data = {
    "lanterns": lanterns.to_dict(orient="records"),
    "links": links.to_dict(orient="records"),
    "tiles": tiles
}

In [11]:
with open("data.json", "w") as file:
    simplejson.dump(data, file, ignore_nan=True)