In [84]:
import pandas as pd
import numpy as np
import json
import re
import unidecode

In [63]:
def flatten_json(json_dict, result=[], parent=None, gen=1):
    info = [unidecode.unidecode(json_dict['py_name']), 
            unidecode.unidecode(json_dict['jp_name']), 
            json_dict['dates'], 
            unidecode.unidecode(json_dict['info']),
            parent,
            gen
           ]
    result.append(info)
    children = json_dict['children']

    for child in children:
        flatten_json(child, result, unidecode.unidecode(json_dict['py_name']), gen+1)

    return result

In [64]:
with open('data/lineage.json') as f:
    data = json.load(f)
    
df = pd.DataFrame(flatten_json(data, []), 
                  columns=['py_name', 'jp_name', 'dates', 'info', 'teacher', 'generation']
                 )
df.head()

Unnamed: 0,py_name,jp_name,dates,info,teacher,generation
0,Bodhidharma,Daruma,"[450, 532]",<b>Bodhidharma</b> (?-532)<br>Aliases: <i>Daru...,,1
1,Dazu Huike,Taiso Eka,"[487, 593]",<b>Dazu Huike</b> (487-593)<br>Aliases: <i>Tai...,Bodhidharma,2
2,Jianzhi Sengcan,Kanchi Sosan,"[529, 606]",<b>Jianzhi Sengcan</b> (529-606)<br>Aliases: <...,Dazu Huike,3
3,Dayi Daoxin,Doshin,"[580, 651]",<b>Dayi Daoxin</b> (580-651)<br>Aliases: <i>Do...,Jianzhi Sengcan,4
4,Niutou Farong,Gozu Hoyu,"[594, 657]",<b>Niutou Farong</b> (594-657)<br>Aliases: <i>...,Dayi Daoxin,5


In [67]:
df['dates'][0][0]

450

In [69]:
def get_info_part(info_string, sep, i):
    try:
        return info_string.split(sep)[i]
    except IndexError:
        return None

def get_info_part_re(info_string, pattern, group):
    m = re.search(pattern, info_string)
    if m:
        return m.group(group)
    else:
        return None

def get_works(info_string):
    s = get_info_part_re(info_string, '<br><br><b>Works:.*i>(.*)<', 1)
    if s and s != 'Title <> Other Title':
        return s.replace('<>', ',')
    else:
        return None

In [70]:
df['bio'] = df['info'].apply(lambda x: get_info_part(x, '<br><br>', 1))
df['birth_date'] = df['info'].apply(lambda x: get_info_part_re(x, '\((\d{3,4})-', 1))
df['death_date'] = df['info'].apply(lambda x: get_info_part_re(x, '-(\d{3,4})\)', 1))
df['display_birth_date'] = df['dates'].apply(lambda x: x[0])
df['display_death_date'] = df['dates'].apply(lambda x: x[1])
df['works'] = df['info'].apply(get_works)
df

Unnamed: 0,py_name,jp_name,dates,info,teacher,generation,bio,birth_date,death_date,display_birth_date,display_death_date,works
0,Bodhidharma,Daruma,"[450, 532]",<b>Bodhidharma</b> (?-532)<br>Aliases: <i>Daru...,,1,1st Zen Patriarch. Came from India. 28th Patri...,,532.0,450,532,
1,Dazu Huike,Taiso Eka,"[487, 593]",<b>Dazu Huike</b> (487-593)<br>Aliases: <i>Tai...,Bodhidharma,2,2nd Patriarch. Chopped his own arm to show Bod...,487.0,593.0,487,593,
2,Jianzhi Sengcan,Kanchi Sosan,"[529, 606]",<b>Jianzhi Sengcan</b> (529-606)<br>Aliases: <...,Dazu Huike,3,3rd Patriarch.,529.0,606.0,529,606,Xin Xin Ming
3,Dayi Daoxin,Doshin,"[580, 651]",<b>Dayi Daoxin</b> (580-651)<br>Aliases: <i>Do...,Jianzhi Sengcan,4,4th Patriarch.,580.0,651.0,580,651,
4,Niutou Farong,Gozu Hoyu,"[594, 657]",<b>Niutou Farong</b> (594-657)<br>Aliases: <i>...,Dayi Daoxin,5,First interaction with Daoxin:<br>Daoxin: 'Wha...,594.0,657.0,594,657,Xin Ming (Mind Inscription)
5,Chihyen,Chigan,"[607, 677]",<b>Chihyen</b> (600-677)<br>Aliases: <i>Chigan...,Niutou Farong,6,7 feet 6 inches tall. Warrior turned monk at 40.,600.0,677.0,607,677,
6,Huifang,Eho,"[622, 695]",<b>Huifang</b><br>Aliases: <i>Eho</i>,Chihyen,7,,,,622,695,
7,Fachih,Hoji,"[635, 702]",<b>Fachih</b> (635-702)<br>Aliases: <i>Hoji</i...,Huifang,8,Was a disciple of Hongren before becoming Huif...,635.0,702.0,635,702,
8,Zhiwei,Chii,"[646, 722]",<b>Zhiwei</b> (646-722)<br>Aliases: <i>Chii</i...,Fachih,9,Credited with the actual formation of the Niut...,646.0,722.0,646,722,
9,Helin Xuansu,Kakurin Genso,"[668, 752]",<b>Helin Xuansu</b> (668-752)<br>Aliases: <i>K...,Zhiwei,10,May have been a teacher of Fahai who then stud...,668.0,752.0,668,752,


In [71]:
df.to_csv('data/masters.csv', index=False)

# Creating Lineage from CSV

In [97]:
df = pd.read_csv('data/masters.csv')
# df = df.fillna('')
df.head()

Unnamed: 0,py_name,kr_name,jp_name,info,teacher,generation,bio,birth_date,death_date,display_birth_date,display_death_date,works,links
0,Bodhidharma,,Daruma,<b>Bodhidharma</b> (?-532)<br>Aliases: <i>Daru...,,1,1st Zen Patriarch. Came from India. 28th Patri...,,532.0,450.0,532.0,,
1,Dazu Huike,,Taiso Eka,<b>Dazu Huike</b> (487-593)<br>Aliases: <i>Tai...,Bodhidharma,2,2nd Patriarch. Chopped his own arm to show Bod...,487.0,593.0,487.0,593.0,,
2,Jianzhi Sengcan,,Kanchi Sosan,<b>Jianzhi Sengcan</b> (529-606)<br>Aliases: <...,Dazu Huike,3,3rd Patriarch.,529.0,606.0,529.0,606.0,Xin Xin Ming,
3,Dayi Daoxin,,Doshin,<b>Dayi Daoxin</b> (580-651)<br>Aliases: <i>Do...,Jianzhi Sengcan,4,4th Patriarch.,580.0,651.0,580.0,651.0,,
4,Niutou Farong,,Gozu Hoyu,<b>Niutou Farong</b> (594-657)<br>Aliases: <i>...,Dayi Daoxin,5,First interaction with Daoxin:<br>Daoxin: 'Wha...,594.0,657.0,594.0,657.0,Xin Ming (Mind Inscription),


In [106]:
l = ['', 'nla']
", ".join(l)

', nla'

In [147]:
import csv
with open('data/masters.csv') as f:
    data = csv.reader(f)
    data=list(data)

people = data[1:]

# construct list of parents
parents = defaultdict(list)
for p in people:
    parents[p[3]].append(p)

def buildtree(t=None, parent_eid='', gen=1):
    """
    Given a parents lookup structure, construct
    a data hierarchy.
    """
    parent = parents.get(parent_eid, None)
    if parent is None:
        return t
    for py_name, kr_name, jp_name, teacher, bio, birth_date, death_date, display_birth, display_death, works, links in parent:
        info_string_template = """<b>{py_name}</b> ({bd_dates})
        <br>{gen_ord} generation
        <br>Aliases: {other_names}
        <br><br>{bio}
        """
        works_string = "<br><br><b>Works:</b> <i>{works}</i>"
        
        if kr_name:
            other_names = ", ".join([jp_name, kr_name])
        else:
            other_names = jp_name
        
        if not (birth_date or death_date):
            bd_dates = "n.d."
        elif not birth_date:
            bd_dates = "?-{}".format(death_date)
        elif not death_date:
            bd_dates = "{}-?".format(birth_date)
        else:
            bd_dates = "{}-{}".format(birth_date, death_date)
            
        gen_dict = {
            1: "1st",
            2:"2nd",
            3:"3rd",
            
                   }
        if gen <= 3:
            gen_ord = gen_dict[gen]
        else:
            gen_ord = str(gen)+'th'
            
        if works:
            info_string_template += works_string
            info_string = info_string_template.format(py_name=py_name, 
                                                      bd_dates = bd_dates,
                                                      gen_ord=gen_ord,
                                                      other_names=other_names, 
                                                      bio=bio, 
                                                      works=works)
        else:
            info_string = info_string_template.format(py_name=py_name, 
                                                      gen_ord=gen_ord,
                                                      bd_dates = bd_dates,
                                                      other_names=other_names, 
                                                      bio=bio)
            
        child = {
            'py_name': py_name,
            'jp_name': jp_name,
            'dates': [display_birth, display_death],
            'info': info_string
                }
        if t is None:
            t = child
        else:
            children = t.setdefault('children', [])
            children.append(child)
            
        buildtree(child, py_name, gen+1)
    return t

data = buildtree(t=None, parent_eid='')

with open('data/test.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent=4)

In [625]:
parents = defaultdict(list)

TypeError: first argument must be callable or None

In [503]:
master_dict = {}
for master in df.iterrows():
    if pd.isna(master[1][4]):
        master_dict['py_name'] = 
        master_dict['jp_name'] = 
        master_dict['dates'] = 
        master_dict['info'] = 
        master_dict['children'] = []

master_dict

Bodhidharma
Dàzǔ Huìkě
Jiànzhì Sēngcàn
Dayi Dàoxìn
Niutou Farong
Chihyen
Huifang
Fachih
Zhiwei
Helin Xuansu
Jingshan Daoqin
Niaoge Daolin
Huichung
Daman Hóngrěn
Fǎrù
Dàjiàn Huìnéng
Nányáng Huìzhōng
Danyuan Yingzhen
Nányuè Huáiràng
Mǎzŭ Dàoyī
Bǎizhàng Huáihái
Guishan Lingyou
Yang-shan Hui-chi
Xiāngyán Zhìxián
Huángbò Xīyùn
Nánquán Pǔyuàn
Damei Fachang
Heze Shenhui
Cizhou Faru
Yizhou Nanyin
Suizhou Daoyuan
Guīfēng Zōngmì
Yòngjiā Xuānjué
Qīngyuán Xíngsī
Shítóu Xīqiān
Yàoshān Wéiyǎn
Tianhuang Daowu
Longtan Chongxin
Deshan Xuanjian
Danxia Tianran
Cuiwei Wuxue
Qingping Lingzun
Touzi Datong
nan
Changzi Kuang
Shishi Shandao
Dadian Baotong
nan
Yùquán Shénxiù
Sungyueh Huian
Potsaoto
Yuankuei
Jenchien


In [493]:
df.values

numpy.ndarray

In [614]:
data['children'][0]['children']

[{'py_name': 'Jiànzhì Sēngcàn',
  'jp_name': 'Kanchi Sōsan',
  'dates': [529, 606],
  'info': '<b>Jiànzhì Sēngcàn</b> (529-606)<br>Aliases: <i>Kanchi Sōsan</i><br><br>3rd Patriarch. <br><br><b>Works:</b> <i>Xin Xin Ming</i>',
  'children': [{'py_name': 'Dayi Dàoxìn',
    'jp_name': 'Dōshin',
    'dates': [580, 651],
    'info': '<b>Dayi Dàoxìn</b> (580-651)<br>Aliases: <i>Dōshin</i><br><br>4th Patriarch.',
    'children': [{'py_name': 'Niutou Farong',
      'jp_name': 'Gozu Hōyū',
      'dates': [594, 657],
      'info': "<b>Niutou Farong</b> (594-657)<br>Aliases: <i>Gozu Hōyū</i><br><br>First interaction with Dàoxìn:<br>Dàoxìn: 'What are you up to?' <br>Farong: 'I am meditating on the mind' <br>Dàoxìn: 'What is it that is meditating? What is the mind?' <br>After studying with Dàoxìn, he began what became known as the Niutou (Oxhead) school of Zen. <br><br><b>Works:</b> <i>Xin Ming (Mind Inscription)</i> tr. Jess Row",
      'children': [{'py_name': 'Chihyen',
        'jp_name': 'Chiga