

# Bunraku Online Collection 
__Data Transformation SQL CSVs -> JSON__

<hr/>

### Data files to generate:

- __authors: 123__
- __characters: 2,107__ — 12,460 images
- __creators: 6__
- __images: 14,636__
- __kashira: 129__ — 12,364 images
- __performances: 931__ — 18,533 images
- __performers: 184__ — 10,442 images
- __plays: 178__ — 13,504 images
- __productions: 293__ — 13,477 images
- __pscenes: 2,609__ — 13,010 images
- __spucks: 16,625__ 
- __tags: 76__ — 5,668 images



# Setup:
<hr/>

In [1]:
import pandas as pd

# Display options for the table previews in this notebook.
# No cap on how many items of a sequence are printed. The option takes an
# int or None; the string 'none' is not a valid value.
pd.set_option('display.max_seq_items', None)
# Truncate wide cells in previews at 50 characters.
pd.set_option('display.max_colwidth', 50)

# Directory prefix for the input CSVs; it is concatenated directly with the
# file name, so the trailing slash is required.
path_to_csvs = "in/"


# Methods:
<hr/>

In [55]:
def df(csv_name):
    """Load ``<path_to_csvs><csv_name>.csv`` into a DataFrame.

    Every column is read with dtype=object and exact duplicate rows are
    dropped before the frame is returned.
    """
    csv_path = ''.join([path_to_csvs, csv_name, '.csv'])
    table = pd.read_csv(csv_path, dtype=object)
    return table.drop_duplicates()

def remove_offline(df):
    """Inner-join *df* with the notebook-level ``imagesonline`` table on image_id.

    Rows whose image_id is not in ``imagesonline`` are dropped; the columns
    of ``imagesonline`` are appended to the surviving rows.
    """
    online_rows = df.merge(imagesonline, how='inner', on='image_id')
    return online_rows

def merge_id_array(base_df, other_df, main_id, multi_id):
    """Attach to *base_df* a column holding lists of *multi_id* values.

    *other_df* is grouped by *main_id* and the *multi_id* values of each
    group are collected into a list. The grouped table is then left-joined
    onto *base_df* using the columns the two frames share, so rows without a
    match get NaN in the new column.
    """
    id_lists = other_df.groupby(main_id)[multi_id].apply(list)
    lookup = id_lists.reset_index()
    return pd.merge(base_df, lookup, how='left')

def re_id(df, key):
    """Rename column *key* of *df* to ``'id'`` in place; returns None.

    A *key* that is not a column of *df* leaves the frame unchanged.
    """
    new_names = {key: 'id'}
    df.rename(columns=new_names, inplace=True)
    
def csv_out(df, filename):
    """Write *df* to *filename* as UTF-8 CSV, leaving out the row index."""
    df.to_csv(path_or_buf=filename, index=False, encoding='utf8')
    
def json_out(df, filename):
    """Write *df* to *filename* as a JSON array with one object per row.

    Non-ASCII characters are written as-is rather than as \\u escapes.
    """
    df.to_json(path_or_buf=filename, force_ascii=False, orient="records")


# Import:
<hr/>

In [3]:
# import main tables (each call reads in/<name>.csv)
authors = df('authors')
characters = df('characters')
creators = df('creators')
images = df('imagesonline')
kashira = df('kashira')
performances = df('performances')
performers = df('performers')
plays = df('plays')
productions = df('productions')
pscenes = df('scenes_productions')
scenes = df('scenes')
shamisenplayers = df('sceneshamisens')
musicians = df('scenekotokokyus')
narrators = df('scenetayus')
spucks = df('spucks')
tags = df('tags')

# import join tables
authors_plays = df('authors_plays')
characters_images = df('characters_images')
characters_plays = df('characters_plays')
kashira_images = df('kashira_images')
kashira_plays = df('kashira_plays')
performances_images = df('performances_images')
performers_images = df('performers_images')
plays_images = df('plays_images')
productions_images = df('productions_images')
scenes_images = df('scenes_images')
tags_images = df('tags_images')

# Clean up non-online images from image join tables.
# remove_offline joins on 'image_id', so any 'id' column of the images table
# is renamed to 'image_id' for the lookup copy.
imagesonline = images.rename(columns={'id':'image_id'})

characters_images = remove_offline(characters_images)
kashira_images = remove_offline(kashira_images)
# performances_images used to be the only image join table left unfiltered,
# so performances could list image_ids that are missing from the images export.
# NOTE(review): confirm offline images were not meant to stay on performances.
performances_images = remove_offline(performances_images)
performers_images = remove_offline(performers_images)
plays_images = remove_offline(plays_images)
productions_images = remove_offline(productions_images)
scenes_images = remove_offline(scenes_images)
tags_images = remove_offline(tags_images)

# Authors:
- Add array of play_ids per author

<hr/>

In [4]:
# row count before the merges, to compare against the final count
print("Starting row count: {}".format(len(authors)))

Starting row count: 117


In [5]:
# add plays: one list of play_ids per author (NaN when the author has none)
authors = merge_id_array(
    base_df=authors,
    other_df=authors_plays,
    main_id='author_id',
    multi_id='play_id',
)
authors.head()

Unnamed: 0,author_id,label_eng,label_ka,dates,reference,sort_ja,play_id
0,1,Asada Icchō,浅田一鳥,fl. 1741-1767,LC Authorities,あさだいっちょう,"[19, 72, 105, 122]"
1,2,Ashikawa Teruha,芦川照葉,,LC Authorities,あしかわてるは,[173]
2,4,Chikamatsu Hanji,近松半二,d. 1786 or 7,LC Authorities,ちかまつはんじ,"[11, 21, 25, 27, 29, 33, 52, 79, 80, 90, 101, ..."
3,5,Chikamatsu Kosuiken,近松湖水軒,,"繪本太功記 / 近松やなぎ, 近松湖水軒, 千葉軒合作, 東京 : 金櫻堂, 1890.12...",ちかまつこすいけん,[16]
4,6,Chikamatsu Monzaemon,近松門左衛門,,LC Authorities,ちかまつもんざえもん,"[1, 2, 3, 4, 7, 8, 17, 24, 28, 34, 40, 41, 54,..."


In [6]:
# should equal the starting count: the left merges must not add or drop rows
print("Final row count: {}".format(len(authors)))

Final row count: 117


# Characters:
- Add array of play_ids + array of image_ids per character

<hr/>

In [7]:
# row count before the merges, to compare against the final count
print("Starting row count: {}".format(len(characters)))

Starting row count: 2107


In [8]:
# character_code is dropped before export; axis is passed by keyword because
# the positional form is no longer accepted by current pandas
characters = characters.drop('character_code', axis=1)

# attach the list of play_ids, then the list of online image_ids, per character
characters = merge_id_array(characters, characters_plays, 'character_id', 'play_id')
characters = merge_id_array(characters, characters_images, 'character_id', 'image_id')

characters.head()

Unnamed: 0,character_id,label_eng,label_ja,label_ka,authority_control,sort_ja,play_id,image_id
0,1,Farmer,Oyaji,おやじ,August 1989 Program,おやじ,[162],[56170]
1,2,Farmer's wife,Nyōbō,女房,August 1989 Program,にょうぼう,[162],
2,3,"Magotaro, a horse","Magotarō, Uma",孫太郎（馬）,August 1989 Program,まごたろう（うま）,[162],
3,4,Local governor,Odaikan,お代官,August 1989 Program,おだいかん,[162],
4,5,Local governor's assistant,Odaikan no kobun,お代官のこぶん,August 1989 Program,おだいかんのこぶん,[162],


In [9]:
# should equal the starting count: the left merges must not add or drop rows
print("Final row count: {}".format(len(characters)))

Final row count: 2107


# Images:
- Add arrays of character_ids, kashira_ids, performance_ids, performer_ids, play_ids, production_ids, pscene_ids, and tag_ids for each image.

<hr/>

In [10]:
# row count before the merges, to compare against the final count
print("Starting row count: {}".format(len(images)))

Starting row count: 14636


In [11]:
# the 'online' flag is dropped before export (axis passed by keyword for
# compatibility with current pandas)
images = images.drop('online', axis=1)
# Swap creator names for their ids. The replacement is limited to the
# 'creator' column so an identical string in another column is left alone.
creator_ids = {'Barbara C. Adachi':'1','Fukuda Fumio':'2','Harri Peccinotti':'3','M. Arai':'4','Toyotake Komatsudayū II':'5','Unknown. Photo: Columbia University Libraries':'6'}
images['creator'] = images['creator'].replace(creator_ids)

In [12]:
# add characters, kashira, performances, performers, plays, productions, scenes, and tags
# (join table, id column to collect); each pass appends one list column to images
image_links = [
    (characters_images, 'character_id'),
    (kashira_images, 'kashira_id'),
    (performances_images, 'performance_id'),
    (performers_images, 'performer_id'),
    (plays_images, 'play_id'),
    (productions_images, 'production_id'),
    (scenes_images, 'pscene_id'),
    (tags_images, 'tag_id'),
]
for join_table, linked_id in image_links:
    images = merge_id_array(images, join_table, 'image_id', linked_id)

In [13]:
# fixed column order for the export
image_columns = [
    'image_id', 'media_type',
    'character_id', 'tag_id', 'kashira_id', 'performance_id',
    'performer_id', 'play_id', 'production_id', 'pscene_id',
    'container', 'container_type', 'creator', 'item_id', 'colser_id',
    'notes', 'objid', 'sequence', 'series', 'slidepage_folder',
]
images = images[image_columns]
images.head()

Unnamed: 0,image_id,media_type,character_id,tag_id,kashira_id,performance_id,performer_id,play_id,production_id,pscene_id,container,container_type,creator,item_id,colser_id,notes,objid,sequence,series,slidepage_folder
0,8281,slide,"[452, 453, 454, 455, 456, 2312]","[43, 73]","[33, 3, 35, 106, 77]",[310],,[26],[86],[128],46,Slide Binder,1,1,2,2,ldpd_bun_slide_452_2_0001_0001,1,2,452
1,8282,slide,"[452, 453, 454, 455, 456, 2312]","[43, 73]","[33, 3, 35, 106, 77]",[310],,[26],[86],[128],46,Slide Binder,1,2,2,6,ldpd_bun_slide_452_2_0002_0002,2,2,452
2,8283,slide,"[452, 453, 454, 455, 456, 2312]","[43, 73]","[33, 3, 35, 106, 77]",[310],,[26],[86],[128],46,Slide Binder,1,3,2,7,ldpd_bun_slide_452_2_0003_0003,3,2,452
3,8284,slide,"[453, 454, 455, 452, 2312]","[43, 73]","[3, 35, 33, 77]",[310],,[26],[86],[128],46,Slide Binder,1,4,2,0,ldpd_bun_slide_452_2_0004_0004,4,2,452
4,8285,slide,"[452, 454, 2312]",[43],"[33, 35, 77]",[310],,[26],[86],[128],46,Slide Binder,1,5,2,4,ldpd_bun_slide_452_2_0005_0005,5,2,452


In [14]:
# should equal the starting count: the left merges must not add or drop rows
print("Final row count: {}".format(len(images)))

Final row count: 14636


# Kashira:
- Add arrays of image_ids and play_ids for each kashira.

<hr/>

In [15]:
# row count before the merges, to compare against the final count
print("Starting row count: {}".format(len(kashira)))

Starting row count: 129


In [16]:
# attach the list of online image_ids, then the list of play_ids, per kashira
for join_table, linked_id in [(kashira_images, 'image_id'), (kashira_plays, 'play_id')]:
    kashira = merge_id_array(kashira, join_table, 'kashira_id', linked_id)

In [17]:
# fixed column order for the export
kashira_columns = [
    'kashira_id', 'label_eng', 'label_ka', 'category',
    'image_id', 'play_id', 'sort_ja',
]
kashira = kashira[kashira_columns]

kashira.head()

Unnamed: 0,kashira_id,label_eng,label_ka,category,image_id,play_id,sort_ja
0,1,Amanjaku,あまんじゃく,Special,,,あまんじゃく
1,2,Ebisu,恵比須,Special,,[53],えびす
2,3,Ōshūto,大舅,Male,"[8281, 8282, 8283, 8284, 8287, 8289, 8291, 829...","[90, 27, 26, 62, 93, 24, 63, 11, 55, 78, 79, 1...",おおしゅうと
3,4,Ōdanshichi,大団七,Male,"[8667, 8669, 8670, 8671, 8672, 8676, 8687, 868...","[26, 62, 93, 95, 90, 66, 77, 17, 40, 36, 135, ...",おおだんしち
4,5,Ochō no kodomo,お蝶の子供,Children,,[155],おちょうのこども


In [18]:
# should equal the starting count: the left merges must not add or drop rows
print("Final row count: {}".format(len(kashira)))

Final row count: 129


# Pscenes:

_(i.e. scene data at performance level; a transformation of the `scenes_productions` table.)_
- Add labels and scene_order from scenes, and arrays of spuck_ids, narrator_ids, musician_ids, shamisen_ids, and image_ids for each pscene.

<hr/>

In [19]:
# row count before the merges, to compare against the final count
print("Starting row count: {}".format(len(pscenes)))

Starting row count: 2609


In [20]:
# add labels and scene_order
# keep only the scene columns that should be copied onto each pscene
scenes = scenes[['scene_id','label_eng','label_ja','label_ka','scene_order']]
# the two free-text note columns are dropped before export (axis passed by
# keyword for compatibility with current pandas)
pscenes = pscenes.drop(['spuck_note', 'tayu_shamisen_note'], axis=1)
# join on scene_id explicitly rather than on whatever columns happen to be shared
pscenes = pscenes.merge(scenes, how='left', on='scene_id')

In [21]:
# drop narrator with id 0 (no such performer exists)
# ids were read as strings, hence the comparison against '0'
is_real_narrator = narrators['narrator_id'] != '0'
narrators = narrators[is_real_narrator]

In [22]:
# (source table, id column to collect); each pass appends one list column to pscenes
pscene_links = [
    (spucks, 'spuck_id'),
    (narrators, 'narrator_id'),
    (musicians, 'musician_id'),
    (shamisenplayers, 'shamisen_id'),
    (scenes_images, 'image_id'),
]
for source_table, linked_id in pscene_links:
    pscenes = merge_id_array(pscenes, source_table, 'pscene_id', linked_id)

pscenes.head()

Unnamed: 0,pscene_id,play_id,production_id,performance_id,scene_id,label_eng,label_ja,label_ka,scene_order,spuck_id,narrator_id,musician_id,shamisen_id,image_id
0,1,86,1,2,313,The Tea Stall at Torii Pass,Torii tōge chamise,鳥居峠茶店,408,,,,,
1,2,86,1,2,612,Ao no Dōmon (Blue Tunnel),Ao no Dōmon,青の洞門,409,,,,,
2,3,29,1,3,163,The Town of Numazu,Numazu,沼津,189,,,,,
3,4,29,1,3,641,House of Heisaku and One Thousand Pine Trees a...,Heisaku uchi yori senbon matsubara,平作内より千本松原,196,,,,,
4,5,47,1,4,287,The Maple Viewing Party,Momijigari,紅葉狩,360,,,,,


In [23]:
# should equal the starting count: the left merges must not add or drop rows
print("Final row count: {}".format(len(pscenes)))

Final row count: 2609


# Performances:
- Add arrays of image_ids, pscene_ids, and character_ids for each performance.

<hr/>

In [24]:
# row count before the merges, to compare against the final count
print("Starting row count: {}".format(len(performances)))

Starting row count: 931


In [25]:
# data_id and code are dropped before export; axis is passed by keyword
# because the positional form is no longer accepted by current pandas
performances = performances.drop(['data_id', 'code'], axis=1)

In [26]:
# (source table, grouping key, id column to collect)
# characters are attached through the performance's play_id
performance_links = [
    (performances_images, 'performance_id', 'image_id'),
    (pscenes, 'performance_id', 'pscene_id'),
    (characters_plays, 'play_id', 'character_id'),
]
for source_table, group_key, linked_id in performance_links:
    performances = merge_id_array(performances, source_table, group_key, linked_id)

performances.head()

Unnamed: 0,performance_id,production_id,play_id,image_id,pscene_id,character_id
0,1,142,63,,"[1438, 1439, 1440, 1441, 1442, 1443, 1444, 144...","[675, 676, 677, 678, 679, 680, 681, 682, 683, ..."
1,2,1,86,,"[1, 2]",
2,3,1,29,,"[3, 4]","[538, 539, 540, 541, 542, 543, 544, 545, 546, ..."
3,4,1,47,,[5],"[1011, 1012, 1013, 1014]"
4,5,1,18,,[6],"[175, 176, 177, 178, 179, 180, 181, 182, 183, ..."


In [27]:
# should equal the starting count: the left merges must not add or drop rows
print("Final row count: {}".format(len(performances)))

Final row count: 931


# Plays:
- Add arrays of author_ids, character_ids, image_ids, production_ids, and performance_ids for each play.

<hr/>

In [28]:
# row count before the merges, to compare against the final count
print("Starting row count: {}".format(len(plays)))

Starting row count: 178


In [29]:
# (source table, id column to collect); production and performance ids both
# come from the performances table, grouped by play_id
play_links = [
    (authors_plays, 'author_id'),
    (characters_plays, 'character_id'),
    (plays_images, 'image_id'),
    (performances, 'production_id'),
    (performances, 'performance_id'),
]
for source_table, linked_id in play_links:
    plays = merge_id_array(plays, source_table, 'play_id', linked_id)

plays.head()

Unnamed: 0,play_id,label_ja,label_ja_sort,label_ka,sort_ja,label_eng,label_eng_sort,first_staged,reference,author_id,character_id,image_id,production_id,performance_id
0,1,Onnakoroshi abura no jigoku,Onnakoroshi abura no jigoku,女殺油地獄,おんなころしあぶらのじごく,The Woman-Killer and the Hell of Oil,"Woman-Killer and the Hell of Oil, The",1721,"Major Plays of Chikamatsu, translated by Donal...",[6],"[1112, 1113, 1114, 1115, 1116, 1117, 1118, 111...","[10777, 10778, 10779, 10780, 10781, 10782, 107...","[38, 54, 81, 113, 131, 163, 187, 234, 266, 286]","[99, 194, 287, 414, 556, 576, 651, 778, 881, 976]"
1,2,Shinjū ten no Amijima,Shinju ten no Amijima,心中天網島,しんじゅうてんのあみじま,The Love Suicides at Amijima,"Love Suicides at Amijima, The",1720,"Major Plays of Chikamatsu, translated by Donal...",[6],"[1271, 1272, 1273, 1274, 1275, 1276, 1277, 127...","[10649, 10650, 10651, 10652, 10653, 10654, 106...","[31, 73, 87, 105, 127, 131, 160, 162, 237, 239...","[110, 262, 315, 383, 457, 554, 558, 573, 780, ..."
2,3,Sonezaki shinjū,Sonezaki shinju,曽根崎心中,そねざきしんじゅう,The Love Suicides at Sonezaki,"Love Suicides at Sonezaki, The",1703,"Major Plays of Chikamatsu, translated by Donal...",[6],"[1344, 1481, 1482, 1483, 1484, 1485, 1486, 148...","[10557, 10558, 10559, 10560, 10561, 10562, 105...","[10, 46, 58, 81, 11, 105, 112, 114, 135, 131, ...","[29, 164, 213, 291, 293, 382, 412, 420, 468, 5..."
3,4,Shinjū Yoigōshin,Shinju Yoigoshin,心中宵庚申,しんじゅうよいごうしん,The Love Suicide of Hambei and Ochiyo,"Love Suicide of Hambei and Ochiyo, The",1722,"Hironaga, Shūzaburō, The Bunraku Handbook (Tok...",[6],"[1285, 1286, 1287, 1288, 1289, 1290, 1291, 129...","[8821, 8822, 8823, 8824, 8825, 8826, 8827, 882...","[63, 88, 110, 113, 143, 169, 211, 242, 248]","[232, 321, 406, 416, 492, 595, 712, 797, 819]"
4,5,Somemoyō imose no kadomatsu,Somemoyo imose no kadomatsu,染模様妹背門松,そめもよういもせのかげまつ,The Love of Osome and Hisamatsu,"Love of Osome and Hisamatsu, The",1767,"Hironaga, Shūzaburō, The Bunraku Handbook (Tok...",[34],"[1343, 1467, 1468, 1469, 1470, 1471, 1472, 147...","[10454, 10455, 10456, 10457, 10458, 10459, 104...","[2, 20, 199, 65, 101, 139, 196, 211, 271]","[13, 54, 219, 236, 367, 482, 678, 711, 891]"


In [30]:
# should equal the starting count: the left merges must not add or drop rows
print("Final row count: {}".format(len(plays)))

Final row count: 178


# Productions:
- Add arrays of image_ids, performance_ids, and play_ids for each production.

<hr/>

In [31]:
# row count before the merges, to compare against the final count
print("Starting row count: {}".format(len(productions)))

Starting row count: 293


In [32]:
# performance_num is dropped before export; axis is passed by keyword
# because the positional form is no longer accepted by current pandas
productions = productions.drop('performance_num', axis=1)

In [33]:
# (source table, id column to collect); performance and play ids both come
# from the performances table, grouped by production_id
production_links = [
    (productions_images, 'image_id'),
    (performances, 'performance_id'),
    (performances, 'play_id'),
]
for source_table, linked_id in production_links:
    productions = merge_id_array(productions, source_table, 'production_id', linked_id)

In [34]:
# fixed column order for the export
productions = productions[['production_id','dates','place','label_eng','image_id','performance_id','play_id']]
# sort_index returns a new frame rather than sorting in place, so the result
# has to be assigned for the sort to take effect
productions = productions.sort_index(axis=0)

productions.head()

Unnamed: 0,production_id,dates,place,label_eng,image_id,performance_id,play_id
0,1,1964/11/n.d.-1964/11/n.d.,not recorded,November 1964,,"[2, 3, 4, 5, 6, 7, 8]","[86, 29, 47, 18, 7, 72, 87]"
1,2,1968/02/25-1968/03/10,National Theatre of Japan,February 1968,,"[12, 13, 14, 15, 16, 17]","[83, 5, 88, 89, 6, 70]"
2,3,1968/10/27-1968/11/10,National Theatre of Japan,October 1968,,"[9, 10, 11]","[30, 66, 32]"
3,4,1969/05/11-1969/05/25,National Theatre of Japan,May 1969,,"[18, 19, 20]","[72, 68, 33]"
4,5,1969/09/14-1969/09/21,National Theatre of Japan,September 1969,,"[21, 22, 23]","[90, 91, 35]"


In [35]:
# should equal the starting count: the left merges must not add or drop rows
print("Final row count: {}".format(len(productions)))

Final row count: 293


# Tags:
- Add arrays of image_ids for each tag.

<hr/>

In [36]:
# row count before the merges, to compare against the final count
print("Starting row count: {}".format(len(tags)))

Starting row count: 76


In [37]:
# strip newline characters out of the free-text notes (regex replacement)
tags['notes'] = tags['notes'].replace(to_replace=r'\n', value='', regex=True)

In [38]:
# one list of online image_ids per tag
tags = merge_id_array(
    base_df=tags,
    other_df=tags_images,
    main_id='tag_id',
    multi_id='image_id',
)

tags.head()

Unnamed: 0,tag_id,label_eng,label_ka,description,notes,sort_ja,image_id
0,1,Asahi Theater,朝日座,,,あさひざ,"[44990, 45057, 45216, 46025, 56319, 56320, 563..."
1,2,Butai geta [high wooden clogs],舞台下駄,High wooden clogs worn on stage by head puppet...,,ぶたいげた,"[44981, 44986, 45010, 45017, 45024, 45050, 450..."
2,3,Dō [body],胴,"Body, torso, or framework of puppets.",,どう,"[44978, 45071, 45235, 45244, 45246, 46033, 461..."
3,4,Dressing room,楽屋,,,がくや,"[44990, 44992, 44993, 44997, 44998, 45000, 450..."
4,5,Geza [room for offstage musicians],下座あるいは囃子部屋,Small room over stage-right entrance occupied ...,,げざ,"[11630, 11633, 44982, 44997, 45004, 45005, 452..."


In [39]:
# should equal the starting count: the left merge must not add or drop rows
print("Final row count: {}".format(len(tags)))

Final row count: 76


# Performers:
- Add arrays of image_ids, musician_perf_ids, narrator_perf_ids, shamisen_perf_ids, puppeteer_perf_ids, and kashira_ids for each performer.

<hr/>

In [40]:
# row count before the merges, to compare against the final count
print("Starting row count: {}".format(len(performers)))

Starting row count: 184


In [41]:
# keep only the columns that are exported, in this order
performer_columns = [
    'performer_id', 'name_proper', 'alt_name', 'name_ka', 'alt_name_ka',
    'specialty', 'dates', 'notes',
]
performers = performers[performer_columns]

In [42]:
# add images: one list of online image_ids per performer
performers = merge_id_array(
    base_df=performers,
    other_df=performers_images,
    main_id='performer_id',
    multi_id='image_id',
)

In [43]:
# add performances as musician

### make a join table from pscenes
### (pscenes.musician_id holds a list of ids per pscene; rows without one are skipped)
performer_as_musician = pscenes[['musician_id','performance_id']].dropna(how='any').rename(columns={'musician_id':'performer_id','performance_id':'musician_perf_id'})
### one row per (performance, performer): flatten the id lists of EVERY pscene
### of the performance -- taking only the first pscene's list dropped the
### musicians who appear in later scenes
performer_as_musician = performer_as_musician.groupby('musician_perf_id').performer_id.apply(
    lambda id_lists: pd.DataFrame([performer for ids in id_lists.values for performer in ids])
).reset_index().drop('level_1', axis=1)
performer_as_musician.columns = ['musician_perf_id','performer_id']
### a performer can play in several scenes of the same performance
performer_as_musician.drop_duplicates(inplace=True)
### add performances
performers = merge_id_array(performers,performer_as_musician,'performer_id','musician_perf_id')


In [44]:
# add performances as narrator

### make a join table from pscenes
### (pscenes.narrator_id holds a list of ids per pscene; rows without one are skipped)
performer_as_narrator = pscenes[['narrator_id','performance_id']].dropna(how='any').rename(columns={'narrator_id':'performer_id','performance_id':'narrator_perf_id'})
### one row per (performance, performer): flatten the id lists of EVERY pscene
### of the performance -- taking only the first pscene's list dropped the
### narrators who appear in later scenes
performer_as_narrator = performer_as_narrator.groupby('narrator_perf_id').performer_id.apply(
    lambda id_lists: pd.DataFrame([performer for ids in id_lists.values for performer in ids])
).reset_index().drop('level_1', axis=1)
performer_as_narrator.columns = ['narrator_perf_id','performer_id']
### a performer can narrate several scenes of the same performance
performer_as_narrator.drop_duplicates(inplace=True)
### add performances
performers = merge_id_array(performers,performer_as_narrator,'performer_id','narrator_perf_id')

In [45]:
# add performances as shamisen player

### make a join table from pscenes
### (pscenes.shamisen_id holds a list of ids per pscene; rows without one are skipped)
performer_as_shamisen = pscenes[['shamisen_id','performance_id']].dropna(how='any').rename(columns={'shamisen_id':'performer_id','performance_id':'shamisen_perf_id'})
### one row per (performance, performer): flatten the id lists of EVERY pscene
### of the performance -- taking only the first pscene's list dropped the
### shamisen players who appear in later scenes
performer_as_shamisen = performer_as_shamisen.groupby('shamisen_perf_id').performer_id.apply(
    lambda id_lists: pd.DataFrame([performer for ids in id_lists.values for performer in ids])
).reset_index().drop('level_1', axis=1)
performer_as_shamisen.columns = ['shamisen_perf_id','performer_id']
### a performer can play in several scenes of the same performance
performer_as_shamisen.drop_duplicates(inplace=True)
### add performances
performers = merge_id_array(performers,performer_as_shamisen,'performer_id','shamisen_perf_id')

In [46]:
# add performances as puppeteer and kashira used

### make a join table from pscenes
### (pscenes.spuck_id holds a list of ids per pscene; rows without one are skipped)
spucks_performances = pscenes[['spuck_id','performance_id']].dropna(how='any').rename(columns={'performance_id':'puppeteer_perf_id'})
### one row per (performance, spuck): flatten the spuck lists of EVERY pscene
### of the performance -- taking only the first pscene's list left the spucks
### of later scenes without a performance
spucks_performances = spucks_performances.groupby('puppeteer_perf_id').spuck_id.apply(
    lambda id_lists: pd.DataFrame([spuck for ids in id_lists.values for spuck in ids])
).reset_index().drop('level_1', axis=1)
spucks_performances.columns = ['puppeteer_perf_id','spuck_id']
spucks_performances.drop_duplicates(inplace=True)
### make join table with performer_id, kashira_id, and puppeteer_perf_id
xtra_spucks = spucks[['spuck_id','pscene_id','puppeteer_id','kashira_id']].rename(columns={'puppeteer_id':'performer_id'})
### axis is passed by keyword for compatibility with current pandas
xtra_spucks = xtra_spucks.merge(spucks_performances, on='spuck_id', how='left').drop(['spuck_id', 'pscene_id'], axis=1)
### add performances
performer_as_puppeteer = xtra_spucks[['performer_id','puppeteer_perf_id']].dropna(how='any').drop_duplicates()
performers = merge_id_array(performers,performer_as_puppeteer,'performer_id','puppeteer_perf_id')
### add kashira
performer_puppets = xtra_spucks[['performer_id','kashira_id']].dropna(how='any').drop_duplicates()
performers = merge_id_array(performers,performer_puppets,'performer_id','kashira_id')

performers.head()


Unnamed: 0,performer_id,name_proper,alt_name,name_ka,alt_name_ka,specialty,dates,notes,image_id,musician_perf_id,narrator_perf_id,shamisen_perf_id,puppeteer_perf_id,kashira_id
0,1,Takemoto Datejidayū (see also Takemoto Datetay...,Takemoto Datetayū V,竹本伊達路大夫,竹本伊達大夫 (五),Tayu,'1950-1988/04',"Bunraku meikan (1980, 1985, 1988, 1990, 1994)","[8469, 8470, 9055, 9056, 45321, 52421, 52552, ...",,"[10, 102, 112, 115, 118, 133, 142, 144, 147, 1...",,,
1,2,Takemoto Tsudayū IV,,竹本津大夫 (四),,Tayu,'1950-',[Living National Treasure]; Bunraku meikan (19...,"[8588, 9054, 9055, 9056, 9616, 9617, 9618, 961...",,"[10, 103, 143, 150, 185, 220, 258, 279, 288, 3...",,,
2,3,Tsuruzawa Kanotarō,,鶴澤叶太郎,,Shamisen,'1913-',"Bunraku meikan (1980, 1985, 1988, 1990, 1994)","[45321, 45449, 52634, 52751, 52755, 52819, 528...",,,"[101, 11, 112, 114, 116, 134, 14, 152, 156, 17...",,
3,4,Takezawa Danshichi (see also Takezawa Danjirō IV),Takezawa Danjirō IV,竹澤団七,竹澤団二郎 (四),Shamisen,'1981/04-',"Bunraku meikan (1980, 1985, 1988, 1990, 1994)","[12477, 12478, 12489]",,,"[385, 399, 403, 429, 439, 467, 485, 490, 496, ...",,
4,5,Toyotake Matsukadayū,,豊竹松香大夫,,Tayu,'1959-',"Bunraku meikan (1980, 1985, 1988, 1990, 1994)","[8337, 9715, 11874, 11875, 45449, 45681, 45691...",,"[106, 109, 11, 119, 12, 122, 131, 136, 14, 149...",[270],,


In [47]:
# should equal the starting count: the left merges must not add or drop rows
print("Final row count: {}".format(len(performers)))

Final row count: 184


# Clean-up IDs and export to CSV and JSON

<hr/>

In [62]:
dataframes = ["authors","characters","creators","images","kashira","performances","performers","plays","productions","pscenes","spucks","tags"]

# table name -> (frame, name of its id column).
# The id column is the singular form (author_id, image_id, ...). Building the
# key as "<table name>_id" matched no column for any table except kashira, so
# the rename to 'id' silently did nothing for the other eleven.
# NOTE(review): consumers of the old output saw e.g. 'author_id' instead of
# 'id' -- confirm they expect the renamed column.
exports = {
    "authors": (authors, "author_id"),
    "characters": (characters, "character_id"),
    # NOTE(review): the creators table's columns are not shown in this
    # notebook; 'creator_id' is assumed -- confirm against creators.csv.
    "creators": (creators, "creator_id"),
    "images": (images, "image_id"),
    "kashira": (kashira, "kashira_id"),
    "performances": (performances, "performance_id"),
    "performers": (performers, "performer_id"),
    "plays": (plays, "play_id"),
    "productions": (productions, "production_id"),
    "pscenes": (pscenes, "pscene_id"),
    "spucks": (spucks, "spuck_id"),
    "tags": (tags, "tag_id"),
}

# The loop variable is deliberately not called `df`: that name is the CSV
# loader defined in the Methods cell and must stay callable after this cell.
for table_name in dataframes:
    frame, id_column = exports[table_name]
    re_id(frame, id_column) # fix ids
    csv_out(frame, table_name + '.csv') # output as csv files
    json_out(frame, table_name + '.json') # output as json files