

# Bunraku Online Collection 
__Data Transformation SQL CSVs -> JSON__

<hr/>

### Data files to generate:

- authors  [ ]
- characters [ ]
- creators [ ]
- images [ ]
- kashira [ ]
- performances [ ]
- performers [ ]
- plays [ ]
- productions [ ]
- pscenes [ ]
- spucks [ ]
- tags [ ]



# Setup:
<hr/>

In [1]:
import pandas as pd
pd.set_option('max_colwidth',1000)
pd.set_option('max_seq_items','none')

In [2]:
# import data as dtype=object to avoid NaN/Float conversion of IDs
authors = pd.read_csv('in/authors.csv', dtype=object)
characters = pd.read_csv('in/characters.csv', dtype=object)
creators = pd.read_csv('in/creators.csv', dtype=object)
images = pd.read_csv('in/imagesonline.csv', dtype=object)
kashira = pd.read_csv('in/kashira.csv', dtype=object)
performances = pd.read_csv('in/performances.csv', dtype=object)
performers = pd.read_csv('in/performers.csv', dtype=object)
plays = pd.read_csv('in/plays.csv', dtype=object)
productions = pd.read_csv('in/productions.csv', dtype=object)
pscenes = pd.read_csv('in/scenes_productions.csv', dtype=object)
scenes = pd.read_csv('in/scenes.csv', dtype=object)
shamisenplayers = pd.read_csv('in/sceneshamisens.csv', dtype=object)
musicians = pd.read_csv('in/scenekotokokyus.csv', dtype=object)
narrators = pd.read_csv('in/scenetayus.csv', dtype=object)
spucks = pd.read_csv('in/spucks.csv', dtype=object)
tags = pd.read_csv('in/tags.csv', dtype=object)

# import + drop duplicates on join tables
authors_plays = pd.read_csv('in/authors_plays.csv', dtype=object).drop_duplicates()
characters_images = pd.read_csv('in/characters_images.csv', dtype=object).drop_duplicates()
characters_plays = pd.read_csv('in/characters_plays.csv', dtype=object).drop_duplicates()
kashira_images = pd.read_csv('in/kashira_images.csv', dtype=object).drop_duplicates()
kashira_plays = pd.read_csv('in/kashira_plays.csv', dtype=object).drop_duplicates()
performances_images = pd.read_csv('in/performances_images.csv', dtype=object).drop_duplicates()
performers_images = pd.read_csv('in/performers_images.csv', dtype=object).drop_duplicates()
plays_images = pd.read_csv('in/plays_images.csv', dtype=object).drop_duplicates()
productions_images = pd.read_csv('in/productions_images.csv', dtype=object).drop_duplicates()
scenes_images = pd.read_csv('in/scenes_images.csv', dtype=object).drop_duplicates()
tags_images = pd.read_csv('in/tags_images.csv', dtype=object).drop_duplicates()

# Clean up non-online images from image join  tables
imagesonline = images.rename(columns={'id':'image_id'})
characters_images = pd.merge(characters_images, imagesonline, on='image_id', how='inner')
kashira_images = pd.merge(kashira_images, imagesonline, on='image_id', how='inner')
performers_images = pd.merge(performers_images, imagesonline, on='image_id', how='inner')
plays_images = pd.merge(plays_images, imagesonline, on='image_id', how='inner')
productions_images = pd.merge(productions_images, imagesonline, on='image_id', how='inner')
scenes_images = pd.merge(scenes_images, imagesonline, on='image_id', how='inner')
tags_images = pd.merge(tags_images, imagesonline, on='image_id', how='inner')

# Authors:
- Add array of play_ids per author

<hr/>

In [3]:
print "Starting row count: " + str(len(authors.index))

Starting row count: 123


In [4]:
# add plays
authors = authors.merge(authors_plays.groupby('author_id')['play_id'].apply(list).reset_index(), how='left')

authors.head()

Unnamed: 0,author_id,label_eng,label_ka,dates,reference,sort_ja,play_id
0,1,Asada Icchō,浅田一鳥,fl. 1741-1767,LC Authorities,あさだいっちょう,"[19, 72, 105, 122]"
1,2,Ashikawa Teruha,芦川照葉,,LC Authorities,あしかわてるは,[173]
2,4,Chikamatsu Hanji,近松半二,d. 1786 or 7,LC Authorities,ちかまつはんじ,"[11, 21, 25, 27, 29, 33, 52, 79, 80, 90, 101, 110, 141, 147]"
3,5,Chikamatsu Kosuiken,近松湖水軒,,"繪本太功記 / 近松やなぎ, 近松湖水軒, 千葉軒合作, 東京 : 金櫻堂, 1890.12, from NACSIS record",ちかまつこすいけん,[16]
4,6,Chikamatsu Monzaemon,近松門左衛門,,LC Authorities,ちかまつもんざえもん,"[1, 2, 3, 4, 7, 8, 17, 24, 28, 34, 40, 41, 54, 61, 76, 85, 87, 106, 107, 113, 136, 144, 148, 161, 174, 176]"


In [5]:
print "Final row count: " + str(len(authors.index))

Final row count: 123


# Characters:
- Add array of play_ids + array of image_ids per character

<hr/>

In [6]:
print "Starting row count: " + str(len(characters.index))

Starting row count: 2107


In [7]:
# add  plays
characters = characters.merge(characters_plays.groupby('character_id')['play_id'].apply(list).reset_index(), how='left')
# add images
characters = characters.merge(characters_images.groupby('character_id')['image_id'].apply(list).reset_index(), how='left')
characters = characters.drop('character_code',1)

characters.head()

Unnamed: 0,character_id,label_eng,label_ja,label_ka,authority_control,sort_ja,play_id,image_id
0,1,Farmer,Oyaji,おやじ,August 1989 Program,おやじ,[162],[56170]
1,2,Farmer's wife,Nyōbō,女房,August 1989 Program,にょうぼう,[162],
2,3,"Magotaro, a horse","Magotarō, Uma",孫太郎（馬）,August 1989 Program,まごたろう（うま）,[162],
3,4,Local governor,Odaikan,お代官,August 1989 Program,おだいかん,[162],
4,5,Local governor's assistant,Odaikan no kobun,お代官のこぶん,August 1989 Program,おだいかんのこぶん,[162],


In [8]:
print "Final row count: " + str(len(characters.index))

Final row count: 2107


# Images:
- Add arrays of: character_ids, kashira_ids, performance_ids, performer_ids, play_ids, production_ids, scene_ids, and tag_ids for each image.

<hr/>

In [9]:
print "Starting row count: " + str(len(images.index))

Starting row count: 14636


In [10]:
images = images.drop('online', 1)
images = images.replace({'Barbara C. Adachi':'1','Fukuda Fumio':'2','Harri Peccinotti':'3','M. Arai':'4','Toyotake Komatsudayū II':'5','Unknown. Photo: Columbia University Libraries':'6'})

In [11]:
# add characters
images = images.merge(characters_images.groupby('image_id')['character_id'].apply(list).reset_index(), how='left')
# add kashira
images = images.merge(kashira_images.groupby('image_id')['kashira_id'].apply(list).reset_index(), how='left')
# add performances
images = images.merge(performances_images.groupby('image_id')['performance_id'].apply(list).reset_index(), how='left')
# add performers
images = images.merge(performers_images.groupby('image_id')['performer_id'].apply(list).reset_index(), how='left')
# add plays
images = images.merge(plays_images.groupby('image_id')['play_id'].apply(list).reset_index(), how='left')
# add productions
images = images.merge(productions_images.groupby('image_id')['production_id'].apply(list).reset_index(), how='left')
# add scenes
images = images.merge(scenes_images.groupby('image_id')['pscene_id'].apply(list).reset_index(), how='left')
# add tags
images = images.merge(tags_images.groupby('image_id')['tag_id'].apply(list).reset_index(), how='left')

In [12]:
images = images[['image_id','media_type','character_id','tag_id','kashira_id','performance_id','performer_id','play_id','production_id','pscene_id','container','container_type','creator','item_id','colser_id','notes','objid','sequence','series','slidepage_folder']]

images.head()

Unnamed: 0,image_id,media_type,character_id,tag_id,kashira_id,performance_id,performer_id,play_id,production_id,pscene_id,container,container_type,creator,item_id,colser_id,notes,objid,sequence,series,slidepage_folder
0,8281,slide,"[452, 453, 454, 455, 456, 2312]","[43, 73]","[33, 3, 35, 106, 77]",[310],,[26],[86],[128],46,Slide Binder,1,1,2,2,ldpd_bun_slide_452_2_0001_0001,1,2,452
1,8282,slide,"[452, 453, 454, 455, 456, 2312]","[43, 73]","[33, 3, 35, 106, 77]",[310],,[26],[86],[128],46,Slide Binder,1,2,2,6,ldpd_bun_slide_452_2_0002_0002,2,2,452
2,8283,slide,"[452, 453, 454, 455, 456, 2312]","[43, 73]","[33, 3, 35, 106, 77]",[310],,[26],[86],[128],46,Slide Binder,1,3,2,7,ldpd_bun_slide_452_2_0003_0003,3,2,452
3,8284,slide,"[453, 454, 455, 452, 2312]","[43, 73]","[3, 35, 33, 77]",[310],,[26],[86],[128],46,Slide Binder,1,4,2,0,ldpd_bun_slide_452_2_0004_0004,4,2,452
4,8285,slide,"[452, 454, 2312]",[43],"[33, 35, 77]",[310],,[26],[86],[128],46,Slide Binder,1,5,2,4,ldpd_bun_slide_452_2_0005_0005,5,2,452


In [13]:
print "Final row count: " + str(len(images.index))

Final row count: 14636


# Kashira:
- Add arrays of image_ids and play_ids for each kashira.

<hr/>

In [14]:
print "Starting row count: " + str(len(kashira.index))

Starting row count: 129


In [15]:
# add images
kashira = kashira.merge(kashira_images.groupby('kashira_id')['image_id'].apply(list).reset_index(), how='left')
# add plays
kashira = kashira.merge(kashira_plays.groupby('kashira_id')['play_id'].apply(list).reset_index(), how='left')

In [16]:
kashira = kashira[['kashira_id','label_eng','label_ka','category','image_id','play_id','sort_ja']]

kashira.head()

Unnamed: 0,kashira_id,label_eng,label_ka,category,image_id,play_id,sort_ja
0,1,Amanjaku,あまんじゃく,Special,,,あまんじゃく
1,2,Ebisu,恵比須,Special,,[53],えびす
2,3,Ōshūto,大舅,Male,"[8281, 8282, 8283, 8284, 8287, 8289, 8291, 8292, 8293, 8294, 8296, 9210, 9211, 9212, 9781, 9782, 9783, 9784, 9785, 9786, 9787, 9788, 9789, 9790, 9791, 11442, 11443, 11449, 12109, 12111, 12113, 12116, 12117, 12133, 12134, 12135, 12136, 12137, 12138, 12139, 12140, 12141, 12143, 12144, 12152, 12381, 12730, 12733, 12734, 12735, 12813, 12814, 12815, 12816, 12817, 12818, 12821, 12822, 12823, 12825, 12826, 12827, 12828, 12829, 12830, 12831, 12832, 12833, 12834, 12835, 12837, 12842, 12954, 12955, 12956, 12958, 12961, 44999, 45000, 45282, 45312, 45563, 45722, 45726, 45729, 45787, 45789, 45791, 46094, 46095, 46165, 46166, 46473, 46567, 46577, 52530, 52644, 52645, 52646, 52647, 52648, 52675, 52676, 52678, 52829, 52837, 52839, 52840, 52846, 52847, 52849, 53090, 53091, 53092, 53094, 53116, 53119, 53120, 53121, 53123, 53124, 53125, 53126, 53128, 53129, 53130, 53131, 53133, 53323, 53325, 53337, 53355, 53356, 53357, 53359, 53360, 53365, 53371, 53372, 53373, 53374, 53376, 53378, 53379, 53382, 53384...","[90, 27, 26, 62, 93, 24, 63, 11, 55, 78, 79, 130, 135, 58, 148, 16, 175]",おおしゅうと
3,4,Ōdanshichi,大団七,Male,"[8667, 8669, 8670, 8671, 8672, 8676, 8687, 8688, 8689, 8690, 8691, 8692, 8693, 8695, 8696, 8697, 8698, 8699, 8700, 8701, 8702, 8703, 8704, 8706, 8708, 8712, 9246, 9247, 9262, 9263, 9264, 9310, 9311, 9312, 9313, 9321, 9324, 9325, 9326, 9327, 9328, 9329, 9330, 9331, 9332, 9333, 9334, 9338, 9339, 9340, 9341, 9342, 9343, 9344, 9345, 9346, 9347, 9348, 10005, 10006, 10007, 10008, 10011, 10014, 10015, 10016, 10017, 10018, 10019, 10020, 10021, 10023, 10028, 10029, 10030, 10031, 10032, 10033, 10034, 10035, 10037, 10038, 10039, 10041, 10042, 10045, 10047, 10050, 10051, 10052, 10053, 10054, 10055, 10056, 10057, 10058, 10059, 10061, 10062, 10063, 10064, 10065, 10066, 10071, 10072, 10073, 10074, 10075, 10076, 10079, 10080, 10082, 10083, 10085, 10086, 10087, 10088, 10090, 10092, 10118, 10119, 10120, 10121, 10122, 10124, 10125, 10126, 10129, 10130, 10132, 10133, 10134, 10135, 10136, 10138, 10139, 10140, 10143, 10974, 10976, 10979, 10981, 10982, 10983, 10984, 10988, 10989, 10990, 10991, 10992, 109...","[26, 62, 93, 95, 90, 66, 77, 17, 40, 36, 135, 118, 58, 16]",おおだんしち
4,5,Ochō no kodomo,お蝶の子供,Children,,[155],おちょうのこども


In [17]:
print "Final row count: " + str(len(kashira.index))

Final row count: 129


# Pscenes:

(_i.e. scene data at performance level / transformation of scenesproductions.)_
- Add labels and scene_order from scenes, and arrays of spuck_ids, narrator_ids, musician_ids, shamisen_ids, and image_ids for each pscene.

<hr/>

In [18]:
print "Starting row count: " + str(len(pscenes.index))

Starting row count: 2609


In [19]:
# add labels and scene_order
scenes = scenes[['scene_id','label_eng','label_ja','label_ka','scene_order']]
pscenes = pscenes.drop('spuck_note',1).drop('tayu_shamisen_note',1)
pscenes = pscenes.merge(scenes, how='left')

In [20]:
# add spuck_id
pscenes = pscenes.merge(spucks.groupby('pscene_id')['spuck_id'].apply(list).reset_index(), how='left')
# drop narrator with id 0 (no such performer exists)
narrators = narrators[narrators['narrator_id'] != '0']
# add narrator_ids
pscenes = pscenes.merge(narrators.groupby('pscene_id')['narrator_id'].apply(list).reset_index(), how='left')
# add musician_ids
pscenes = pscenes.merge(musicians.groupby('pscene_id')['musician_id'].apply(list).reset_index(), how='left')
# add shamisen_ids
pscenes = pscenes.merge(shamisenplayers.groupby('pscene_id')['shamisen_id'].apply(list).reset_index(), how='left')
# add image_ids
pscenes = pscenes.merge(scenes_images.groupby('pscene_id')['image_id'].apply(list).reset_index(), how='left')

pscenes.head()

Unnamed: 0,pscene_id,play_id,production_id,performance_id,scene_id,label_eng,label_ja,label_ka,scene_order,spuck_id,narrator_id,musician_id,shamisen_id,image_id
0,1,86,1,2,313,The Tea Stall at Torii Pass,Torii tōge chamise,鳥居峠茶店,408,,,,,
1,2,86,1,2,612,Ao no Dōmon (Blue Tunnel),Ao no Dōmon,青の洞門,409,,,,,
2,3,29,1,3,163,The Town of Numazu,Numazu,沼津,189,,,,,
3,4,29,1,3,641,House of Heisaku and One Thousand Pine Trees at Matsubara,Heisaku uchi yori senbon matsubara,平作内より千本松原,196,,,,,
4,5,47,1,4,287,The Maple Viewing Party,Momijigari,紅葉狩,360,,,,,


In [21]:
print "Final row count: " + str(len(pscenes.index))

Final row count: 2609


# Performances:
- Add arrays of image_ids, scene_ids, and character_ids for each performance.

<hr/>

In [22]:
print "Starting row count: " + str(len(performances.index))

Starting row count: 931


In [23]:
performances =  performances.drop('data_id',1).drop('code',1)

In [24]:
# add images
performances = performances.merge(performances_images.groupby('performance_id')['image_id'].apply(list).reset_index(), how='left')
# add pscenes
performances = performances.merge(pscenes.groupby('performance_id')['pscene_id'].apply(list).reset_index(), how='left')
# add characters from play_id
performances = performances.merge(characters_plays.groupby('play_id')['character_id'].apply(list).reset_index(), how='left')

performances.head()

Unnamed: 0,performance_id,production_id,play_id,image_id,pscene_id,character_id
0,1,142,63,,"[1438, 1439, 1440, 1441, 1442, 1443, 1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452]","[675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 1872, 1873, 1903, 1923, 1924, 2169, 2233, 2234, 2235, 2236, 2237, 2270, 2271, 2272, 2273]"
1,2,1,86,,"[1, 2]",
2,3,1,29,,"[3, 4]","[538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 1926, 1927, 2128, 2129, 2154, 2155, 2178, 2179, 2180, 2206, 2207, 2208, 2209, 2230, 2231, 2232, 2275]"
3,4,1,47,,[5],"[1011, 1012, 1013, 1014]"
4,5,1,18,,[6],"[175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 2013]"


In [25]:
print "Final row count: " + str(len(performances.index))

Final row count: 931


# Plays:
- Add arrays of authors_ids, characters_ids, image_ids, production_ids, and performance_ids for each play.

<hr/>

In [26]:
print "Starting row count: " + str(len(plays.index))

Starting row count: 178


In [27]:
# authors
plays = plays.merge(authors_plays.groupby('play_id')['author_id'].apply(list).reset_index(), how='left')
# characters
plays = plays.merge(characters_plays.groupby('play_id')['character_id'].apply(list).reset_index(), how='left')
# images
plays = plays.merge(plays_images.groupby('play_id')['image_id'].apply(list).reset_index(), how='left')
# productions
plays = plays.merge(performances.groupby('play_id')['production_id'].apply(list).reset_index(), how='left')
# performances
plays = plays.merge(performances.groupby('play_id')['performance_id'].apply(list).reset_index(), how='left')

plays.head()

Unnamed: 0,play_id,label_ja,label_ja_sort,label_ka,sort_ja,label_eng,label_eng_sort,first_staged,reference,author_id,character_id,image_id,production_id,performance_id
0,1,Onnakoroshi abura no jigoku,Onnakoroshi abura no jigoku,女殺油地獄,おんなころしあぶらのじごく,The Woman-Killer and the Hell of Oil,"Woman-Killer and the Hell of Oil, The",1721,"Major Plays of Chikamatsu, translated by Donald Keene, (New York: Columbia University Press, 1961); Hironaga, Shūzaburō, The Bunraku Handbook (Tokyo: Maison des Arts, 1976); Kokuritsu Gekijō jōen shiryōshū, edited by Kokuritsu Gekijō Geinō Chōsashitsu, vol. 224, February 1984; Bunraku February 1982 Performance Program",[6],"[1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 2298, 2307, 2317, 2319]","[10777, 10778, 10779, 10780, 10781, 10782, 10783, 10784, 10785, 10786, 10787, 10788, 10789, 10892, 10893, 10894, 10895, 10896, 10897, 10898, 10899, 10900, 10901, 10902, 10903, 10904, 10905, 10906, 10907, 10908, 10909, 10910, 10911, 10912, 10915, 10916, 10917, 10918, 10919, 10920, 10921, 10922, 10923, 10924, 10925, 10926, 10927, 10928, 10929, 10930, 10931, 10932, 10933, 10934, 10935, 10936, 10937, 10938, 10939, 10940, 10941, 10942, 10943, 10944, 10945, 10946, 10947, 10948, 10949, 10950, 10951, 10952, 10953, 10954, 10955, 10956, 10957, 10958, 10959, 10960, 10961, 10962, 10963, 10964, 10965, 10966, 10967, 10968, 10980, 11356, 11357, 11358, 11359, 11360, 11361, 11362, 11363, 11364, 11365, 11366, 11367, 11368, 11369, 11370, 11371, 11372, 11373, 11374, 11375, 11376, 11377, 11378, 11379, 11380, 11381, 11382, 11383, 11384, 11385, 11386, 11387, 45061, 45089, 45095, 45100, 45101, 46082, 46084, 46086, 46092, 46250, 46251, 46252, 46253, 46254, 46255, 46256, 46282, 46283, 46284, 46285, 46286, 4...","[38, 54, 81, 113, 131, 163, 187, 234, 266, 286]","[99, 194, 287, 414, 556, 576, 651, 778, 881, 976]"
1,2,Shinjū ten no Amijima,Shinju ten no Amijima,心中天網島,しんじゅうてんのあみじま,The Love Suicides at Amijima,"Love Suicides at Amijima, The",1720,"Major Plays of Chikamatsu, translated by Donald Keene, (New York: Columbia University Press, 1961); Hironaga, Shūzaburō, The Bunraku Handbook (Tokyo: Maison des Arts, 1976); Kokuritsu Gekijō jōen shiryōshū, edited by Kokuritsu Gekijō Geinō Chōsashitsu, vol. 224, February 1984; Bunraku October 1986 Performance Program",[6],"[1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1701, 1878, 1879, 1880, 2146, 2147]","[10649, 10650, 10651, 10652, 10653, 10654, 10655, 10656, 10657, 10658, 10659, 10660, 10661, 10662, 10663, 10664, 10665, 10666, 10667, 10668, 10669, 10670, 10671, 10672, 10673, 10674, 10675, 10676, 10677, 10678, 10679, 10680, 10681, 10682, 10683, 10684, 10685, 10686, 10687, 10688, 10689, 10690, 10691, 10692, 10693, 10694, 10695, 10696, 10697, 10698, 10699, 10700, 10701, 10702, 10703, 10704, 10705, 10706, 10707, 10708, 10709, 10710, 10711, 10712, 10713, 10714, 10715, 10716, 10717, 10718, 10719, 10720, 10721, 10722, 10723, 10724, 10725, 10726, 10727, 10728, 10729, 10730, 10731, 10732, 10733, 10734, 10735, 10736, 10737, 10738, 10739, 10740, 10741, 10742, 10743, 10744, 10745, 10746, 10747, 10748, 10749, 10750, 10751, 10752, 10753, 10754, 10755, 10756, 10757, 10758, 10759, 10760, 10761, 10762, 10763, 10764, 10765, 10766, 10767, 10768, 10769, 10770, 10771, 10772, 10773, 10774, 10775, 10776, 45041, 45043, 45046, 45047, 45087, 45090, 45091, 45097, 45098, 45461, 45463, 45814, 45816, 45817, 4...","[31, 73, 87, 105, 127, 131, 160, 162, 237, 239, 278, 291, 292]","[110, 262, 315, 383, 457, 554, 558, 573, 780, 791, 913, 951, 954]"
2,3,Sonezaki shinjū,Sonezaki shinju,曽根崎心中,そねざきしんじゅう,The Love Suicides at Sonezaki,"Love Suicides at Sonezaki, The",1703,"Major Plays of Chikamatsu, translated by Donald Keene, (New York: Columbia University Press, 1961); Keene, Donald, Bunraku: The Art of the Japanese Puppet Theatre (Tokyo: Kodansha International, 1973); Hironaga, Shūzaburō, The Bunraku Handbook (Tokyo: Maison des Arts, 1976); Bunraku February 1984, February 1981 Performance Programs",[6],"[1344, 1481, 1482, 1483, 1484, 1485, 1486, 1487, 2127]","[10557, 10558, 10559, 10560, 10561, 10562, 10563, 10564, 10565, 10566, 10567, 10568, 10569, 10570, 10571, 10572, 10573, 10574, 10575, 10576, 10577, 10578, 10579, 10580, 10581, 10582, 10583, 10584, 10585, 10586, 10587, 10588, 10589, 10590, 10591, 10592, 10593, 10594, 10595, 10596, 10597, 10598, 10599, 10600, 10601, 10602, 10603, 10604, 10605, 10606, 10607, 10608, 10609, 10610, 10611, 10612, 10613, 10614, 10615, 10616, 10617, 10618, 10619, 10620, 10621, 10622, 10623, 10624, 10625, 10626, 10627, 10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637, 10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647, 10648, 11283, 11284, 11285, 11286, 11287, 11288, 11289, 11290, 11291, 11292, 11293, 11294, 11295, 11296, 11297, 11298, 11299, 11300, 11301, 11302, 11303, 11304, 11305, 11306, 11307, 11308, 11309, 11310, 11311, 11312, 11313, 11314, 11315, 11316, 11317, 11318, 11319, 11320, 11321, 11322, 11323, 11324, 11325, 11326, 11327, 11328, 11329, 11330, 11331, 11332, 1...","[10, 46, 58, 81, 11, 105, 112, 114, 135, 131, 165, 173, 215, 220, 250, 256, 285, 289, 295, 303]","[29, 164, 213, 291, 293, 382, 412, 420, 468, 555, 578, 605, 715, 740, 825, 841, 941, 946, 964, 983]"
3,4,Shinjū Yoigōshin,Shinju Yoigoshin,心中宵庚申,しんじゅうよいごうしん,The Love Suicide of Hambei and Ochiyo,"Love Suicide of Hambei and Ochiyo, The",1722,"Hironaga, Shūzaburō, The Bunraku Handbook (Tokyo: Maison des Arts, 1976); Bunraku May 1975, February 1982 Performance Program; Chikamatsu: 5 Late Plays, translated and annotated by C. Andrew Gerstle (New York: Columbia University Press, c2001)",[6],"[1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1882]","[8821, 8822, 8823, 8824, 8825, 8826, 8827, 8828, 8829, 8830, 8831, 8832, 8833, 8834, 8835, 8836, 8837, 8838, 8839, 8840, 8841, 8842, 8843, 8844, 8845, 8846, 8847, 8848, 8849, 8850, 8851, 8852, 8853, 8854, 8855, 8856, 8857, 8858, 8859, 8860, 8861, 8862, 8863, 8864, 8865, 8866, 8867, 8868, 8869, 8870, 8871, 8872, 8873, 8874, 8875, 8876, 8877, 8878, 8879, 8880, 8881, 8882, 8883, 8884, 8885, 8886, 8887, 8888, 8889, 8890, 8891, 8892, 8893, 8894, 8895, 8896, 8897, 8898, 8899, 8900, 8901, 8902, 8903, 8904, 8905, 8906, 8907, 8908, 8909, 8910, 8911, 8912, 8913, 8914, 8915, 8916, 8917, 8918, 8919, 8920, 8921, 8922, 8923, 8924, 8925, 8926, 8927, 8928, 8929, 8930, 8931, 8932, 8933, 8934, 8935, 8936, 8937, 8938, 8939, 8940, 8941, 8942, 8943, 8944, 8945, 8946, 8947, 8948, 8949, 8950, 8951, 8952, 8953, 8954, 8955, 8956, 8957, 8958, 8959, 8960, 8961, 8962, 8963, 8964, 8965, 10790, 10791, 10792, 10793, 10794, 10795, 10796, 10797, 10798, 10799, 10800, 10801, 10802, 10803, 10804, 10805, 10806, 10807,...","[63, 88, 110, 113, 143, 169, 211, 242, 248]","[232, 321, 406, 416, 492, 595, 712, 797, 819]"
4,5,Somemoyō imose no kadomatsu,Somemoyo imose no kadomatsu,染模様妹背門松,そめもよういもせのかげまつ,The Love of Osome and Hisamatsu,"Love of Osome and Hisamatsu, The",1767,"Hironaga, Shūzaburō, The Bunraku Handbook (Tokyo: Maison des Arts, 1976); Bunraku May 1976, November 1984 Performance Programs",[34],"[1343, 1467, 1468, 1469, 1470, 1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, 1480, 2267]","[10454, 10455, 10456, 10457, 10458, 10459, 10460, 10461, 10462, 10463, 10464, 10465, 10466, 10467, 10468, 10469, 10470, 10471, 10472, 10473, 10474, 10475, 10476, 10477, 10478, 10479, 10480, 10481, 10482, 10483, 10484, 10485, 10486, 10487, 10488, 10489, 10490, 10491, 10492, 10493, 10494, 10495, 10496, 10497, 10498, 10499, 10500, 10501, 10502, 10503, 10504, 10505, 10506, 10507, 10508, 45019, 45083, 45084, 45526, 45528, 46230, 46231, 46232, 46233, 46234, 46235, 54122, 54123, 54124, 54125, 54126, 54127, 54128, 54129, 54130, 54131, 54132, 54133, 54134, 54135, 54136, 54137, 54138, 54139, 54140, 54141, 54142, 54143, 54144, 54145, 54146, 54147, 54148, 54149, 54150, 54151, 54152, 54153, 54154, 54155, 54156, 54157, 54158, 54159, 54160, 54161, 54162, 54163, 54164, 54165, 54166, 54167, 54168, 54169, 54170, 54171, 54172, 54173, 54174, 54175, 54176, 54177, 54178, 54179, 54180, 54181, 54182, 54183, 54184, 54185, 54186, 54187, 54188, 54189, 54190]","[2, 20, 199, 65, 101, 139, 196, 211, 271]","[13, 54, 219, 236, 367, 482, 678, 711, 891]"


In [28]:
print "Final row count: " + str(len(plays.index))

Final row count: 178


# Productions:
- Add arrays of image_ids, performane_ids, and play_ids for each production.

<hr/>

In [29]:
print "Starting row count: " + str(len(productions.index))

Starting row count: 293


In [30]:
productions = productions.drop('performance_num',1)

In [31]:
# images
productions = productions.merge(productions_images.groupby('production_id')['image_id'].apply(list).reset_index(), how='left')
# performances
productions = productions.merge(performances.groupby('production_id')['performance_id'].apply(list).reset_index(), how='left')
# plays
productions = productions.merge(performances.groupby('production_id')['play_id'].apply(list).reset_index(), how='left')

In [32]:
productions = productions[['production_id','dates','place','label_eng','image_id','performance_id','play_id']]
productions.sort_index(axis=0)

productions.head()

Unnamed: 0,production_id,dates,place,label_eng,image_id,performance_id,play_id
0,1,1964/11/n.d.-1964/11/n.d.,not recorded,November 1964,,"[2, 3, 4, 5, 6, 7, 8]","[86, 29, 47, 18, 7, 72, 87]"
1,2,1968/02/25-1968/03/10,National Theatre of Japan,February 1968,,"[12, 13, 14, 15, 16, 17]","[83, 5, 88, 89, 6, 70]"
2,3,1968/10/27-1968/11/10,National Theatre of Japan,October 1968,,"[9, 10, 11]","[30, 66, 32]"
3,4,1969/05/11-1969/05/25,National Theatre of Japan,May 1969,,"[18, 19, 20]","[72, 68, 33]"
4,5,1969/09/14-1969/09/21,National Theatre of Japan,September 1969,,"[21, 22, 23]","[90, 91, 35]"


In [33]:
print "Final row count: " + str(len(productions.index))

Final row count: 293


# Tags:
- Add arrays of image_ids for each tag.

<hr/>

In [34]:
print "Starting row count: " + str(len(tags.index))

Starting row count: 76


In [35]:
tags['notes'] = tags['notes'].replace({r'\n': ''}, regex=True)

In [36]:
tags = tags.merge(tags_images.groupby('tag_id')['image_id'].apply(list).reset_index(), how='left')

tags.head()

Unnamed: 0,tag_id,label_eng,label_ka,description,notes,sort_ja,image_id
0,1,Asahi Theater,朝日座,,,あさひざ,"[44990, 45057, 45216, 46025, 56319, 56320, 56321, 56322, 56350, 56351, 56352, 56353, 56354, 56355]"
1,2,Butai geta [high wooden clogs],舞台下駄,High wooden clogs worn on stage by head puppeteers.,,ぶたいげた,"[44981, 44986, 45010, 45017, 45024, 45050, 45057, 45071, 45112, 45115, 45300, 45327, 45335, 45339, 45382, 45384, 45492, 45493, 45928, 45940, 46013, 46015, 46018, 46019, 46160, 46477, 46515, 46517, 46535, 46537, 52501, 52606, 52607, 52640, 53010, 53036, 54850, 54851, 54852, 54853, 54854, 54855, 54981, 55810, 55911, 56274, 56304, 56311, 56596, 57497, 57498, 57981, 57982, 57983, 57984, 58250, 58251, 58252, 58253, 58254, 58263, 60277, 60297, 60724, 64598, 64599, 64600]"
2,3,Dō [body],胴,"Body, torso, or framework of puppets.",,どう,"[44978, 45071, 45235, 45244, 45246, 46033, 46155, 46156, 46157, 52471, 52482, 52483, 52484, 52485, 52486, 55910, 56456, 56457, 56458, 58278, 58280]"
3,4,Dressing room,楽屋,,,がくや,"[44990, 44992, 44993, 44997, 44998, 45000, 45001, 45015, 45017, 45024, 45054, 45112, 45306, 45308, 45310, 45312, 45314, 45318, 45357, 45359, 45361, 45363, 45369, 45472, 45473, 45474, 45476, 45503, 45505, 45506, 45507, 45509, 45510, 45512, 45513, 45593, 45595, 45600, 45601, 45602, 45603, 45604, 45990, 45992, 45994, 46013, 46017, 46039, 46479, 52494, 52495, 52496, 52497, 52498, 52499, 52500, 52501, 52553, 52554, 52555, 52556, 52557, 53005, 53014, 53016, 53028, 53032, 54800, 55848, 55849, 55850, 55851, 55852, 55853, 55854, 55855, 55915, 55916, 56582, 56584, 56585, 56586, 56587, 56588, 56589, 56590, 56591, 56592, 56593]"
4,5,Geza [room for offstage musicians],下座あるいは囃子部屋,Small room over stage-right entrance occupied by offstage musicians.,,げざ,"[11630, 11633, 44982, 44997, 45004, 45005, 45268, 45304, 45325, 45327, 45431, 45433, 45445, 45460, 46667, 52540, 52541, 52542, 52543, 52544, 52545, 52546, 52662, 52663, 52664, 52665, 52666, 52667, 52668, 52669, 52670, 52671, 52672, 54870, 54993, 56256, 56257, 57878, 57879, 57880, 57885, 57886, 60712, 60713]"


In [37]:
print "Final row count: " + str(len(tags.index))

Final row count: 76


# Performers:
- Add arrays of image_ids, musician_perf_ids, narrator_perf_ids, shamisen_perf_ids, and puppeteer_perf_ids for each performer.

<hr/>

In [38]:
print "Starting row count: " + str(len(performers.index))

Starting row count: 184


In [39]:
performers = performers[['performer_id','name_proper','alt_name','name_ka','alt_name_ka','specialty','dates','notes']]

In [40]:
# add images
performers = performers.merge(performers_images.groupby('performer_id')['image_id'].apply(list).reset_index(), how='left')

In [41]:
# add performances as musician 

### make a join table from pscenes
performer_as_musician = pscenes[['musician_id','performance_id']].dropna(how='any').rename(columns={'musician_id':'performer_id','performance_id':'musician_perf_id'})
performer_as_musician = performer_as_musician.groupby('musician_perf_id').performer_id.apply(lambda x: pd.DataFrame(x.values[0])).reset_index().drop('level_1', axis = 1)
performer_as_musician.columns = ['musician_perf_id','performer_id']
performer_as_musician.drop_duplicates(inplace=True)
### add performances
performers = performers.merge(performer_as_musician.groupby('performer_id')['musician_perf_id'].apply(list).reset_index(), how='left')


In [42]:
# add performances as narrator

### make a join table from pscenes
performer_as_narrator = pscenes[['narrator_id','performance_id']].dropna(how='any').rename(columns={'narrator_id':'performer_id','performance_id':'narrator_perf_id'})
performer_as_narrator = performer_as_narrator.groupby('narrator_perf_id').performer_id.apply(lambda x: pd.DataFrame(x.values[0])).reset_index().drop('level_1', axis = 1)
performer_as_narrator.columns = ['narrator_perf_id','performer_id']
performer_as_narrator.drop_duplicates(inplace=True)
### add performances
performers = performers.merge(performer_as_narrator.groupby('performer_id')['narrator_perf_id'].apply(list).reset_index(), how='left')

In [43]:
# add performances as shamisen player

### make a join table from pscenes
performer_as_shamisen = pscenes[['shamisen_id','performance_id']].dropna(how='any').rename(columns={'shamisen_id':'performer_id','performance_id':'shamisen_perf_id'})
performer_as_shamisen = performer_as_shamisen.groupby('shamisen_perf_id').performer_id.apply(lambda x: pd.DataFrame(x.values[0])).reset_index().drop('level_1', axis = 1)
performer_as_shamisen.columns = ['shamisen_perf_id','performer_id']
performer_as_shamisen.drop_duplicates(inplace=True)
### add performances
performers = performers.merge(performer_as_shamisen.groupby('performer_id')['shamisen_perf_id'].apply(list).reset_index(), how='left')

In [44]:
# add performances as puppeteer and kashira used

### make a join table from pscenes
spucks_performances = pscenes[['spuck_id','performance_id']].dropna(how='any').rename(columns={'performance_id':'puppeteer_perf_id'})
spucks_performances = spucks_performances.groupby('puppeteer_perf_id').spuck_id.apply(lambda x: pd.DataFrame(x.values[0])).reset_index().drop('level_1', axis = 1)
spucks_performances.columns = ['puppeteer_perf_id','spuck_id']
spucks_performances.drop_duplicates(inplace=True)
### make join table with performer_id, kashira_id, and puppeteer_perf_id
xtra_spucks = spucks[['spuck_id','pscene_id','puppeteer_id','kashira_id']].rename(columns={'puppeteer_id':'performer_id'})
xtra_spucks = xtra_spucks.merge(spucks_performances, on='spuck_id', how='left').drop('spuck_id',1).drop('pscene_id',1)
### add performances
performer_as_puppeteer = xtra_spucks[['performer_id','puppeteer_perf_id']].dropna(how='any').drop_duplicates()
performers = performers.merge(performer_as_puppeteer.groupby('performer_id')['puppeteer_perf_id'].apply(list).reset_index(), how='left')
### add kashira
performer_puppets = xtra_spucks[['performer_id','kashira_id']].dropna(how='any').drop_duplicates()
performers = performers.merge(performer_puppets.groupby('performer_id')['kashira_id'].apply(list).reset_index(), how='left')

performers.head()


Unnamed: 0,performer_id,name_proper,alt_name,name_ka,alt_name_ka,specialty,dates,notes,image_id,musician_perf_id,narrator_perf_id,shamisen_perf_id,puppeteer_perf_id,kashira_id
0,1,Takemoto Datejidayū (see also Takemoto Datetayū V),Takemoto Datetayū V,竹本伊達路大夫,竹本伊達大夫 (五),Tayu,'1950-1988/04',"Bunraku meikan (1980, 1985, 1988, 1990, 1994)","[8469, 8470, 9055, 9056, 45321, 52421, 52552, 52634, 52708, 52709, 52781, 53397, 53398, 54622, 55168, 55169, 55174, 55175, 55253, 55272, 55478, 58217, 60332]",,"[10, 102, 112, 115, 118, 133, 142, 144, 147, 158, 17, 174, 178, 190, 197, 203, 206, 21, 216, 223, 240, 245, 267, 269, 272, 304, 316, 32, 322, 327, 349, 35, 370, 377, 383, 387, 398, 40, 404, 412, 417, 420, 435, 439, 441, 444, 45, 452, 489, 49, 495, 502, 515, 533, 535, 548, 55, 554, 557, 587, 59, 600, 608, 637, 68, 74, 783, 79, 866, 885, 91, 927, 93, 936, 938, 941, 968, 983]",,,
1,2,Takemoto Tsudayū IV,,竹本津大夫 (四),,Tayu,'1950-',"[Living National Treasure]; Bunraku meikan (1980, 1985, 1988, 1990, 1994)","[8588, 9054, 9055, 9056, 9616, 9617, 9618, 9619, 9621, 9628, 9667, 9669, 10100, 12477, 12478, 12489, 44990, 44993, 44994, 45001, 45322, 45325, 45333, 45341, 45355, 45357, 45359, 45361, 45363, 45367, 45369, 45383, 45385, 45395, 45575, 45578, 45579, 45581, 45603, 45604, 45662, 45665, 45671, 45673, 45675, 45679, 45687, 45689, 46192, 46193, 46361, 46363, 46365, 52548, 52549, 52550, 52586, 52690, 52691, 52692, 52693, 52694, 52695, 52696, 52697, 52698, 52700, 52701, 52702, 52730, 52738, 52739, 52742, 52803, 52804, 52805, 52806, 52807, 53165, 53958, 53959, 53960, 53961, 54204, 54206, 54622, 54784, 55024, 55351, 55353, 55354, 55397, 55483, 55485, 56282, 56564, 56569, 56571, 56572, 56573, 56574, 56593, 56622, 56957, 57387, 57396, 57936, 57937, 57938, 57939, 57940, 57941, 57942, 57943, 57944, 57945, 57946, 57947, 57948, 57949, 57950, 57951, 57952, 57953, 57954, 57955, 57956, 57957, 57958, 57959, 57960, 57961, 57962, 57963, 57964, 57965, 57966, 58449, 58450, 58507, 59068, 59069, 59070, 59127,...",,"[10, 103, 143, 150, 185, 220, 258, 279, 288, 318, 32, 350, 353, 356, 383, 385, 399, 413, 429, 439, 467, 49, 490, 496, 519, 52, 531, 551, 554, 558, 589, 59, 695, 698, 707, 708, 709, 718, 720, 74, 80, 927, 93, 942, 974, 980, 981]",,,
2,3,Tsuruzawa Kanotarō,,鶴澤叶太郎,,Shamisen,'1913-',"Bunraku meikan (1980, 1985, 1988, 1990, 1994)","[45321, 45449, 52634, 52751, 52755, 52819, 52820, 53291, 58827, 60489]",,,"[101, 11, 112, 114, 116, 134, 14, 152, 156, 175, 185, 199, 202, 209, 212, 216, 223, 239, 241, 27, 276, 28, 285, 287, 301, 316, 317, 324, 33, 345, 35, 350, 370, 377, 383, 394, 398, 405, 41, 435, 442, 444, 448, 453, 49, 495, 520, 530, 538, 548, 554, 565, 65, 70, 77, 78, 88, 93, 942, 980, 981]",,
3,4,Takezawa Danshichi (see also Takezawa Danjirō IV),Takezawa Danjirō IV,竹澤団七,竹澤団二郎 (四),Shamisen,'1981/04-',"Bunraku meikan (1980, 1985, 1988, 1990, 1994)","[12477, 12478, 12489]",,,"[385, 399, 403, 429, 439, 467, 485, 490, 496, 519, 531, 551, 554, 558, 575, 589, 598, 599, 608, 612, 617, 621, 623, 631, 646, 656, 669, 672, 730, 732, 751, 758, 794, 807, 811, 814, 838, 841, 843, 851, 859, 864, 877, 883, 892, 911, 912, 922, 952, 955, 963]",,
4,5,Toyotake Matsukadayū,,豊竹松香大夫,,Tayu,'1959-',"Bunraku meikan (1980, 1985, 1988, 1990, 1994)","[8337, 9715, 11874, 11875, 45449, 45681, 45691, 46196, 52740, 52751, 52771, 52772, 52773, 53965, 53966, 53967, 53968, 53969, 54625, 57060, 58227, 58228, 58229, 58245, 59912, 59914]",,"[106, 109, 11, 119, 12, 122, 131, 136, 14, 149, 153, 156, 159, 175, 18, 190, 197, 199, 209, 216, 22, 225, 233, 237, 24, 243, 25, 252, 253, 263, 268, 272, 277, 285, 290, 299, 308, 314, 319, 344, 35, 361, 366, 368, 369, 37, 38, 380, 395, 404, 41, 411, 415, 431, 442, 452, 456, 464, 467, 481, 484, 490, 497, 505, 506, 522, 529, 53, 538, 539, 542, 549, 553, 565, 585, 592, 597, 603, 617, 621, 625, 630, 631, 643, 653, 656, 662, 665, 670, 681, 69, 699, 72, 731, 739, 743, 745, 759, 760, 77, 772, 784, 795, 801, 809, 81, 812, 820, 833, 835, 847, 85, 851, 856, 864, 866, 872, 88, 886, 890, 900, 904, 911, 912, 925, 94, 953, 956, 963, 968, 969, 973, 975, 979]",[270],,


In [45]:
print "Final row count: " + str(len(performers.index))

Final row count: 184


# Clean-up IDs and export to CSV and JSON

<hr/>

In [53]:
# replace descriptive id (used for joins) with generic id
authors.rename(columns={'author_id':'id'}, inplace=True)
characters.rename(columns={'character_id':'id'}, inplace=True)
creators.rename(columns={'creator_id':'id'}, inplace=True)
images.rename(columns={'image_id':'id'}, inplace=True)
kashira.rename(columns={'kashira_id':'id'}, inplace=True)
performances.rename(columns={'performance_id':'id'}, inplace=True)
performers.rename(columns={'performer_id':'id'}, inplace=True)
plays.rename(columns={'play_id':'id'}, inplace=True)
productions.rename(columns={'production_id':'id'}, inplace=True)
pscenes.rename(columns={'pscene_id':'id'}, inplace=True)
spucks.rename(columns={'spuck_id':'id'}, inplace=True)
tags.rename(columns={'tag_id':'id'}, inplace=True)

In [54]:
# export online data as CSV
authors.to_csv('authors.csv', encoding='utf8', index=False)
characters.to_csv('characters.csv', encoding='utf8', index=False)
creators.to_csv('creators.csv', encoding='utf8', index=False)
images.to_csv('images.csv', encoding='utf8', index=False)
kashira.to_csv('kashira.csv', encoding='utf8', index=False)
performances.to_csv('performances.csv', encoding='utf8', index=False)
performers.to_csv('performers.csv', encoding='utf8', index=False)
plays.to_csv('plays.csv', encoding='utf8', index=False)
productions.to_csv('productions.csv', encoding='utf8', index=False)
pscenes.to_csv('pscenes.csv', encoding='utf8', index=False)
spucks.to_csv('spucks.csv', encoding='utf8', index=False)
tags.to_csv('tags.csv', encoding='utf8', index=False)

In [55]:
# export online data as JSON
authors.to_json('authors.json', orient="records", force_ascii=False)
characters.to_json('characters.json', orient="records", force_ascii=False)
creators.to_json('creators.json', orient="records", force_ascii=False)
images.to_json('images.json', orient="records", force_ascii=False)
kashira.to_json('kashira.json', orient="records", force_ascii=False)
performances.to_json('performances.json', orient="records", force_ascii=False)
performers.to_json('performers.json', orient="records", force_ascii=False)
plays.to_json('plays.json', orient="records", force_ascii=False)
productions.to_json('productions.json', orient="records", force_ascii=False)
pscenes.to_json('pscenes.json', orient="records", force_ascii=False)
spucks.to_json('spucks.json', orient="records", force_ascii=False)
tags.to_json('tags.json', orient="records", force_ascii=False)