# Task: Output Bunraku authors to json list using authors_plays join table to grab an array of play_ids for each author.

_________________



In [1]:
import pandas as pd

In [2]:
# read csvs as data frames
authors_df = pd.read_csv('authors.csv')
play_auth_df = pd.read_csv('authors_plays.csv')

## Starting authors table:
_____________

In [3]:
authors_df.head()

Unnamed: 0,author_id,label,label_ka,dates,reference,sort_ja
0,1,Asada Icchō,浅田一鳥,fl. 1741-1767,LC Authorities,あさだいっちょう
1,2,Ashikawa Teruha,芦川照葉,,LC Authorities,あしかわてるは
2,4,Chikamatsu Hanji,近松半二,d. 1786 or 7,LC Authorities,ちかまつはんじ
3,5,Chikamatsu Kosuiken,近松湖水軒,,"繪本太功記 / 近松やなぎ, 近松湖水軒, 千葉軒合作, 東京 : 金櫻堂, 1890.12...",ちかまつこすいけん
4,6,Chikamatsu Monzaemon,近松門左衛門,,LC Authorities,ちかまつもんざえもん


## Join table for authors and plays:

In [4]:
play_auth_df.head()

Unnamed: 0,play_id,author_id
0,1,6
1,2,6
2,3,6
3,4,6
4,5,34


In [5]:
# make a new author dataframe with inner join of play_ids.¶
# note: this drops authors who have no plays assigned to them.
authors_w_play_id = pd.merge(authors_df, play_auth_df, on='author_id', how='inner')

## New authors table, with a full record per play:
______________

In [6]:
authors_w_play_id.head()

Unnamed: 0,author_id,label,label_ka,dates,reference,sort_ja,play_id
0,1,Asada Icchō,浅田一鳥,fl. 1741-1767,LC Authorities,あさだいっちょう,19
1,1,Asada Icchō,浅田一鳥,fl. 1741-1767,LC Authorities,あさだいっちょう,72
2,1,Asada Icchō,浅田一鳥,fl. 1741-1767,LC Authorities,あさだいっちょう,105
3,1,Asada Icchō,浅田一鳥,fl. 1741-1767,LC Authorities,あさだいっちょう,122
4,2,Ashikawa Teruha,芦川照葉,,LC Authorities,あしかわてるは,173


In [7]:
# since author_id is not a consistent iterator, get list of total author_ids
auth_id_list = list(authors_w_play_id.author_id.unique())

In [8]:
# get column list to use in making author dictionaries
cols =  authors_w_play_id.columns

In [9]:
# make empty data frame to fill during for loop
final_auth_df = pd.DataFrame({})

In [10]:
# for each author :
for id_num in auth_id_list:
    # make a temporary slice of all records of the author
    temp_auth = authors_w_play_id.loc[authors_w_play_id['author_id'] == id_num]
    
    # make a dictionary of the first record of the author
    vals = []
    for col in cols:
        vals.append(temp_auth[:1][col].values[0])
    
    temp_dict = dict(zip(cols, vals))
        
    # make a temporary array of all the plays associated with the author
    temp_auth_plays = list(temp_auth['play_id'])
    
    # replace the author's single play id with the array of plays
    temp_dict['play_id'] = temp_auth_plays
    
    # append updated author dictionary to dataframe
    final_auth_df = final_auth_df.append(temp_dict, ignore_index=True)

In [11]:
# troublshoot author_id being recast as float
final_auth_df['author_id'] = final_auth_df['author_id'].astype(int).astype('str')

## Final author table, with array of plays per author record:
___________

In [12]:
final_auth_df.head()

Unnamed: 0,author_id,dates,label,label_ka,play_id,reference,sort_ja
0,1,fl. 1741-1767,Asada Icchō,浅田一鳥,"[19, 72, 105, 122]",LC Authorities,あさだいっちょう
1,2,,Ashikawa Teruha,芦川照葉,[173],LC Authorities,あしかわてるは
2,4,d. 1786 or 7,Chikamatsu Hanji,近松半二,"[11, 21, 25, 27, 29, 33, 52, 79, 80, 90, 101, ...",LC Authorities,ちかまつはんじ
3,5,,Chikamatsu Kosuiken,近松湖水軒,[16],"繪本太功記 / 近松やなぎ, 近松湖水軒, 千葉軒合作, 東京 : 金櫻堂, 1890.12...",ちかまつこすいけん
4,6,,Chikamatsu Monzaemon,近松門左衛門,"[1, 2, 3, 4, 7, 8, 17, 24, 28, 34, 40, 41, 54,...",LC Authorities,ちかまつもんざえもん


## Final author table as json records:
____________

In [13]:
final_auth_df.to_json( orient="records", force_ascii=False)
# save final dataframe to json file
# json_name = 'authors_with_play_id_array.json'
# final_auth_df.to_json(json_name, orient="records", force_ascii=False)

'[{"author_id":"1","dates":"fl. 1741-1767","label":"Asada Icch\xc5\x8d","label_ka":"\xe6\xb5\x85\xe7\x94\xb0\xe4\xb8\x80\xe9\xb3\xa5","play_id":[19,72,105,122],"reference":"LC Authorities","sort_ja":"\xe3\x81\x82\xe3\x81\x95\xe3\x81\xa0\xe3\x81\x84\xe3\x81\xa3\xe3\x81\xa1\xe3\x82\x87\xe3\x81\x86"},{"author_id":"2","dates":null,"label":"Ashikawa Teruha","label_ka":"\xe8\x8a\xa6\xe5\xb7\x9d\xe7\x85\xa7\xe8\x91\x89","play_id":[173],"reference":"LC Authorities","sort_ja":"\xe3\x81\x82\xe3\x81\x97\xe3\x81\x8b\xe3\x82\x8f\xe3\x81\xa6\xe3\x82\x8b\xe3\x81\xaf"},{"author_id":"4","dates":"d. 1786 or 7","label":"Chikamatsu Hanji","label_ka":"\xe8\xbf\x91\xe6\x9d\xbe\xe5\x8d\x8a\xe4\xba\x8c","play_id":[11,21,25,27,29,33,52,79,80,90,101,110,141,147],"reference":"LC Authorities","sort_ja":"\xe3\x81\xa1\xe3\x81\x8b\xe3\x81\xbe\xe3\x81\xa4\xe3\x81\xaf\xe3\x82\x93\xe3\x81\x98"},{"author_id":"5","dates":null,"label":"Chikamatsu Kosuiken","label_ka":"\xe8\xbf\x91\xe6\x9d\xbe\xe6\xb9\x96\xe6\xb0\xb4\xe8\x