In [1]:
from urllib.request import urlopen
import urllib.request
import urllib
from urllib.error import URLError, HTTPError
from bs4 import BeautifulSoup
import sys, os.path, re, csv
import pandas as pds
import numpy as np
from datetime import datetime
import codecs
import zipfile

# Defining url of Aozora-bunko and local work directory.
base_url = "http://www.aozora.gr.jp/"
data_dir = "./"
aozora_dir = data_dir + "aozora_data/"
log_dir = aozora_dir + "log/"

# The project uses csv with the name of author and his/her url in Aozora-bunko
target_author_file = data_dir + "target_author.csv"

auth_target = []
with open(target_author_file,"r") as f:
    reader = csv.reader(f)
    for row in reader:
        auth_target.append(row)
auth_target

print (auth_target)

[['author', ' "url"'], ['natsume', ' http://www.aozora.gr.jp/index_pages/person148.html#sakuhin_list_1'], ['akutagawa', ' http://www.aozora.gr.jp/index_pages/person879.html#sakuhin_list_1'], ['mori', ' http://www.aozora.gr.jp/index_pages/person129.html#sakuhin_list_1'], ['miyazawa', ' https://www.aozora.gr.jp/index_pages/person81.html#sakuhin_list_1']]


In [2]:
# Make directories for authors and csv, text extraction, and utf converted directories for them.

def make_workdir(aozora_dir=aozora_dir, auth_target=auth_target):
    if not os.path.exists(aozora_dir):
        try:
            os.makedirs(aozora_dir)
            print ("make: " + aozora_dir)
        except OSError as e:
            print (e)
            
    if not os.path.exists(log_dir):
        try:
            os.makedirs(log_dir)
            print ("make: " + log_dir)
        except OSError as e:
            print (e)
        
    for w in auth_target[1:]:
        auth_dir = '{}{}/'.format(aozora_dir, w[0])
        if not os.path.exists(auth_dir):
            try:
                os.makedirs(auth_dir)
                print ("make: " + auth_dir)
            except OSError as e:
                print (e)
        if not os.path.exists(auth_dir + "csv/"):
            try:
                os.makedirs(auth_dir + "csv/")
                print ("make: " + auth_dir + "csv/")
            except OSError as e:
                print (e)
        if not os.path.exists(auth_dir + "ext/"):
            try:
                os.makedirs(auth_dir + "ext/")
                print ("make: " + auth_dir + "ext/")
            except OSError as e:
                print (e)
        if not os.path.exists(auth_dir + "utf/"):
            try:
                os.makedirs(auth_dir + "utf/")
                print ("make: " + auth_dir + "utf/")
            except OSError as e:
                print (e)

In [3]:
# Downloads all the zip files from author's written pieces.

def download_zip(auth_target=auth_target):
    for w in auth_target[1:]:
        print ("starting %s" % w[0])
        auth_dir = '{}{}/'.format(aozora_dir, w[0])
        url = w[1]
        
        html = urlopen(url)

        if html.getcode() == 200:
            soup = BeautifulSoup(html, "lxml")
            piece_list = soup.find("ol")
            piece_links = piece_list.find_all("a")
            piece_links_np = np.array([["datetime","title","url","zip"]])
            for i in piece_links:
                title = i.string
                link = base_url + i["href"].replace("../", "")
                if "cards" in link:
                    print ("    piece: %s for %s" % (title, link))
                    piece_html = urlopen(link)
                    if piece_html.getcode() == 200:
                        soup = BeautifulSoup(piece_html, "lxml")
                        zip_part = soup.find_all("a", href=re.compile(".zip"))
                        if zip_part != []:
                            zip_file = zip_part[0]["href"]
                            zip_url = urllib.parse.urljoin(link, zip_file)
                            print ("        zip_url: %s" % zip_url)
                            now = datetime.now().strftime("%Y/%m/%d %H:%M:%S")
                            tmp = np.array([[now, title, link, zip_url]])
                            piece_links_np = np.vstack((piece_links_np, tmp))

                            file_name = os.path.basename(zip_url)
                            file_full_path = '{}{}'.format(auth_dir, file_name)
                            urllib.request.urlretrieve(zip_url, filename=file_full_path)

            piece_links_pds = pds.DataFrame(piece_links_np[1:,:], columns=piece_links_np[0,:])
            piece_links_pds.to_csv(log_dir + w[0] + '_dl_log.csv', quoting=csv.QUOTE_ALL)
        print ("finished %s" % w[0])

In [4]:
# Extract zip files to txt files. Its character encoding is in SHIfT-JIS.

def zip_extract(auth_target=auth_target):
    log_np = np.array([["datetime", "author", "zip"]])
    for w in auth_target[1:]:
        auth_dir = '{}{}/'.format(aozora_dir, w[0])
        ext_dir = '{}{}'.format(auth_dir, "ext/")
        files = os.listdir(auth_dir)
        for file in files:
            if "zip" in file:
                file_fullpath = auth_dir + file
                with zipfile.ZipFile(file_fullpath, 'r') as zip_file:
                    zip_file.extractall(path=ext_dir)
                    now = datetime.now().strftime("%Y/%m/%d %H:%M:%S")
                    tmp = np.array([[now, w[0], zip_file]])
                    log_np = np.vstack((log_np, tmp))
                    print ("extracted: " + str(zip_file))
        zip_ext_pds = pds.DataFrame(log_np[1:,:], columns=log_np[0,:])
        zip_ext_pds.to_csv(log_dir + w[0] + '_zip_log.csv', quoting=csv.QUOTE_ALL)

In [5]:
# Making txt files with SHIFT-JIS converted to UTF-8.

def convert_sjis_to_utf8(auth_target=auth_target):
    log_np = np.array([["datetime", "author", "file"]])
    for w in auth_target[1:]:
        auth_dir = '{}{}/'.format(aozora_dir, w[0])
        ext_dir = '{}{}'.format(auth_dir, "ext/")
        utf_dir = '{}{}'.format(auth_dir, "utf/")
        files = os.listdir(ext_dir)
        for file in files:
            if "txt" in file:
                file_name = ext_dir + file
                save_name = utf_dir + file
                fout = codecs.open(file_name, 'r', 'shift_jis')
                fsave = codecs.open(save_name, 'w+', 'utf-8')
                try:
                    for row in fout:
                        fsave.write(row)
                except Exception as e:
                    print (file + "gets exception: " + str(type(e)))
                finally:
                    fout.close()
                    fsave.close()
                    print ("converted: " + save_name)      
                    now = datetime.now().strftime("%Y/%m/%d %H:%M:%S")
                    tmp = np.array([[now, w[0], file]])
                    log_np = np.vstack((log_np, tmp))              
        convert_pds = pds.DataFrame(log_np[1:,:], columns=log_np[0,:])
        convert_pds.to_csv(log_dir + w[0] + '_cvt_log.csv', quoting=csv.QUOTE_ALL)

In [6]:
# Cleansing UTF-8 texts and convert them to files to CSV.

def data_cleanse(auth_target=auth_target):
    for w in auth_target[1:]:
        print ("starting: " + w[0])
        auth_dir = '{}{}/'.format(aozora_dir, w[0])
        ext_dir = '{}{}'.format(auth_dir, "ext/")
        utf_dir = '{}{}'.format(auth_dir, "utf/")
        csv_dir = '{}{}'.format(auth_dir, "csv/")
        files = os.listdir(utf_dir)
        for file in files:
            if "txt" in file:
                print ("     file: " + file)
                file_name = utf_dir + file
                np_lines = np.array([["auth","piece","line"]])
                f = open(file_name, 'r')

                lines = f.read()
                f.close
                
                lines = lines.replace(u'。', '。\n')
                lines = lines.split('\n')

                ruby = re.compile(u'\《.+?\》')
                chuki = re.compile(u'\［.+?\］')
                zen_sp = re.compile(u'　')
                zen_sp2 = re.compile(u'\u3000')

                for line in lines:
                    line_mod = ruby.sub("", line)
                    line_mod = chuki.sub("", line_mod)
                    line_mod = zen_sp.sub("", line_mod)
                    line_mod = zen_sp2.sub("", line_mod)
                    np_tmp = np.array([[w[0], file, line_mod]])
                    np_lines = np.vstack((np_lines, np_tmp))

                s_line = 1
                e_line = len(lines)
                np_lines_cut = np_lines[s_line:e_line,:]


                file = file.replace(".txt", "")
                lines_pds = pds.DataFrame(np_lines_cut, columns=np_lines[0,:])
                lines_pds.to_csv(csv_dir + file + '.csv', quoting=csv.QUOTE_ALL)

        print ("finished: " + w[0])

In [7]:
if __name__ == "__main__":
    make_workdir(aozora_dir, auth_target)
    download_zip(auth_target)
    zip_extract(auth_target)
    convert_sjis_to_utf8(auth_target)
    data_cleanse(auth_target)

starting natsume
    piece: イズムの功過 for http://www.aozora.gr.jp/cards/000148/card2314.html
        zip_url: http://www.aozora.gr.jp/cards/000148/files/2314_ruby_2291.zip
    piece: 一夜 for http://www.aozora.gr.jp/cards/000148/card1086.html
        zip_url: http://www.aozora.gr.jp/cards/000148/files/1086_ruby_5742.zip
    piece: 永日小品 for http://www.aozora.gr.jp/cards/000148/card758.html
        zip_url: http://www.aozora.gr.jp/cards/000148/files/758_ruby_6056.zip
    piece: 岡本一平著並画『探訪画趣』序 for http://www.aozora.gr.jp/cards/000148/card2669.html
        zip_url: http://www.aozora.gr.jp/cards/000148/files/2669_ruby_6341.zip
    piece: 思い出す事など for http://www.aozora.gr.jp/cards/000148/card792.html
        zip_url: http://www.aozora.gr.jp/cards/000148/files/792_ruby_2117.zip
    piece: カーライル博物館 for http://www.aozora.gr.jp/cards/000148/card1046.html
        zip_url: http://www.aozora.gr.jp/cards/000148/files/1046_ruby_4521.zip
    piece: 薤露行 for http://www.aozora.gr.jp/cards/000148/card769.html
 

    piece: 手紙 for http://www.aozora.gr.jp/cards/000148/card798.html
        zip_url: http://www.aozora.gr.jp/cards/000148/files/798_ruby_2413.zip
    piece: 『伝説の時代』序 for http://www.aozora.gr.jp/cards/000148/card42156.html
        zip_url: http://www.aozora.gr.jp/cards/000148/files/42156_ruby_16023.zip
    piece: 点頭録 for http://www.aozora.gr.jp/cards/000148/card4672.html
        zip_url: http://www.aozora.gr.jp/cards/000148/files/4672_ruby_8553.zip
    piece: 『東洋美術図譜』 for http://www.aozora.gr.jp/cards/000148/card2313.html
        zip_url: http://www.aozora.gr.jp/cards/000148/files/2313_ruby_2287.zip
    piece: 道楽と職業 for http://www.aozora.gr.jp/cards/000148/card757.html
        zip_url: http://www.aozora.gr.jp/cards/000148/files/757_ruby_2986.zip
    piece: 長塚節氏の小説「土」 for http://www.aozora.gr.jp/cards/000148/card2682.html
        zip_url: http://www.aozora.gr.jp/cards/000148/files/2682_ruby_6333.zip
    piece: 中味と形式 for http://www.aozora.gr.jp/cards/000148/card788.html
        zip_url: h

        zip_url: http://www.aozora.gr.jp/cards/000879/files/178_ruby_2210.zip
    piece: アグニの神 for http://www.aozora.gr.jp/cards/000879/card43014.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/43014_ruby_17392.zip
    piece: アグニの神 for http://www.aozora.gr.jp/cards/000879/card15.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/15_ruby_904.zip
    piece: 悪魔 for http://www.aozora.gr.jp/cards/000879/card3804.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/3804_ruby_27189.zip
    piece: 浅草公園 for http://www.aozora.gr.jp/cards/000879/card21.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/21_ruby_1427.zip
    piece: 兄貴のような心持 for http://www.aozora.gr.jp/cards/000879/card43361.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/43361_ruby_17690.zip
    piece: あの頃の自分の事 for http://www.aozora.gr.jp/cards/000879/card17.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/17_ruby_377.zip
    piece: あばばばば for 

    piece: 貝殻 for http://www.aozora.gr.jp/cards/000879/card65.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/65_ruby_1386.zip
    piece: 解嘲 for http://www.aozora.gr.jp/cards/000879/card3764.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/3764_ruby_27200.zip
    piece: 蛙 for http://www.aozora.gr.jp/cards/000879/card3800.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/3800_ruby_27201.zip
    piece: 格さんと食慾 for http://www.aozora.gr.jp/cards/000879/card43365.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/43365_ruby_25702.zip
    piece: 影 for http://www.aozora.gr.jp/cards/000879/card64.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/64_ruby_1561.zip
    piece: 片恋 for http://www.aozora.gr.jp/cards/000879/card74.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/74_ruby_853.zip
    piece: かちかち山 for http://www.aozora.gr.jp/cards/000879/card3814.html
        zip_url: http://www.aozora.gr.jp/cards

        zip_url: http://www.aozora.gr.jp/cards/000879/files/33202_ruby_12223.zip
    piece: 校正後に for http://www.aozora.gr.jp/cards/000879/card89.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/89_ruby_1231.zip
    piece: 合理的、同時に多量の人間味 for http://www.aozora.gr.jp/cards/000879/card43376.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/43376_ruby_25698.zip
    piece: 黄粱夢 for http://www.aozora.gr.jp/cards/000879/card88.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/88_ruby_160.zip
    piece: 黒衣聖母 for http://www.aozora.gr.jp/cards/000879/card85.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/85_ruby_991.zip
    piece: 小杉未醒氏 for http://www.aozora.gr.jp/cards/000879/card43373.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/43373_ruby_25734.zip
    piece: 古千屋 for http://www.aozora.gr.jp/cards/000879/card82.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/82_ruby_1451.zip
    piece: 骨董羹 for http

        zip_url: http://www.aozora.gr.jp/cards/000879/files/155_ruby_1146.zip
    piece: 娼婦美と冒険 for http://www.aozora.gr.jp/cards/000879/card3770.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/3770_ruby_27223.zip
    piece: 食物として for http://www.aozora.gr.jp/cards/000879/card3795.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/3795_ruby_27224.zip
    piece: 虱 for http://www.aozora.gr.jp/cards/000879/card148.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/148_ruby_264.zip
    piece: しるこ for http://www.aozora.gr.jp/cards/000879/card24452.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/24452_ruby_11237.zip
    piece: 白 for http://www.aozora.gr.jp/cards/000879/card149.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/149_ruby_1581.zip
    piece: 蜃気楼 for http://www.aozora.gr.jp/cards/000879/card147.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/147_ruby_1325.zip
    piece: 新緑の庭 for http://ww

        zip_url: http://www.aozora.gr.jp/cards/000879/files/45623_ruby_20632.zip
    piece: 東西問答 for http://www.aozora.gr.jp/cards/000879/card3788.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/3788_ruby_27274.zip
    piece: 道祖問答 for http://www.aozora.gr.jp/cards/000879/card135.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/135_ruby_765.zip
    piece: 動物園 for http://www.aozora.gr.jp/cards/000879/card3810.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/3810_ruby_27233.zip
    piece: 東洋の秋 for http://www.aozora.gr.jp/cards/000879/card2369.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/2369_ruby_1406.zip
    piece: 都会で for http://www.aozora.gr.jp/cards/000879/card2324.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/2324_ruby_1517.zip
    piece: 杜子春 for http://www.aozora.gr.jp/cards/000879/card170.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/170_ruby_348.zip
    piece: 杜子春 for http://

    piece: 一人の無名作家 for http://www.aozora.gr.jp/cards/000879/card3787.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/3787_ruby_27247.zip
    piece: 雛 for http://www.aozora.gr.jp/cards/000879/card47.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/47_ruby_720.zip
    piece: 病牀雑記 for http://www.aozora.gr.jp/cards/000879/card3780.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/3780_ruby_27248.zip
    piece: 病中雑記 for http://www.aozora.gr.jp/cards/000879/card3786.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/3786_ruby_27249.zip
    piece: ひょっとこ for http://www.aozora.gr.jp/cards/000879/card54.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/54_ruby_741.zip
    piece: 平田先生の翻訳 for http://www.aozora.gr.jp/cards/000879/card4303.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/4303_ruby_5518.zip
    piece: 比呂志との問答 for http://www.aozora.gr.jp/cards/000879/card3791.html
        zip_url: http://www.a

        zip_url: http://www.aozora.gr.jp/cards/000879/files/100_ruby_1154.zip
    piece: 森先生 for http://www.aozora.gr.jp/cards/000879/card43390.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/43390_txt_25738.zip
    piece: 文部省の仮名遣改定案について for http://www.aozora.gr.jp/cards/000879/card1133.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/1133_ruby_5515.zip
    piece: 野人生計事 for http://www.aozora.gr.jp/cards/000879/card3743.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/3743_ruby_27275.zip
    piece: 保吉の手帳から for http://www.aozora.gr.jp/cards/000879/card182.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/182_ruby_1184.zip
    piece: 藪の中 for http://www.aozora.gr.jp/cards/000879/card24454.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/24454_ruby_46815.zip
    piece: 藪の中 for http://www.aozora.gr.jp/cards/000879/card179.html
        zip_url: http://www.aozora.gr.jp/cards/000879/files/179_ruby_168.zip
    piece:

    piece: 駆落 for http://www.aozora.gr.jp/cards/000075/card432.html
        zip_url: http://www.aozora.gr.jp/cards/000075/files/432_ruby_22834.zip
    piece: 家常茶飯　附・現代思想 for http://www.aozora.gr.jp/cards/000075/card4250.html
        zip_url: http://www.aozora.gr.jp/cards/000075/files/4250_ruby_32179.zip
    piece: カズイスチカ for http://www.aozora.gr.jp/cards/000129/card680.html
        zip_url: http://www.aozora.gr.jp/cards/000129/files/680_ruby_23197.zip
    piece: 仮名遣意見 for http://www.aozora.gr.jp/cards/000129/card677.html
        zip_url: http://www.aozora.gr.jp/cards/000129/files/677_ruby_22836.zip
    piece: かのように for http://www.aozora.gr.jp/cards/000129/card678.html
        zip_url: http://www.aozora.gr.jp/cards/000129/files/678_ruby_22883.zip
    piece: 鴉 for http://www.aozora.gr.jp/cards/001194/card50916.html
        zip_url: http://www.aozora.gr.jp/cards/001194/files/50916_ruby_39174.zip
    piece: 樺太脱獄記 for http://www.aozora.gr.jp/cards/000362/card2068.html
        zip_url: http:

        zip_url: http://www.aozora.gr.jp/cards/000129/files/43038_ruby_15353.zip
    piece: 高瀬舟縁起 for http://www.aozora.gr.jp/cards/000129/card46234.html
        zip_url: http://www.aozora.gr.jp/cards/000129/files/46234_ruby_22009.zip
    piece: 痴人と死と for http://www.aozora.gr.jp/cards/001192/card50914.html
        zip_url: http://www.aozora.gr.jp/cards/001192/files/50914_ruby_39170.zip
    piece: 沈黙の塔 for http://www.aozora.gr.jp/cards/000129/card3336.html
        zip_url: http://www.aozora.gr.jp/cards/000129/files/3336_ruby_23053.zip
    piece: 追儺 for http://www.aozora.gr.jp/cards/000129/card693.html
        zip_url: http://www.aozora.gr.jp/cards/000129/files/693_ruby_333.zip
    piece: 津下四郎左衛門 for http://www.aozora.gr.jp/cards/000129/card2082.html
        zip_url: http://www.aozora.gr.jp/cards/000129/files/2082_ruby_23122.zip
    piece: 辻馬車 for http://www.aozora.gr.jp/cards/000883/card3416.html
        zip_url: http://www.aozora.gr.jp/cards/000883/files/3416_ruby_28550.zip
    piece: 

        zip_url: http://www.aozora.gr.jp/cards/000129/files/45272_ruby_19023.zip
    piece: 老人 for http://www.aozora.gr.jp/cards/000075/card703.html
        zip_url: http://www.aozora.gr.jp/cards/000075/files/703_ruby_23240.zip
    piece: ロビンソン・クルソオ for http://www.aozora.gr.jp/cards/000129/card53713.html
        zip_url: http://www.aozora.gr.jp/cards/000129/files/53713_txt_45670.zip
    piece: 私が十四五歳の時 for http://www.aozora.gr.jp/cards/000129/card49252.html
        zip_url: http://www.aozora.gr.jp/cards/000129/files/49252_txt_36149.zip
    piece: 鱷 for http://www.aozora.gr.jp/cards/000363/card2069.html
        zip_url: http://www.aozora.gr.jp/cards/000363/files/2069_ruby_6004.zip
    piece: 笑 for http://www.aozora.gr.jp/cards/000366/card2072.html
        zip_url: http://www.aozora.gr.jp/cards/000366/files/2072_ruby_20499.zip
finished mori
starting miyazawa
    piece: 〔青びかる天弧のはてに〕 for http://www.aozora.gr.jp/cards/000081/card53398.html
        zip_url: http://www.aozora.gr.jp/cards/0000

    piece: 看痾 for http://www.aozora.gr.jp/cards/000081/card53411.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/53411_txt_43155.zip
    piece: 〔甘藍の球は弾けて〕 for http://www.aozora.gr.jp/cards/000081/card53374.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/53374_txt_43156.zip
    piece: 黄いろのトマト for http://www.aozora.gr.jp/cards/000081/card1919.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/1919_ruby_17998.zip
    piece: 機会 for http://www.aozora.gr.jp/cards/000081/card53392.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/53392_txt_43157.zip
    piece: 饑餓陣営 for http://www.aozora.gr.jp/cards/000081/card1921.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/1921_ruby_17598.zip
    piece: 疑獄元兇 for http://www.aozora.gr.jp/cards/000081/card48221.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/48221_ruby_32491.zip
    piece: 気のいい火山弾 for http://www.aozora.gr.jp/cards/000081/card4440.html
        zi

        zip_url: http://www.aozora.gr.jp/cards/000081/files/53394_ruby_43236.zip
    piece: 疾中 for http://www.aozora.gr.jp/cards/000081/card471.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/471_ruby_19936.zip
    piece: 詩ノート for http://www.aozora.gr.jp/cards/000081/card47029.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/47029_ruby_45860.zip
    piece: 〔島わにあらき潮騒を〕 for http://www.aozora.gr.jp/cards/000081/card53444.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/53444_txt_43180.zip
    piece: 〔霜枯れのトマトの気根〕 for http://www.aozora.gr.jp/cards/000081/card53420.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/53420_txt_43181.zip
    piece: 〔霧降る萱の細みちに〕 for http://www.aozora.gr.jp/cards/000081/card53353.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/53353_ruby_43182.zip
    piece: 十月の末 for http://www.aozora.gr.jp/cards/000081/card46602.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/46602_ru

    piece: 月夜のでんしんばしら for http://www.aozora.gr.jp/cards/000081/card43756.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/43756_ruby_17596.zip
    piece: 月夜のでんしんばしらの軍歌 for http://www.aozora.gr.jp/cards/000081/card46266.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/46266_ruby_23617.zip
    piece: 土神と狐 for http://www.aozora.gr.jp/cards/000081/card4436.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/4436_ruby_7716.zip
    piece: 土神ときつね for http://www.aozora.gr.jp/cards/000081/card46607.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/46607_ruby_33173.zip
    piece: 〔土をも掘らん汗もせん〕 for http://www.aozora.gr.jp/cards/000081/card53436.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/53436_txt_43204.zip
    piece: 〔つめたき朝の真鍮に〕 for http://www.aozora.gr.jp/cards/000081/card53379.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/53379_txt_43205.zip
    piece: 手紙　一 for http://www.aozora.gr.jp/cards/00008

    piece: ひかりの素足 for http://www.aozora.gr.jp/cards/000081/card458.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/458_ruby_19934.zip
    piece: 秘境 for http://www.aozora.gr.jp/cards/000081/card53419.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/53419_ruby_43215.zip
    piece: 〔卑屈の友らをいきどほろしく〕 for http://www.aozora.gr.jp/cards/000081/card53366.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/53366_txt_43216.zip
    piece: ビジテリアン大祭 for http://www.aozora.gr.jp/cards/000081/card2589.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/2589_ruby_25128.zip
    piece: 〔ひとひははかなくことばをくだし〕 for http://www.aozora.gr.jp/cards/000081/card53432.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/53432_txt_43217.zip
    piece: ひのきとひなげし for http://www.aozora.gr.jp/cards/000081/card1920.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/1920_ruby_17597.zip
    piece: 火の島 for http://www.aozora.gr.jp/cards/000081/card

    piece: 四又の百合 for http://www.aozora.gr.jp/cards/000081/card1116.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/1116_ruby.zip
    piece: ラジュウムの雁 for http://www.aozora.gr.jp/cards/000081/card4864.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/4864_txt_14540.zip
    piece: 龍と詩人 for http://www.aozora.gr.jp/cards/000081/card4865.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/4865_txt_14539.zip
    piece: 〔りんごのみきのはひのひかり〕 for http://www.aozora.gr.jp/cards/000081/card53375.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/53375_ruby_43237.zip
    piece: 〔昤々としてひかれるは〕 for http://www.aozora.gr.jp/cards/000081/card53377.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/53377_txt_43238.zip
    piece: 若い木霊 for http://www.aozora.gr.jp/cards/000081/card43801.html
        zip_url: http://www.aozora.gr.jp/cards/000081/files/43801_ruby_17992.zip
    piece: 〔われかのひとをこととふに〕 for http://www.aozora.gr.jp/cards/000081/card533

extracted: <zipfile.ZipFile filename='./aozora_data/natsume/789_ruby_5639.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/natsume/50478_ruby_40733.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/natsume/2315_ruby_2410.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/natsume/1750_ruby_19434.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/natsume/775_ruby_2064.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/natsume/798_ruby_2413.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/natsume/753_ruby_1701.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/natsume/2672_ruby_6337.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/natsume/2673_ruby_6339.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/natsume/4683_ruby_9475.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/natsume/2670_ruby_6329.zip' mode='r'>
extracted: 

extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/51864_txt_39682.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/112_ruby_3165.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/80_ruby_753.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/73_ruby_1217.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/106_ruby_889.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/3777_ruby_27255.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/3744_ruby_27272.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/142_ruby_249.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/129_ruby_1142.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/3767_ruby_27191.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/2692_ruby_26930.zip' mode

extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/150_ruby_1239.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/43386_ruby_25696.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/103_ruby_948.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/59_ruby_849.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/3752_ruby_27240.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/3790_ruby_27261.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/3808_ruby_27238.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/54_ruby_741.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/130_ruby_825.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/1138_ruby_6111.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/akutagawa/1130_ruby_5514.zip' mode

extracted: <zipfile.ZipFile filename='./aozora_data/mori/892_ruby_20935.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/mori/2049_ruby_19718.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/mori/432_ruby_22834.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/mori/2547_ruby.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/mori/681_ruby_22935.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/mori/3614_ruby_12061.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/mori/688_ruby_23233.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/mori/2051_ruby_22885.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/mori/3413_ruby_28547.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/mori/2079_ruby_23238.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/mori/50912_ruby_39237.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./a

extracted: <zipfile.ZipFile filename='./aozora_data/miyazawa/45655_ruby_35393.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/miyazawa/53372_txt_43195.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/miyazawa/53385_txt_43226.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/miyazawa/46268_txt_23613.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/miyazawa/53426_txt_43192.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/miyazawa/3060_ruby_24445.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/miyazawa/4602_ruby_8285.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/miyazawa/45658_ruby_35347.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/miyazawa/43752_ruby_17595.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/miyazawa/46607_ruby_33173.zip' mode='r'>
extracted: <zipfile.ZipFile filename='./aozora_data/miyazawa/53413_ruby_43191

converted: ./aozora_data/natsume/utf/sorekara.txt
converted: ./aozora_data/natsume/utf/seiyoniwa_nai.txt
converted: ./aozora_data/natsume/utf/tsuchini_tsuite.txt
converted: ./aozora_data/natsume/utf/syojyosaku_tsuikaidan.txt
converted: ./aozora_data/natsume/utf/buncho.txt
converted: ./aozora_data/natsume/utf/kyoikuto_bungei.txt
converted: ./aozora_data/natsume/utf/omoidasu_kotonado.txt
converted: ./aozora_data/natsume/utf/masaoka_shiki.txt
converted: ./aozora_data/natsume/utf/nagatsuka_takashishino.txt
converted: ./aozora_data/natsume/utf/mon.txt
converted: ./aozora_data/natsume/utf/sorekara_yokoku.txt
converted: ./aozora_data/natsume/utf/tegami.txt
converted: ./aozora_data/natsume/utf/conrad.txt
converted: ./aozora_data/natsume/utf/kokoro_kokoku.txt
converted: ./aozora_data/natsume/utf/tentoroku.txt
converted: ./aozora_data/natsume/utf/sakubutsuno_hihyo.txt
converted: ./aozora_data/natsume/utf/jinsei.txt
converted: ./aozora_data/natsume/utf/densetsuno_jidai_jo.txt
converted: ./aozora_

converted: ./aozora_data/akutagawa/utf/aru_katakiuchino_hanashi.txt
converted: ./aozora_data/akutagawa/utf/kesato_morito.txt
converted: ./aozora_data/akutagawa/utf/sonokorono_akamonseikatsu.txt
converted: ./aozora_data/akutagawa/utf/ronen.txt
converted: ./aozora_data/akutagawa/utf/kimono.txt
converted: ./aozora_data/akutagawa/utf/natsumesenseito_takitasan.txt
converted: ./aozora_data/akutagawa/utf/kodokujigoku.txt
converted: ./aozora_data/akutagawa/utf/susanono_mikoto.txt
converted: ./aozora_data/akutagawa/utf/haru.txt
converted: ./aozora_data/akutagawa/utf/san'emonno_tsumi.txt
converted: ./aozora_data/akutagawa/utf/kin_shogun.txt
converted: ./aozora_data/akutagawa/utf/bungeitekina_amarini.txt
converted: ./aozora_data/akutagawa/utf/ababababa.txt
converted: ./aozora_data/akutagawa/utf/hiratasensei.txt
converted: ./aozora_data/akutagawa/utf/soshun.txt
converted: ./aozora_data/akutagawa/utf/sogiki.txt
converted: ./aozora_data/akutagawa/utf/rojo.txt
converted: ./aozora_data/akutagawa/utf/e

converted: ./aozora_data/mori/utf/sokkyo_shijin.txt
converted: ./aozora_data/mori/utf/natsume_soseki_ron.txt
converted: ./aozora_data/mori/utf/watashiga_jushigosaino_toki.txt
converted: ./aozora_data/mori/utf/asobi.txt
converted: ./aozora_data/mori/utf/saijitsu.txt
converted: ./aozora_data/mori/utf/moso.txt
converted: ./aozora_data/mori/utf/kanzanjittoku_engi_shinji.txt
converted: ./aozora_data/mori/utf/fukushindan.txt
converted: ./aozora_data/mori/utf/furui_techokara.txt
converted: ./aozora_data/mori/utf/andreas_thameyers.txt
converted: ./aozora_data/mori/utf/tsuri.txt
converted: ./aozora_data/mori/utf/kuriyama_daizen.txt
converted: ./aozora_data/mori/utf/abe_ichizoku.txt
converted: ./aozora_data/mori/utf/gojiingahara.txt
converted: ./aozora_data/mori/utf/yume.txt
converted: ./aozora_data/mori/utf/teikensensei.txt
converted: ./aozora_data/mori/utf/izawa_ranken.txt
converted: ./aozora_data/mori/utf/michino_ki.txt
converted: ./aozora_data/mori/utf/nezumizaka.txt
converted: ./aozora_data

     file: sanzankoji.txt
     file: gakushato_meiyo.txt
     file: sorekara.txt
     file: seiyoniwa_nai.txt
     file: tsuchini_tsuite.txt
     file: syojyosaku_tsuikaidan.txt
     file: buncho.txt
     file: kyoikuto_bungei.txt
     file: omoidasu_kotonado.txt
     file: masaoka_shiki.txt
     file: nagatsuka_takashishino.txt
     file: mon.txt
     file: sorekara_yokoku.txt
     file: tegami.txt
     file: conrad.txt
     file: kokoro_kokoku.txt
     file: tentoroku.txt
     file: sakubutsuno_hihyo.txt
     file: jinsei.txt
     file: densetsuno_jidai_jo.txt
     file: mohoto_dokuritsu.txt
     file: tayama_katai.txt
     file: baien_jo.txt
     file: hakase_mondaito_murdoch.txt
     file: michikusa.txt
     file: bokuno_mukashi.txt
     file: watashino_keikashita.txt
     file: shumino_iden.txt
     file: hakase_mondaino_nariyuki.txt
     file: tsuchini_tsuie.txt
     file: nakamito_keishiki.txt
     file: gendainihonno_kaika.txt
     file: bungei_iin.txt
     file: hasegawa.txt
 

     file: sato_haruoshino_koto.txt
     file: haruno_yo.txt
     file: aru_shakaishugisha.txt
     file: fugawarina_sakuhin.txt
     file: konparukai.txt
     file: zoku_basho_zakki.txt
     file: sennin.txt
     file: saigo_takamori.txt
     file: shujuno_kotobano_jo.txt
     file: shosetsuno_dokusha.txt
     file: shuchu.txt
     file: anokorono_jibun.txt
     file: mino_mawari.txt
     file: wasurerarenu_insho.txt
     file: chokoyuki.txt
     file: yarigatake_kiko.txt
     file: bunsho.txt
     file: pekin_nikki_sho.txt
     file: taisho_juninen.txt
     file: sobyo_sandai.txt
     file: takita_tetsutarokun.txt
     file: soteini.txt
     file: torokko.txt
     file: enju.txt
     file: aru_katakiuchino_hanashi.txt
     file: kesato_morito.txt
     file: sonokorono_akamonseikatsu.txt
     file: ronen.txt
     file: kimono.txt
     file: natsumesenseito_takitasan.txt
     file: kodokujigoku.txt
     file: susanono_mikoto.txt
     file: haru.txt
     file: san'emonno_tsumi.txt
     

     file: dongurito_yamaneko.txt
     file: genso.txt
     file: soya_1.txt
     file: akegata.txt
     file: sonokatachi_shutokuni.txt
     file: kunezumi.txt
     file: aoyagikyoyuo_okuru.txt
     file: ryuto_shijin.txt
     file: nametokoyamano_kuma.txt
     file: koi.txt
     file: nomin_geijutsu_gairon_koyo.txt
     file: tokkobetorako.txt
     file: nijino_enoguzara.txt
     file: shuronoha_yayani.txt
     file: kogo.txt
     file: haruto_shura.txt
     file: juen.txt
     file: soya_2.txt
     file: shokubutsu_ishi.txt
     file: saruno_koshikake.txt
     file: wakai_kodama.txt
     file: otsuberuto_zo.txt
     file: yabureshi_shonenno.txt
     file: ichono_mi.txt
     file: kyoen.txt
     file: zashikibokkono_hanashi.txt
     file: futagawa_kokonite.txt
     file: haruto_shura_2.txt
     file: kendo.txt
     file: gadolf_r.txt
     file: taireifukuno_reigaitekikoka.txt
     file: kenjukoen.txt
     file: neko.txt
     file: haruto_shura_3.txt
     file: yamaotokono_shigatsu.tx