In [6]:
from arelle import ModelManager
from arelle import Cntlr
import os
import zipfile
import glob
import pandas as pd
import datetime
import requests

In [7]:
def make_edinet_info_list(edinetcodedlinfo_filepath):
    """Read the EDINET code list CSV and return its code/industry pairs.

    The file is decoded as cp932 and its first line is skipped, so the
    second line of the file supplies the column headers.

    Args:
        edinetcodedlinfo_filepath: Path of the EDINET code list CSV.

    Returns:
        A list with one ``[EDINET code, filer industry]`` list per CSV row,
        in file order.
    """
    wanted_columns = ["ＥＤＩＮＥＴコード", "提出者業種"]
    code_table = pd.read_csv(
        edinetcodedlinfo_filepath,
        encoding='cp932',
        skiprows=1,
    )
    return code_table[wanted_columns].values.tolist()

In [8]:
def unzip_file(zip_dir,xbrl_file_expressions):
    """Extract every ``*.zip`` archive in ``zip_dir`` and list the XBRL files.

    Each archive is extracted into ``zip_dir`` itself, so archives that share
    an internal folder layout end up merged into the same sub-folders.

    Args:
        zip_dir: Directory that holds the ``*.zip`` archives and receives
            their extracted contents.
        xbrl_file_expressions: Glob pattern evaluated after extraction to
            find the XBRL instance files.

    Returns:
        Sorted list of paths matching ``xbrl_file_expressions``.

    Raises:
        zipfile.BadZipFile: If one of the archives is not a valid zip file.
    """
    # glob gives no ordering guarantee; sorting makes the processing order
    # (and therefore the order of the rows built from it) reproducible.
    zip_files = sorted(glob.glob(os.path.join(zip_dir, '*.zip')))

    number_of_zip_lists = len(zip_files)
    print("number_of_zip_lists：", number_of_zip_lists)

    for position, zip_file in enumerate(zip_files, start=1):
        print(zip_file, ":", position, "/", number_of_zip_lists)
        # The with-statement closes the archive; no explicit close() needed.
        with zipfile.ZipFile(zip_file) as zip_f:
            zip_f.extractall(zip_dir)

    return sorted(glob.glob(xbrl_file_expressions))

In [9]:
# XBRL element local name -> (label printed to the console, record key) for
# the facts that are copied into the record without any further condition.
_FACT_FIELDS = {
    'FilerNameInJapaneseDEI':
        ("企業名", 'filer_name_jp'),
    'AverageAnnualSalaryInformationAboutReportingCompanyInformationAboutEmployees':
        ("平均年間給与（円）", 'salary_info'),
    'AverageLengthOfServiceYearsInformationAboutReportingCompanyInformationAboutEmployees':
        ("平均勤続年数（年）", 'service_years'),
    'AverageLengthOfServiceMonthsInformationAboutReportingCompanyInformationAboutEmployees':
        ("平均勤続年数（月）", 'service_months'),
    'AverageAgeYearsInformationAboutReportingCompanyInformationAboutEmployees':
        ("平均年齢（歳）", 'age_years'),
    'AverageAgeMonthsInformationAboutReportingCompanyInformationAboutEmployees':
        ("平均年齢（月）", 'age_months'),
    'TotalNumberOfIssuedSharesSummaryOfBusinessResults':
        ("発行済株式総数", 'total_number_of_issued_share'),
    'NetAssetsPerShareSummaryOfBusinessResults':
        ("BPS", 'net_asset_per_share'),
    'BasicEarningsLossPerShareSummaryOfBusinessResults':
        ("EPS", 'basic_earnings_per_share'),
    'DilutedEarningsPerShareSummaryOfBusinessResults':
        ("調整後EPS", 'diluted_earnings_per_share'),
    'PayoutRatioSummaryOfBusinessResults':
        ("配当性向", 'payout_ratio'),
}

# Order of the values inside each returned company row.
_RECORD_COLUMNS = (
    'edinet_code',
    'filer_name_jp',
    'industry_code',
    'salary_info',
    'service_years',
    'age_years',
    'number_of_employees',
    'total_number_of_issued_share',
    'net_asset_per_share',
    'basic_earnings_per_share',
    'diluted_earnings_per_share',
    'payout_ratio',
)


def _years_with_months(years, months):
    """Fold a separately reported month count into the year figure.

    Returns ``years`` untouched when no month value was collected; otherwise
    returns ``str(whole years + months / 12 rounded to one decimal)``. A blank
    year value is treated as 0 so a filing that only reports months does not
    abort the whole run.
    """
    if not months:
        return years
    whole_years = int(years) if years else 0
    return str(whole_years + round(int(months) / 12, 1))


def _read_company_facts(facts, industry_by_code):
    """Collect the values of interest from one filing's facts into a row."""
    record = dict.fromkeys(_RECORD_COLUMNS + ('service_months', 'age_months'), "")

    # NOTE(review): apart from NumberOfEmployees, no context is checked, so
    # when a filing reports the same element more than once the last fact
    # encountered is the one kept. Confirm that this is the intended value.
    for fact in facts:
        concept = fact.concept
        if concept is None:
            # Without a concept there is no element name to match against.
            continue
        local_name = concept.qname.localName

        if local_name == 'EDINETCodeDEI':
            print("EDINETコード", fact.value)
            record['edinet_code'] = fact.value
            if fact.value in industry_by_code:
                print("業種", industry_by_code[fact.value])
                record['industry_code'] = industry_by_code[fact.value]

        elif local_name == 'NumberOfEmployees':
            if fact.contextID == 'CurrentYearInstant_NonConsolidatedMember':
                print("従業員数（人）", fact.value)
                record['number_of_employees'] = fact.value

        elif local_name in _FACT_FIELDS:
            label, key = _FACT_FIELDS[local_name]
            print(label, fact.value)
            record[key] = fact.value

    print("")

    record['service_years'] = _years_with_months(record['service_years'],
                                                 record['service_months'])
    record['age_years'] = _years_with_months(record['age_years'],
                                             record['age_months'])
    return [record[column] for column in _RECORD_COLUMNS]


def make_edinet_company_info_list(xbrl_files,edinet_info_list):
    """Build one row of company figures per XBRL instance file.

    Args:
        xbrl_files: Sequence of XBRL instance file paths to load.
        edinet_info_list: Sequence of ``[EDINET code, industry]`` pairs used
            to look up the industry of each filer. When a code appears more
            than once, the first pair is used.

    Returns:
        A list with one 12-item list per input file, in input order:
        EDINET code, filer name, industry, average annual salary, average
        length of service (years), average age (years), number of employees,
        issued shares, net assets per share, basic EPS, diluted EPS and
        payout ratio. Items that were not found stay as empty strings.

    Raises:
        ValueError: If a collected year or month value is not an integer
            string when the month value has to be folded into the years.
    """
    if not xbrl_files:
        return []

    # Build the lookup once; setdefault keeps the first pair for a code,
    # matching a front-to-back scan that stops at the first hit.
    industry_by_code = {}
    for code_name in edinet_info_list:
        industry_by_code.setdefault(code_name[0], code_name[1])

    # One controller/manager serves every file instead of one per file.
    ctrl = Cntlr.Cntlr()
    model_manager = ModelManager.initialize(ctrl)

    number_of_files = len(xbrl_files)
    edinet_company_info_list = []
    for index, xbrl_file in enumerate(xbrl_files):
        model_xbrl = model_manager.load(xbrl_file)
        try:
            print(xbrl_file, ":", index + 1, "/", number_of_files)
            edinet_company_info_list.append(
                _read_company_facts(model_xbrl.facts, industry_by_code))
        finally:
            # Release the loaded model so memory does not grow with every
            # processed filing.
            model_manager.close(model_xbrl)

    return edinet_company_info_list

In [20]:
def write_csv_of_employee_info(edinet_company_info_list,
                               output_path="E://★★★★★有価証券報告書//決算書データベース//finance_database.csv"):
    """Print the collected company rows and save them as a cp932 CSV file.

    Args:
        edinet_company_info_list: List of 12-item rows, one per company, in
            the column order used below.
        output_path: Destination CSV path. Defaults to the fixed database
            location; the stray trailing ``.`` after ``.csv`` in the earlier
            file name has been dropped.

    Raises:
        UnicodeEncodeError: If a value cannot be represented in cp932.
    """
    # NOTE(review): the ' 平均勤続年数（年）' header starts with a space. It is
    # kept as-is because readers of the CSV may rely on that exact name;
    # confirm whether it is intentional.
    column_names = ['EDINETCODE', '企業名', '業種', '平均年間給与（円）', ' 平均勤続年数（年）', '平均年齢（歳）', '従業員数（人）','発行済株式総数','BPS','EPS','調整後EPS','配当性向']

    employee_frame = pd.DataFrame(edinet_company_info_list, columns=column_names)

    print(employee_frame)
    employee_frame.to_csv(output_path, encoding='cp932')

In [21]:
def main():
    """Run the pipeline: load EDINET codes, unzip filings, parse them, save the CSV."""
    # Fixed input locations used by this run.
    code_list_csv = r'E://★★★★★有価証券報告書//EDINETCODE//EdinetcodeDlInfo.csv'
    archive_dir = 'E://★★★★★有価証券報告書//決算書zip//'
    xbrl_pattern = 'E://★★★★★有価証券報告書//決算書zip//XBRL//PublicDoc//*.xbrl'

    # Code/industry pairs first, then the extracted XBRL instance files.
    industry_pairs = make_edinet_info_list(code_list_csv)
    extracted_xbrl = unzip_file(archive_dir, xbrl_pattern)

    company_rows = make_edinet_company_info_list(extracted_xbrl, industry_pairs)
    print(company_rows)

    write_csv_of_employee_info(company_rows)
    print("extract finish")

In [None]:
# Start the pipeline only when this code runs with __name__ set to
# "__main__" (executed directly), not when it is imported as a module.
if __name__ == "__main__":
    main()

number_of_zip_lists： 3597
E://★★★★★有価証券報告書//決算書zip\S100HIYY.zip : 1 / 3597
E://★★★★★有価証券報告書//決算書zip\S100I5KA.zip : 2 / 3597
E://★★★★★有価証券報告書//決算書zip\S100I6K4.zip : 3 / 3597
E://★★★★★有価証券報告書//決算書zip\S100IBY5.zip : 4 / 3597
E://★★★★★有価証券報告書//決算書zip\S100ICUK.zip : 5 / 3597
E://★★★★★有価証券報告書//決算書zip\S100ICV6.zip : 6 / 3597
E://★★★★★有価証券報告書//決算書zip\S100ICVH.zip : 7 / 3597
E://★★★★★有価証券報告書//決算書zip\S100ICVT.zip : 8 / 3597
E://★★★★★有価証券報告書//決算書zip\S100IDBS.zip : 9 / 3597
E://★★★★★有価証券報告書//決算書zip\S100IF2Y.zip : 10 / 3597
E://★★★★★有価証券報告書//決算書zip\S100IFEM.zip : 11 / 3597
E://★★★★★有価証券報告書//決算書zip\S100IG28.zip : 12 / 3597
E://★★★★★有価証券報告書//決算書zip\S100IG84.zip : 13 / 3597
E://★★★★★有価証券報告書//決算書zip\S100IGCO.zip : 14 / 3597
E://★★★★★有価証券報告書//決算書zip\S100IGD6.zip : 15 / 3597
E://★★★★★有価証券報告書//決算書zip\S100IGJ9.zip : 16 / 3597
E://★★★★★有価証券報告書//決算書zip\S100IGLA.zip : 17 / 3597
E://★★★★★有価証券報告書//決算書zip\S100IGRF.zip : 18 / 3597
E://★★★★★有価証券報告書//決算書zip\S100IGTI.zip : 19 / 3597
E://★★★★★有価証券報告書//決算書zip\S100IGWE