In [None]:
# -*- coding: utf-8 -*-

In [None]:
pip install arelle

Collecting arelle
  Downloading arelle-2.2.tar.gz (1.9 MB)
[K     |████████████████████████████████| 1.9 MB 7.0 MB/s 
[?25hBuilding wheels for collected packages: arelle
  Building wheel for arelle (setup.py) ... [?25l[?25hdone
  Created wheel for arelle: filename=arelle-2.2-py3-none-any.whl size=2060307 sha256=c0367475d722acfa1821c38e4364a957bfd474f4881fb42c40be98bdd214e8f1
  Stored in directory: /root/.cache/pip/wheels/8c/b2/69/919f97f4f77fd85a26e52df16f1763d68cd18210acc9320ad5
Successfully built arelle
Installing collected packages: arelle
Successfully installed arelle-2.2


In [None]:
pip install isodate

Collecting isodate
  Downloading isodate-0.6.0-py2.py3-none-any.whl (45 kB)
[?25l[K     |███████▏                        | 10 kB 21.5 MB/s eta 0:00:01[K     |██████████████▍                 | 20 kB 21.7 MB/s eta 0:00:01[K     |█████████████████████▌          | 30 kB 24.1 MB/s eta 0:00:01[K     |████████████████████████████▊   | 40 kB 18.7 MB/s eta 0:00:01[K     |████████████████████████████████| 45 kB 2.2 MB/s 
Installing collected packages: isodate
Successfully installed isodate-0.6.0


In [None]:
from arelle import ModelManager
from arelle import Cntlr
import os
import zipfile
import glob
import pandas as pd

In [None]:
def make_edinet_info_list(edinetcodedlinfo_filepath):
    """Load the EDINET code table and return [code, industry] pairs.

    The CSV is read as cp932 with its first line skipped, so the second
    line of the file supplies the column headers.

    Args:
        edinetcodedlinfo_filepath: Path to the EDINET code list CSV.

    Returns:
        A list with one ``[ＥＤＩＮＥＴコード, 提出者業種]`` entry per data row,
        in file order.
    """
    wanted_columns = ["ＥＤＩＮＥＴコード", "提出者業種"]
    code_table = pd.read_csv(
        edinetcodedlinfo_filepath,
        skiprows=1,
        encoding='cp932',
    )
    return code_table.loc[:, wanted_columns].to_numpy().tolist()

def unzip_file(zip_dir, xbrl_file_expressions):
    """Extract every ``*.zip`` in ``zip_dir`` in place and list the XBRL files.

    Each archive is extracted directly into ``zip_dir``, so members that share
    a relative path across archives end up in the same directory tree.

    Args:
        zip_dir: Directory that holds the zip archives; also the extraction
            target.
        xbrl_file_expressions: Glob pattern used after extraction to collect
            the XBRL instance files.

    Returns:
        Sorted list of paths matching ``xbrl_file_expressions`` (empty when
        nothing matches).
    """
    # glob returns matches in arbitrary order; sort so that progress output
    # and downstream row order are the same on every run.
    zip_files = sorted(glob.glob(os.path.join(zip_dir, '*.zip')))

    number_of_zip_lists = len(zip_files)
    print("number_of_zip_lists：", number_of_zip_lists)

    for index, zip_file in enumerate(zip_files, start=1):
        print(zip_file, ":", index, "/", number_of_zip_lists)
        # The context manager closes the archive; no explicit close() needed.
        with zipfile.ZipFile(zip_file) as zip_f:
            zip_f.extractall(zip_dir)

    return sorted(glob.glob(xbrl_file_expressions))

# Order of the values in each output row.
_ROW_FIELDS = (
    'edinet_code',                 # EDINET code
    'filer_name_jp',               # company name (Japanese)
    'industry_code',               # industry, looked up from the EDINET code table
    'net_income',                  # net income
    'roe',                         # return on equity
    'total_assets',                # total assets
    'number_of_female_directors',  # number of female officers
    'number_of_male_directors',    # number of male officers
    'temporary_workers',           # average number of temporary workers
    'salary_info',                 # average annual salary (yen)
    'service_years',               # average length of service (years)
    'age_years',                   # average age (years)
    'number_of_employees',         # number of employees
)

# Element local name -> (row field, label printed to the log, contextID the
# fact must carry; None accepts a fact from any context).
_FACT_RULES = {
    'FilerNameInJapaneseDEI':
        ('filer_name_jp', "企業名", None),
    'AverageNumberOfTemporaryWorkers':
        ('temporary_workers', "臨時雇用者数",
         'CurrentYearInstant_NonConsolidatedMember'),
    'NetIncomeLossSummaryOfBusinessResults':
        ('net_income', "当期純利益",
         'CurrentYearDuration_NonConsolidatedMember'),
    'RateOfReturnOnEquitySummaryOfBusinessResults':
        ('roe', "自己資本利益率",
         'CurrentYearDuration_NonConsolidatedMember'),
    'TotalAssetsSummaryOfBusinessResults':
        ('total_assets', "総資産",
         'CurrentYearInstant_NonConsolidatedMember'),
    'NumberOfFemaleDirectorsAndOtherOfficers':
        ('number_of_female_directors', "女性役員数（人）", 'FilingDateInstant'),
    'NumberOfMaleDirectorsAndOtherOfficers':
        ('number_of_male_directors', "男性役員数（人）", 'FilingDateInstant'),
    'AverageAnnualSalaryInformationAboutReportingCompanyInformationAboutEmployees':
        ('salary_info', "平均年間給与（円）", None),
    'AverageLengthOfServiceYearsInformationAboutReportingCompanyInformationAboutEmployees':
        ('service_years', "平均勤続年数（年）", None),
    'AverageAgeYearsInformationAboutReportingCompanyInformationAboutEmployees':
        ('age_years', "平均年齢（年）", None),
    'NumberOfEmployees':
        ('number_of_employees', "従業員数（人）",
         'CurrentYearInstant_NonConsolidatedMember'),
}


def make_edinet_company_info_list(xbrl_files, edinet_info_list):
    """Extract one row of company figures from each XBRL instance file.

    Every file is loaded with Arelle and its facts are scanned once. A fact
    is recorded when its element local name is ``EDINETCodeDEI`` or appears
    in ``_FACT_RULES`` and, where the rule names a contextID, the fact
    carries that contextID. When several facts qualify for the same field,
    the last one seen wins. Fields with no qualifying fact stay ``""``.
    Each recorded value is also printed, followed by a blank line per file.

    Args:
        xbrl_files: Paths of the XBRL instance files to read.
        edinet_info_list: ``[edinet_code, industry]`` pairs, as returned by
            ``make_edinet_info_list``; used to fill in the industry.

    Returns:
        A list with one 13-item list per input file, ordered as
        ``_ROW_FIELDS``.
    """
    # Build the code -> industry lookup once instead of scanning the list for
    # every file. setdefault keeps the first entry for a duplicated code, the
    # same entry a top-to-bottom scan with an early break would pick.
    industry_by_code = {}
    for entry in edinet_info_list:
        industry_by_code.setdefault(entry[0], entry[1])

    # One controller / model manager serves every file.
    ctrl = Cntlr.Cntlr()
    model_manager = ModelManager.initialize(ctrl)

    edinet_company_info_list = []
    number_of_files = len(xbrl_files)

    for index, xbrl_file in enumerate(xbrl_files):
        row = dict.fromkeys(_ROW_FIELDS, "")
        model_xbrl = model_manager.load(xbrl_file)

        print(xbrl_file, ":", index + 1, "/", number_of_files)

        try:
            for fact in model_xbrl.facts:
                concept = fact.concept
                if concept is None:
                    # No concept attached to this fact, so there is no local
                    # name to match against; skip it instead of crashing.
                    continue
                local_name = concept.qname.localName

                if local_name == 'EDINETCodeDEI':
                    print("EDINETコード", fact.value)
                    row['edinet_code'] = fact.value
                    if fact.value in industry_by_code:
                        print("業種", industry_by_code[fact.value])
                        row['industry_code'] = industry_by_code[fact.value]
                    continue

                rule = _FACT_RULES.get(local_name)
                if rule is None:
                    continue
                field, label, required_context = rule
                if required_context is not None and fact.contextID != required_context:
                    continue
                print(label, fact.value)
                row[field] = fact.value
        finally:
            # Release the loaded model so memory does not accumulate across
            # the whole batch of filings.
            model_manager.close(model_xbrl)

        print("")
        edinet_company_info_list.append([row[field] for field in _ROW_FIELDS])

    return edinet_company_info_list

def write_csv_of_employee_info(
        edinet_company_info_list,
        output_path="/content/drive/MyDrive/Graduation Paper Data/total_info_2020q4.csv"):
    """Print the collected company rows as a table and save them as a CSV.

    Args:
        edinet_company_info_list: List of 13-item rows, one per company, in
            the column order listed below.
        output_path: Destination CSV path. Defaults to the original
            hard-coded location, so existing callers are unaffected.

    The file is written with cp932 encoding and includes the DataFrame index
    as its first column.
    """
    # Column labels are kept byte-for-byte, including the leading space in
    # ' 当期純利益' and ' 平均勤続年数（年）', because anything that reads the CSV
    # may select columns by these exact names.
    columns = ['EDINETCODE', '企業名', '業種',  ' 当期純利益', '自己資本利益率', '総資産','女性役員数（人）','男性役員数（人）','臨時雇用者数','平均年間給与（円）', ' 平均勤続年数（年）', '平均年齢（歳）', '従業員数（人）']

    employee_frame = pd.DataFrame(edinet_company_info_list, columns=columns)

    print(employee_frame)
    employee_frame.to_csv(output_path, encoding='cp932')


def main():
    """Run the full pipeline: code table -> unzip -> extract facts -> CSV.

    All input locations are fixed paths under the mounted Google Drive
    folder; the resulting rows are printed and then written out by
    ``write_csv_of_employee_info``.
    """
    # EDINET code table, used to attach an industry to each company.
    code_table_path = '/content/drive/MyDrive/Graduation Paper Data/EdinetcodeDlInfo.csv'
    industry_table = make_edinet_info_list(code_table_path)

    # Downloaded filing archives and the glob for the extracted instances.
    archive_dir = '/content/drive/MyDrive/Graduation Paper Data/20201001-20201231'
    instance_glob = '/content/drive/MyDrive/Graduation Paper Data/20201001-20201231/XBRL/PublicDoc/*.xbrl'
    instance_paths = unzip_file(archive_dir, instance_glob)

    company_rows = make_edinet_company_info_list(instance_paths, industry_table)
    print(company_rows)

    write_csv_of_employee_info(company_rows)
    print("extract finish")


# Run the pipeline only when executed directly (e.g. as a notebook cell).
if __name__ == "__main__":
    main()


[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
企業名 株式会社一や

/content/drive/MyDrive/Graduation Paper Data/20201001-20201231/XBRL/PublicDoc/jpcrp030000-asr-001_E05705-000_2020-07-31_01_2020-10-29.xbrl : 34 / 330
当期純利益 1155812000
総資産 7293139000
自己資本利益率 0.365
従業員数（人） 709
臨時雇用者数 
従業員数（人） 709
平均年齢（年） 33.2
平均勤続年数（年） 7.5
平均年間給与（円） 5778067
男性役員数（人） 11
女性役員数（人） 
EDINETコード E05705
業種 情報・通信業
企業名 株式会社アイル

/content/drive/MyDrive/Graduation Paper Data/20201001-20201231/XBRL/PublicDoc/jpcrp030000-asr-001_E35611-000_2020-07-31_01_2020-10-29.xbrl : 35 / 330
当期純利益 13991000
総資産 533242000
自己資本利益率 0.039
従業員数（人） 16
臨時雇用者数 
従業員数（人） 16
臨時雇用者数 
平均年齢（年） 44.5
平均勤続年数（年） 3.1
平均年間給与（円） 7872000
男性役員数（人） 6
女性役員数（人） 2
EDINETコード E35611
業種 サービス業
企業名 株式会社さくらさくプラス

/content/drive/MyDrive/Graduation Paper Data/20201001-20201231/XBRL/PublicDoc/jpcrp030000-asr-001_E34999-000_2020-07-31_01_2020-10-29.xbrl : 36 / 330
当期純利益 347761000
総資産 11281415000
自己資本利益率 0.052
従業員数（人） 34
臨時雇用者数 17
従業員数（人） 34
臨時雇用者数 17
平均年齢（年） 37.4
平均勤続年数（年） 2.5
平均年間給