# Data download example

#### Account
Before downloading, you need to create an EDINET API account:
https://disclosure2dl.edinet-fsa.go.jp/guide/static/disclosure/WZEK0110.html


# Setting

In [1]:
from time import sleep
from tqdm import tqdm
import numpy as np
import pandas as pd
import sys
import warnings
warnings.filterwarnings('ignore')

from pathlib import Path

# Temporary download directory; both sub-directories are created up front.
DATA_PATH = Path("../data")
(DATA_PATH / "raw/xbrl_doc").mkdir(parents=True, exist_ok=True)  # where the downloaded zip files are stored
(DATA_PATH / "raw/xbrl_doc_ext").mkdir(parents=True, exist_ok=True)  # where the XBRL, schema and linkbase files extracted from the zips are stored

# Read the API key with getpass instead of input(): input() echoes the typed
# value into the cell output, which would be saved with the notebook and leak the key.
from getpass import getpass

your_api_key: str = getpass("EDINET API key: please input your EDINET API key: ")


# 1. Get list of submitted documents

In [2]:
# NOTE(review): this is a relative import, which needs a parent package. A notebook
# kernel normally runs cells as __main__ without one, so this line likely raises
# ImportError on a fresh kernel — confirm how the notebook is executed and switch to
# an absolute import if needed.
from .edinet_api import request_term, edinet_response_metadata, request_doc, EdinetResponseDf, EdinetResponseList

In [3]:
# Request the submitted-document list for 2024-06-15 through 2024-06-30.
# This is slow: the recorded run of this cell took about 35 minutes.
res_results: EdinetResponseList = request_term(
    api_key=your_api_key,
    start_date_str="2024-06-15",
    end_date_str="2024-06-30",
)

100%|██████████| 16/16 [34:55<00:00, 130.98s/it]


In [4]:
# Destination file for the collected metadata.
filename = str(DATA_PATH / "data.jsonl")

# Create the metadata object, hand it the responses returned by request_term,
# and save the result to `filename`.
# NOTE(review): tse_sector_url presumably points at the JPX listed-issues sheet
# used to attach sector labels — confirm against edinet_api.
edinet_response_metadata_obj = edinet_response_metadata(
    tmp_path_str=str(DATA_PATH),
    tse_sector_url="https://www.jpx.co.jp/markets/statistics-equities/misc/tvdivq0000001vg2-att/data_j.xls",
)
edinet_response_metadata_obj.set_data(res_results)
edinet_response_metadata_obj.save(filename)

In [5]:
# Re-create the metadata object, this time passing the data.jsonl file under
# DATA_PATH as `filename`, so this cell does not need the API responses in memory.
edinet_response_metadata_obj = edinet_response_metadata(
    filename=str(DATA_PATH / "data.jsonl"),
    tse_sector_url="https://www.jpx.co.jp/markets/statistics-equities/misc/tvdivq0000001vg2-att/data_j.xls",
    tmp_path_str=str(DATA_PATH),
)

# Table of filings returned by get_yuho_df().
yuho_df: EdinetResponseDf = edinet_response_metadata_obj.get_yuho_df()

In [6]:
# Keep only the filings whose sector_label_33 is '食料品' (foods) and report how many there are.
yuho_df_filtered: EdinetResponseDf = yuho_df.query("sector_label_33 == '食料品'")
print("food sector:", len(yuho_df_filtered))

業種が食料品の有価証券報告書数: 79


In [7]:
# Index the filtered filings by docID and keep the first 30 rows.
# The name is rebound in place, so guard set_index: on a re-run of this cell "docID"
# is already the index (no longer a column) and an unconditional set_index would
# raise KeyError. head(30) is safe to repeat.
if yuho_df_filtered.index.name != "docID":
    yuho_df_filtered = yuho_df_filtered.set_index("docID")
yuho_df_filtered = yuho_df_filtered.head(30)
yuho_df_filtered.columns

Index(['index', 'access_date', 'seqNumber', 'edinetCode', 'secCode', 'JCN',
       'filerName', 'fundCode', 'ordinanceCode', 'formCode', 'docTypeCode',
       'periodStart', 'periodEnd', 'submitDateTime', 'docDescription',
       'issuerEdinetCode', 'subjectEdinetCode', 'subsidiaryEdinetCode',
       'currentReportReason', 'parentDocID', 'opeDateTime', 'withdrawalStatus',
       'docInfoEditStatus', 'disclosureStatus', 'xbrlFlag', 'pdfFlag',
       'attachDocFlag', 'englishDocFlag', 'csvFlag', 'legalStatus',
       'sector_label_33'],
      dtype='object')

In [8]:
# Preview the first five selected filings.
yuho_df_filtered.head(n=5)

Unnamed: 0_level_0,index,access_date,seqNumber,edinetCode,secCode,JCN,filerName,fundCode,ordinanceCode,formCode,...,withdrawalStatus,docInfoEditStatus,disclosureStatus,xbrlFlag,pdfFlag,attachDocFlag,englishDocFlag,csvFlag,legalStatus,sector_label_33
docID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
S100TMYO,231,2024-12-19,232,E00385,22200,4110001006378,亀田製菓株式会社,,10,30000,...,0,0,0,1,1,1,0,1,1,食料品
S100TNWB,198,2024-12-19,199,E31012,28830,2010001048993,株式会社大冷,,10,30000,...,0,0,0,1,1,1,0,1,1,食料品
S100TLP1,214,2024-12-19,215,E00406,22670,7010401029746,株式会社ヤクルト本社,,10,30000,...,0,0,0,1,1,1,0,1,1,食料品
S100TN7J,514,2024-12-19,515,E00354,21070,2010001034845,東洋精糖株式会社,,10,30000,...,0,0,0,1,1,1,0,1,1,食料品
S100TO96,580,2024-12-19,581,E27294,25880,9090001010626,株式会社プレミアムウォーターホールディングス,,10,30000,...,0,0,0,1,1,1,0,1,1,食料品


# 2. Download the documents

In [9]:
# Download every selected document into raw/xbrl_doc as <docID>.zip,
# pausing half a second after each request, then report how many failed.
zip_dir = DATA_PATH / "raw/xbrl_doc"
res_results = []
for docid in tqdm(yuho_df_filtered.index):
    out_filename = str(zip_dir / (docid + ".zip"))
    download_result = request_doc(
        api_key=your_api_key,
        docid=docid,
        out_filename_str=out_filename,
    )
    res_results.append(download_result)
    sleep(0.5)

# A result counts as failed when its status is 'failure'.
failure_count = sum(1 for item in res_results if item.status == 'failure')
print("Failed: ", failure_count)

100%|██████████| 30/30 [01:26<00:00,  2.88s/it]

取得失敗数:  0



