- This notebook calls the EDINET API to get the list of documents submitted on each day.
- This notebook needs to be parameterized so that it can be triggered every day to fetch the list of documents submitted on the previous day.

In [3]:
import datetime
import json
import os
import time
import zipfile

import pandas as pd
import requests
from tqdm import tqdm

# Base URL of the EDINET API (v2); endpoint URLs below are derived from it.
EDINET_API_URL = "https://disclosure.edinet-fsa.go.jp/api/v2"

# Load the settings from the local parameters file so that the database
# connection string and the API key are not hard-coded in the notebook.
# JSON is read explicitly as UTF-8 instead of the platform's locale encoding.
with open('parameters.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Database connection string (set "DATABASE_URI" in parameters.json to your
# PostgreSQL details).
DATABASE_URI = data["DATABASE_URI"]
# Subscription key sent with every EDINET API request.
EDINET_KEY = data["EDINET_KEY"]
# Endpoint that returns the list of documents for a given date.
URL = f"{EDINET_API_URL}/documents.json"

In [2]:
#https://note.com/python_beginner/n/na0e51d80bc35
#utility functions

class ApiResponseError(Exception):
    """Signals an error status reported inside the EDINET API response body.

    The HTTP response of the EDINET API can be 200 even when the status
    carried in the body of the response is an error such as 404.
    """

def make_day_list(start_date, end_date):
    """Build the list of days from ``start_date`` through ``end_date``.

    Both bounds are printed first. The result holds one entry for every
    whole day between the bounds, counted from ``start_date``, followed by
    ``end_date`` itself. ``end_date`` is always the last element, so when it
    is not later than ``start_date`` the result is just ``[end_date]``.

    Args:
        start_date: First day of the range (a ``datetime.date``-like value
            that supports subtraction and adding a ``timedelta``).
        end_date: Last day of the range, same kind of value as ``start_date``.

    Returns:
        list: The days of the range, ending with ``end_date``.
    """
    print("start_date：", start_date)
    print("end_day：", end_date)

    span_days = int((end_date - start_date).days)
    one_day = datetime.timedelta(days=1)

    # Every day strictly before end_date, then end_date itself.
    days = [start_date + offset * one_day for offset in range(span_days)]
    days.append(end_date)
    return days


def make_doc_id_list(target_date, wait_seconds=10, timeout=30):
    """Fetch the EDINET document list for one day.

    Sends a GET request to ``URL`` with ``type=2`` and the subscription key,
    and turns every entry of the response's ``results`` array into one row.

    Args:
        target_date: Day to query; sent as the ``date`` query parameter.
        wait_seconds: Seconds to pause before the request is sent, to space
            out consecutive calls. Defaults to 10.
        timeout: Seconds to wait for the server before the request is
            abandoned (passed to ``requests.get``). Defaults to 30.

    Returns:
        list[list]: One list per document, holding in this order: docID,
        edinetCode, secCode, JCN, filerName, fundCode, ordinanceCode,
        formCode, docTypeCode, periodStart, periodEnd, submitDateTime,
        docDescription, issuerEdinetCode, subjectEdinetCode,
        currentReportReason, parentDocID, opeDateTime, xbrlFlag, pdfFlag,
        csvFlag. An empty list is returned (after printing the error) when
        the request fails or the status in the response body is not '200'.

    Raises:
        KeyError: If the body has no ``results`` or an entry lacks one of
            the fields listed above.
    """
    # Fields copied from each result entry, in output column order.
    fields = (
        'docID',
        'edinetCode',
        'secCode',
        'JCN',
        'filerName',
        'fundCode',
        'ordinanceCode',
        'formCode',
        'docTypeCode',
        'periodStart',
        'periodEnd',
        'submitDateTime',
        'docDescription',
        'issuerEdinetCode',
        'subjectEdinetCode',
        'currentReportReason',
        'parentDocID',
        'opeDateTime',
        'xbrlFlag',
        'pdfFlag',
        'csvFlag',
    )
    params = {"date": target_date, "type": 2, "Subscription-Key": EDINET_KEY}
    time.sleep(wait_seconds)

    try:
        # A timeout keeps a stalled connection from blocking the run forever.
        res = requests.get(URL, params=params, timeout=timeout)
        res.raise_for_status()
        json_data = res.json()

        # The HTTP status can be 200 while the body carries an error status.
        # A body without metadata/status is handled the same way instead of
        # surfacing as a KeyError.
        status = (json_data.get('metadata') or {}).get('status')
        if status != '200':
            raise ApiResponseError('APIのステータスが200以外のレスポンスです')

        return [[doc[field] for field in fields] for doc in json_data["results"]]

    except requests.exceptions.RequestException as e:
        # e.response is None for connection errors and timeouts. Compare with
        # None explicitly: a Response with an error status is falsy.
        detail = e.response.text if e.response is not None else e
        print(f"request failed. error=({detail})")
        return []

    except ApiResponseError as e:
        print(e)
        return []

In [78]:
#https://zenn.dev/robes/articles/f6dfcc5cfbbdb6

In [23]:
# Date range to collect; make_day_list includes both ends.
start_date = datetime.date(2024, 7, 22)
end_date = datetime.date(2024, 7, 23)
securities_report_doc_list = []
# Column names, in the same order as the rows returned by make_doc_id_list.
columns = [
    "docID",
    "edinetCode",
    "secCode",
    "JCN",
    "filerName",
    "fundCode",
    "ordinanceCode",
    "formCode",
    "docTypeCode",
    "periodStart",
    "periodEnd",
    "submitDateTime",
    "docDescription",
    "issuerEdinetCode",
    "subjectEdinetCode",
    "currentReportReason",
    "parentDocID",
    "opeDateTime",
    "xbrlFlag",
    "pdfFlag",
    "csvFlag",
]

day_list = make_day_list(start_date, end_date)

for target_date in tqdm(day_list):
    # Fetch the documents submitted on this one day.
    daily_reports = make_doc_id_list(target_date)
    securities_report_doc_list.extend(daily_reports)

df = pd.DataFrame(data=securities_report_doc_list, columns=columns)

# Create the output directory first; to_csv does not create missing folders.
output_dir = './csv_data/documentList'
os.makedirs(output_dir, exist_ok=True)

# Keep only rows with formCode 030000 under ordinanceCode 010
# (presumably the annual securities reports - confirm against the EDINET
# code list). The file is named after start_date only.
df[(df["formCode"]=="030000") & (df["ordinanceCode"]=="010")].to_csv(f'{output_dir}/{start_date}_documentlist.csv', index=False)

start_date： 2024-07-22
end_day： 2024-07-23


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:21<00:00, 10.73s/it]
