## テキストの分析をする

In [2]:
import os
import requests
import pandas as pd
import time
import zipfile

API_ENDPOINT = "https://disclosure.edinet-fsa.go.jp/api/v2"  # use API v2
daily_frames = []  # one filtered DataFrame per requested date

# Fetch the list of submitted documents for each date and keep only the rows
# whose description mentions an annual securities report ("有価証券報告書").
for date in ["2023-06-20", "2023-06-21", "2023-06-22", "2023-06-23"]:
    time.sleep(1)  # pause between consecutive API calls
    request_params = {
        "date": date,
        "type": 2,  # 1 = metadata only, 2 = list of submitted documents plus metadata
        # Original note: not required when using v1, which was usable only
        # until Friday 2024-03-29.
        "Subscription-Key": os.environ.get("EDINET_API_KEY"),
    }

    response = requests.get(
        f"{API_ENDPOINT}/documents.json",
        params=request_params,
        timeout=30,  # do not let a stalled connection hang the notebook
    )
    response.raise_for_status()
    docs_submitted_json = response.json()

    # An error payload (e.g. missing/invalid key) has no "results" entry;
    # surface the API's own message instead of an opaque KeyError.
    if "results" not in docs_submitted_json:
        raise RuntimeError(
            f"EDINET returned no 'results' for {date}: {docs_submitted_json}"
        )

    sd_df = pd.DataFrame(docs_submitted_json["results"])
    if sd_df.empty:
        # A DataFrame built from an empty list has no columns, so the
        # docDescription filter below would raise KeyError.
        continue

    sd_df = sd_df[sd_df["docDescription"].str.contains("有価証券報告書", na=False)]
    daily_frames.append(sd_df)

# Per-day row indices are kept as-is (no ignore_index), matching the original.
sd_dfs = pd.concat(daily_frames)

### 取得したデータの確認

In [20]:
# Select the rows for three securities codes; in the recorded output these are
# 商船三井 (91040), 川崎汽船 (91070) and 日本郵船 (91010).
海運３社 = sd_dfs.loc[
    sd_dfs["secCode"].isin(["91040", "91070", "91010"])
]
海運３社

Unnamed: 0,seqNumber,docID,edinetCode,secCode,JCN,filerName,fundCode,ordinanceCode,formCode,docTypeCode,...,opeDateTime,withdrawalStatus,docInfoEditStatus,disclosureStatus,xbrlFlag,pdfFlag,attachDocFlag,englishDocFlag,csvFlag,legalStatus
326,327,S100QZLI,E04236,91040,4010401082896,株式会社商船三井,,10,30000,120,...,,0,0,0,1,1,1,0,1,1
410,411,S100R01E,E04235,91010,7010001023785,日本郵船株式会社,,10,30000,120,...,,0,0,0,1,1,1,0,1,1
816,817,S100QZPH,E04237,91070,8140001005720,川崎汽船株式会社,,10,30000,120,...,,0,0,0,1,1,1,0,1,1


In [23]:
def save_csv(docID, doc_type=1):
    """Download one EDINET document archive and extract it into ``./{docID}/``.

    Args:
        docID: EDINET document ID; used both in the request URL and as the
            name of the output directory.
        doc_type: Value sent as the ``type`` query parameter (default 1).

    Returns:
        None. On success the archive contents are extracted under ``{docID}``.
        If the response is an HTTP error or is not a ZIP archive, a failure
        message is printed and nothing is written to disk.
    """
    import io  # local import: only this function needs it

    print(f"{docID}のデータを取得中")
    time.sleep(5)  # pause before each download request

    r = requests.get(
        f"{API_ENDPOINT}/documents/{docID}",
        params={
            "type": doc_type,
            "Subscription-Key": os.environ.get("EDINET_API_KEY"),
        },
        timeout=60,  # avoid hanging forever on a stalled connection
    )

    # requests.get never returns None, so the old `r is None` check could not
    # detect failures. Validate the HTTP status and that the body really is a
    # ZIP archive before touching the filesystem.
    archive = io.BytesIO(r.content)
    if not r.ok or not zipfile.is_zipfile(archive):
        print("データの取得に失敗しました。csvFlag==1かどうか確認してください。")
        return

    os.makedirs(f"{docID}", exist_ok=True)

    # Extract straight from memory: no fixed-name temp file is left behind in
    # the working directory if extraction fails.
    archive.seek(0)
    with zipfile.ZipFile(archive) as z:
        z.extractall(f"{docID}")


# Download and unpack the archive for document S100QZLI (doc_type defaults to 1).
save_csv("S100QZLI")

S100QZLIのデータを取得中


In [68]:
from edinet_xbrl.edinet_xbrl_parser import EdinetXbrlParser
from IPython.display import HTML

# Build the path with os.path.join so it resolves on POSIX systems as well as
# on Windows; a literal backslash-separated string only works on Windows.
xbrl_path = os.path.join(
    "S100QZLI",
    "XBRL",
    "PublicDoc",
    "jpcrp030000-asr-001_E04236-000_2023-03-31_01_2023-06-20.xbrl",
)

parser = EdinetXbrlParser()
parsed_xbrl = parser.parse_file(xbrl_path)

# Look up a single fact by its tag name and context reference.
key = "jpcrp_cor:DividendPaidPerShareSummaryOfBusinessResults"
context_ref = "CurrentYearDuration_NonConsolidatedMember"
summary_of_business_info = parsed_xbrl.get_data_by_context_ref(key, context_ref)
# NOTE(review): if the lookup finds nothing this may be None and the next line
# would raise AttributeError — confirm against the edinet_xbrl library.
summary_of_business = summary_of_business_info.get_value()

# Render the extracted value as HTML in the cell output.
HTML(summary_of_business)

In [67]:
# Show the object returned by get_data_by_context_ref for inspection.
summary_of_business_info

In [46]:
# Tag names from the "summary of business results" section.
# NOTE(review): presumably combined with the "jpcrp_cor:" prefix when used as
# lookup keys, as in the key used earlier in this notebook — confirm at use site.
list_jpcrp_cor_tag = [
    "NetSalesSummaryOfBusinessResults",  # Net sales
    "OrdinaryIncomeLossSummaryOfBusinessResults",  # Ordinary income
    "NetIncomeLossSummaryOfBusinessResults",  # Net income
    "NetAssetsSummaryOfBusinessResults",  # Net assets
    "TotalAssetsSummaryOfBusinessResults",  # Total assets
    "NetAssetsPerShareSummaryOfBusinessResults",  # Net assets per share
    "DividendPaidPerShareSummaryOfBusinessResults",  # Dividend per share
    "InterimDividendPaidPerShareSummaryOfBusinessResults",  # Interim dividend per share
    "BasicEarningsLossPerShareSummaryOfBusinessResults",  # Basic earnings per share
    "EquityToAssetRatioSummaryOfBusinessResults",  # Equity-to-asset ratio
    "RateOfReturnOnEquitySummaryOfBusinessResults",  # Return on equity
    "PriceEarningsRatioSummaryOfBusinessResults",  # Price-earnings ratio
    "PayoutRatioSummaryOfBusinessResults",  # Payout ratio
    "TotalShareholderReturn",  # Total shareholder return
]

SyntaxError: EOL while scanning string literal (1213690431.py, line 1)