Goal: fetch 5Y standardized statements for peers via OpenDART.

In [6]:
# 01_dart_pull.ipynb — Cell 1: Config & env
import os, io, zipfile, xml.etree.ElementTree as ET
from datetime import datetime
import requests, pandas as pd

# Load .env (repo root) for DART_API_KEY
try:
    from dotenv import load_dotenv
    load_dotenv()
except Exception:
    pass

API = "https://engopendart.fss.or.kr/engapi"   # OpenDART (English) base; works for corpCode & financials
KEY = os.getenv("DART_API_KEY")
assert KEY and len(KEY) == 40, "Set DART_API_KEY (40 chars) in your .env at the repo root."

# Peers (tickers): APR, Amorepacific, LG H&H
PEER_TICKERS = ["278470", "090430", "051900"]

# Last 5 fiscal years (incl. current; empty years will be skipped automatically)
current_year = datetime.now().year
YEARS = list(range(current_year - 4, current_year + 1))

# Report & statement types
REPRT = {"A": "11011"}           # Annual
STATEMENTS = ["BS", "IS", "CF"]  # Balance Sheet, Income Statement, Cash Flow


In [7]:
def get_corp_table():
    """
    Download corpCode.xml (zipped) and return DataFrame:
    ['corp_code','corp_name','stock_code'].
    """
    url = f"{API}/corpCode.xml"
    r = requests.get(url, params={"crtfc_key": KEY}, timeout=60)
    r.raise_for_status()
    with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
        xml_name = zf.namelist()[0]
        xml_bytes = zf.read(xml_name)
    root = ET.fromstring(xml_bytes)
    rows = []
    for el in root.findall(".//list"):
        rows.append({
            "corp_code": el.findtext("corp_code"),
            "corp_name": el.findtext("corp_name"),
            "stock_code": el.findtext("stock_code"),
        })
    return pd.DataFrame(rows)

def resolve_by_stock(df, stock_codes):
    stock_codes = {str(s).zfill(6) for s in stock_codes}
    out = df[df["stock_code"].isin(stock_codes)].copy()
    return out[["stock_code","corp_name","corp_code"]].reset_index(drop=True)

def fetch_fnltt_singl_all(corp_code, bsns_year, reprt_code, fs_div="CFS", sj_div=None):
    """
    Call fnlttSinglAcntAll for one company-year-report.
    Params:
      corp_code (8d), bsns_year (YYYY), reprt_code ('11011' Annual), fs_div ('CFS' or 'OFS'), sj_div ('BS','IS','CF' or None)
    Returns tidy DataFrame with 'thstrm_amount' etc. Already numeric.
    """
    params = {
        "crtfc_key": KEY,
        "corp_code": corp_code,
        "bsns_year": str(bsns_year),
        "reprt_code": reprt_code,
        "fs_div": fs_div
    }
    if sj_div:
        params["sj_div"] = sj_div
    url = f"{API}/fnlttSinglAcntAll.json"
    r = requests.get(url, params=params, timeout=60)
    r.raise_for_status()
    data = r.json()
    if data.get("status") != "000":
        return pd.DataFrame()
    df = pd.DataFrame(data.get("list", []))
    if df.empty:
        return df

    keep = ["corp_code","bsns_year","reprt_code","fs_div","sj_div","sj_nm",
            "account_id","account_nm",
            "thstrm_amount","thstrm_add_amount",
            "frmtrm_amount","frmtrm_add_amount",
            "bfefrmtrm_amount","currency"]
    df = df[[c for c in keep if c in df.columns]].copy()

    # Numeric coercion (remove commas)
    for c in ["thstrm_amount","thstrm_add_amount","frmtrm_amount","frmtrm_add_amount","bfefrmtrm_amount"]:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c].astype(str).str.replace(",", ""), errors="coerce")
    return df


In [8]:
# Resolve corp codes
corp_df = get_corp_table()
peer_map = resolve_by_stock(corp_df, PEER_TICKERS)
display(peer_map)  # expect 3 rows (APR, Amorepacific, LG H&H)

# Pull data
frames = []
for _, row in peer_map.iterrows():
    code = row.corp_code
    for y in YEARS:
        for sj in STATEMENTS:
            df = fetch_fnltt_singl_all(code, y, REPRT["A"], fs_div="CFS", sj_div=sj)
            if not df.empty:
                frames.append(df)

peer_raw = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
print("peer_raw shape:", peer_raw.shape)
display(peer_raw.head(10))

Unnamed: 0,stock_code,corp_name,corp_code
0,90430,아모레퍼시픽,583424
1,51900,LG생활건강,356370
2,278470,에이피알,1190568


peer_raw shape: (7389, 12)


Unnamed: 0,corp_code,bsns_year,reprt_code,sj_div,sj_nm,account_id,account_nm,thstrm_amount,thstrm_add_amount,frmtrm_amount,bfefrmtrm_amount,currency
0,583424,2021,11011,BS,Statement of financial position,ifrs-full_CurrentAssets,유동자산,2018527000000.0,,1825732000000.0,1803064000000.0,KRW
1,583424,2021,11011,BS,Statement of financial position,ifrs-full_CashAndCashEquivalents,현금및현금성자산,537693600000.0,,793728300000.0,673580400000.0,KRW
2,583424,2021,11011,BS,Statement of financial position,dart_ShortTermDepositsNotClassifiedAsCashEquiv...,금융기관예치금,101825500000.0,,57882300000.0,70600000000.0,KRW
3,583424,2021,11011,BS,Statement of financial position,ifrs-full_CurrentFinancialAssetsAtFairValueThr...,당기손익-공정가치금융자산,490839300000.0,,166463200000.0,137522900000.0,KRW
4,583424,2021,11011,BS,Statement of financial position,dart_ShortTermTradeReceivable,매출채권,295769500000.0,,283770100000.0,365884400000.0,KRW
5,583424,2021,11011,BS,Statement of financial position,ifrs-full_TradeAndOtherCurrentReceivables,기타수취채권,41647240000.0,,24723690000.0,32485370000.0,KRW
6,583424,2021,11011,BS,Statement of financial position,ifrs-full_CurrentTaxAssets,당기법인세자산,3330887000.0,,19235960000.0,4014570000.0,KRW
7,583424,2021,11011,BS,Statement of financial position,dart_OtherCurrentAssets,기타유동자산,50130930000.0,,65445880000.0,66386910000.0,KRW
8,583424,2021,11011,BS,Statement of financial position,ifrs-full_Inventories,재고자산,493223600000.0,,414483000000.0,452589100000.0,KRW
9,583424,2021,11011,BS,Statement of financial position,ifrs-full_NoncurrentAssetsOrDisposalGroupsClas...,매각예정자산,4066226000.0,,,,KRW


In [9]:
# build wide table + save CSVs 
from pathlib import Path

out_dir = Path("Projects/kbeauty-device-brief/data/processed")
out_dir.mkdir(parents=True, exist_ok=True)

# Add corp_name for convenience (join by corp_code)
peer_raw_named = peer_raw.merge(peer_map[["corp_code","corp_name","stock_code"]],
                                on="corp_code", how="left")

# 1) Save the long-format data
peer_raw_named.to_csv(out_dir / "peer_raw.csv", index=False)
print("Saved:", out_dir / "peer_raw.csv")

# 2) Build a wide table: one row per (company, year); columns prefixed by statement (BS/IS/CF)
def choose_account_col(df):
    if "account_id" in df and df["account_id"].notna().any():
        return "account_id"  # stable IFRS tag if provided
    return "account_nm"

acct_col = choose_account_col(peer_raw_named)

pivot = (
    peer_raw_named
      .pivot_table(index=["corp_code","corp_name","stock_code","bsns_year","sj_div","currency"],
                   columns=acct_col, values="thstrm_amount", aggfunc="first")
      .reset_index()
)

# Join BS/IS/CF side-by-side with prefixes to avoid duplicate names across statements
blocks = []
for sj in ["BS","IS","CF"]:
    block = pivot[pivot["sj_div"] == sj].drop(columns=["sj_div"])
    if not block.empty:
        block = block.set_index(["corp_code","corp_name","stock_code","bsns_year","currency"])
        block.columns = [f"{sj}:{c}" for c in block.columns]
        blocks.append(block)

peer_wide = pd.concat(blocks, axis=1).reset_index().sort_values(["corp_name","bsns_year"])
peer_wide.to_csv(out_dir / "peer_wide.csv", index=False)

print("Saved:", out_dir / "peer_wide.csv")
display(peer_wide.head(8))


Saved: Projects/kbeauty-device-brief/data/processed/peer_raw.csv
Saved: Projects/kbeauty-device-brief/data/processed/peer_wide.csv


Unnamed: 0,corp_code,corp_name,stock_code,bsns_year,currency,BS:-Standard account code not used-,BS:dart_AcquisitionOfTreasuryShares,BS:dart_AdjustmentsForAmortisationExpense,BS:dart_AdjustmentsForAssetsLiabilitiesOfOperatingActivities,BS:dart_AdjustmentsForBadDebtExpenses,...,CF:ifrs-full_SellingGeneralAndAdministrativeExpense,CF:ifrs-full_ShareOfOtherComprehensiveIncomeOfAssociatesAndJointVenturesAccountedForUsingEquityMethodThatWillBeReclassifiedToProfitOrLossNetOfTax,CF:ifrs-full_ShareOfProfitLossOfAssociatesAccountedForUsingEquityMethod,CF:ifrs-full_ShareOfProfitLossOfAssociatesAndJointVenturesAccountedForUsingEquityMethod,CF:ifrs-full_SharePremium,CF:ifrs-full_ShorttermBorrowings,CF:ifrs-full_ShorttermDepositsNotClassifiedAsCashEquivalents,CF:ifrs-full_TradeAndOtherCurrentPayables,CF:ifrs-full_TradeAndOtherCurrentPayablesToTradeSuppliers,CF:ifrs-full_TradeAndOtherCurrentReceivables
0,356370,LG생활건강,51900,2021,KRW,38107740000.0,,,,,...,,,,,,,,,,
1,356370,LG생활건강,51900,2022,KRW,52024000000.0,,,,,...,,,,,,,,,,
2,356370,LG생활건강,51900,2023,KRW,138182500000.0,,,,,...,,,,7730730000.0,,,,,,
3,356370,LG생활건강,51900,2024,KRW,,,,,,...,,,,,,,,,,
4,583424,아모레퍼시픽,90430,2021,KRW,94729520000.0,,,,,...,,,,,,,,,,
5,583424,아모레퍼시픽,90430,2022,KRW,181406900000.0,,,,,...,,,,,,,,,,
6,583424,아모레퍼시픽,90430,2023,KRW,,,,,,...,,,,,,,,,,
7,583424,아모레퍼시픽,90430,2024,KRW,,,,,,...,,,,,,,,,,
