In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

RAW  = Path("../data/raw")
PROC = Path("../data/processed"); PROC.mkdir(parents=True, exist_ok=True)

In [2]:
peers_raw = pd.read_csv(RAW / "peers" / "all_peers_fnltt_raw_long.csv", dtype=str)

def _to_num_ekrw(x):
    if pd.isna(x): return np.nan
    s = str(x).replace(",", "").replace("(", "-").replace(")", "").strip()
    return pd.to_numeric(s, errors="coerce") / 1e8  # 억원

keep = ["peer_stock_code","peer_corp_name","bsns_year","sj_div","account_nm",
        "thstrm_amount","frmtrm_amount"]
p = peers_raw[keep].copy()
p["thstrm_amount"] = p["thstrm_amount"].map(_to_num_ekrw)
p["frmtrm_amount"] = p["frmtrm_amount"].map(_to_num_ekrw)

p = p.rename(columns={"peer_stock_code":"ticker","peer_corp_name":"company",
                      "bsns_year":"year","sj_div":"statement"})
p["year"] = p["year"].astype(int)

cur = p.rename(columns={"thstrm_amount":"amount"})[["ticker","company","year","statement","account_nm","amount"]]
pri = p.rename(columns={"frmtrm_amount":"amount"})[["ticker","company","year","statement","account_nm","amount"]]
pri["year"] = pri["year"] - 1

peer_long = (pd.concat([cur, pri], ignore_index=True)
               .dropna(subset=["amount"])
               .sort_values(["company","year"]))
peer_long.to_csv(PROC / "peer_long.csv", index=False)
peer_long.head(3)

Unnamed: 0,ticker,company,year,statement,account_nm,amount
12693,51900,LG생활건강,2019,BS,유동자산,21630.74841
12694,51900,LG생활건강,2019,BS,현금및현금성자산,6471.040592
12695,51900,LG생활건강,2019,BS,단기금융상품,722.634506


In [3]:
J = RAW / "jion"
is_en = pd.read_excel(J/"Jion_IS.xlsx")
bs_en = pd.read_excel(J/"Jion_BS.xlsx")
cf_en = pd.read_excel(J/"Jion_CF.xlsx")


map_df = pd.concat([
    is_en.assign(file="IS")[["file","account_kr","account_kr_clean","account_en"]],
    bs_en.assign(file="BS")[["file","account_kr","account_kr_clean","account_en"]],
    cf_en.assign(file="CF")[["file","account_kr","account_kr_clean","account_en"]],
], ignore_index=True).drop_duplicates()
map_df.to_csv(PROC/"jion_account_mapping_used.csv", index=False)
map_df.head(10)

Unnamed: 0,file,account_kr,account_kr_clean,account_en
0,IS,I. 매출액,I. 매출액,Revenue
1,IS,제품매출액,제품매출액,Revenue
2,IS,기타매출액,기타매출액,Revenue
3,IS,II. 매출원가<주석14>,II. 매출원가,Cost of sales
4,IS,제품매출원가,제품매출원가,Cost of sales
5,IS,기초제품재고액,기초제품재고액,기초제품재고액
6,IS,당기제품제조원가,당기제품제조원가,당기제품제조원가
7,IS,타계정에서대체액,타계정에서대체액,타계정에서대체액
8,IS,기말제품재고액,기말제품재고액,기말제품재고액
9,IS,III. 매출총이익,III. 매출총이익,Gross profit


In [4]:
def tidy_long(df, statement, cur_year=2025):
    cur = (df[["account_en","current_amount"]].rename(columns={"current_amount":"amount"})
           .assign(company="지온메디텍", ticker=None, year=cur_year, statement=statement, period_tag="current"))
    pri = (df[["account_en","prior_amount"]].rename(columns={"prior_amount":"amount"})
           .assign(company="지온메디텍", ticker=None, year=cur_year-1, statement=statement, period_tag="prior"))
    return (pd.concat([cur, pri], ignore_index=True)
              .dropna(subset=["amount"])
              .rename(columns={"account_en":"account"}))

jion_long = pd.concat([
    tidy_long(is_en,"IS"),
    tidy_long(bs_en,"BS"),
    tidy_long(cf_en,"CF"),
], ignore_index=True)

USE = {
  "Revenue","Cost of sales","Operating profit (loss)","Selling, general and administrative expenses",
  "Advertising and promotion expenses","Inventories","Loss on valuation of inventories",
  "Current assets","Current liabilities","Total liabilities","Total equity",
  "Cash and cash equivalents","Short-term borrowings","Cash flows from operating activities",
  "Increase in short-term borrowings"
}
jion_long = jion_long[jion_long["account"].isin(USE)].sort_values(["year","statement","account"])
jion_long.to_csv(PROC/"jion_long.csv", index=False)
jion_long.head(10)

Unnamed: 0,account,amount,company,ticker,year,statement,period_tag
170,Cash and cash equivalents,4967469000.0,지온메디텍,,2024,BS,prior
168,Current assets,12810000000.0,지온메디텍,,2024,BS,prior
181,Current assets,27169220000.0,지온메디텍,,2024,BS,prior
204,Current assets,617895100.0,지온메디텍,,2024,BS,prior
208,Current liabilities,12892440000.0,지온메디텍,,2024,BS,prior
216,Current liabilities,19363810000.0,지온메디텍,,2024,BS,prior
178,Inventories,2962287000.0,지온메디텍,,2024,BS,prior
210,Short-term borrowings,4230000000.0,지온메디텍,,2024,BS,prior
224,Total equity,7722972000.0,지온메디텍,,2024,BS,prior
215,Total liabilities,336430300.0,지온메디텍,,2024,BS,prior


In [5]:
def canon(x: str) -> str:
    m = {
        "Revenue":"revenue",
        "Cost of sales":"cogs",
        "Operating profit (loss)":"op_inc",
        "Selling, general and administrative expenses":"sga",
        "Advertising and promotion expenses":"ad_exp",
        "Inventories":"inventories",
        "Loss on valuation of inventories":"inv_val_loss",
        "Current assets":"cur_assets",
        "Current liabilities":"cur_liab",
        "Total liabilities":"tot_liab",
        "Total equity":"equity",
        "Cash and cash equivalents":"cash",
        "Short-term borrowings":"st_borrow",
        "Cash flows from operating activities":"cfo",
        "Increase in short-term borrowings":"inc_st_borrow"
    }
    return m.get(x, None)

jion_wide_en = (jion_long.pivot_table(index=["company","ticker","year"], columns="account", values="amount", aggfunc="sum")
                .reset_index().sort_values(["company","year"]))
jion_wide_en.to_csv(PROC/"jion_wide_en.csv", index=False)

tmp = jion_long.copy()
tmp["canon"] = tmp["account"].map(canon)
tmp = tmp[tmp["canon"].notna()]
jion_wide = (tmp.pivot_table(index=["company","ticker","year"], columns="canon", values="amount", aggfunc="sum")
             .reset_index().sort_values(["company","year"]))
jion_wide.to_csv(PROC/"jion_wide.csv", index=False)

print("Wrote:", PROC/"jion_account_mapping_used.csv")
print("Wrote:", PROC/"jion_long.csv")
print("Wrote:", PROC/"jion_wide_en.csv")
print("Wrote:", PROC/"jion_wide.csv")

Wrote: ../data/processed/jion_account_mapping_used.csv
Wrote: ../data/processed/jion_long.csv
Wrote: ../data/processed/jion_wide_en.csv
Wrote: ../data/processed/jion_wide.csv


In [6]:
import re, unicodedata

def _norm(s):
    s = unicodedata.normalize("NFKC", str(s)).lower()
    return re.sub(r"[\s·•/,\-()_{}\[\]]+", "", s)

SYN = {
    "revenue":  ["매출액","매출","영업수익","revenue","sales","salesrevenue"],
    "cogs":     ["매출원가","원가","cogs","costofsales","costofgoodsold"],
    "op_inc":   ["영업이익","operatingincome","operatingprofit"],
    "sga":      ["판매비와관리비","판매비및관리비","sellinggeneralandadministrative"],
    "ad_exp":   ["광고선전비","광고비","판매촉진비","마케팅비","advertisingexpense","marketingexpense","promotionexpense"],
    "inventories": ["재고자산","inventories","inventory"],
    "inv_val_loss": ["재고자산평가손실","inventoryvaluationloss","lossonvaluationofinventories","재고평가손실","재고자산손상"],
    "cur_assets":   ["유동자산","currentassets"],
    "cur_liab":     ["유동부채","currentliabilities"],
    "tot_liab":     ["부채총계","totalliabilities","liabilitiestotal","부 채"],
    "equity":       ["자본총계","totalequity","equitytotal","stockholdersequity"],
    "cash":         ["현금및현금성자산","cashandcashequivalents"],
    "st_borrow":    ["단기차입금","shorttermborrowings","shorttermdebt","shorttermloans"],
    "cfo":          ["영업활동으로인한현금흐름","영업활동현금흐름","cashflowsfromoperatingactivities","netcashprovidedbyoperatingactivities","operatingcashflows"],
    "inc_st_borrow":["단기차입금의증가","increaseinshorttermborrowings","shorttermborrowingsincrease"]
}
SYN_N = {k: {_norm(x) for x in v} for k, v in SYN.items()}

def _canon_peers(acct):
    nm = _norm(acct)
    for k, keys in SYN_N.items():
        if any(key in nm for key in keys) or nm in keys:
            return k
    return None

d = pd.read_csv(PROC/"peer_long.csv")
d["canon"] = d["account_nm"].map(_canon_peers)
d = d[d["canon"].notna()]

peer_wide = (d.pivot_table(index=["company","ticker","year"], columns="canon", values="amount", aggfunc="first")
               .reset_index()
               .sort_values(["company","year"]))
peer_wide.to_csv(PROC/"peer_wide.csv", index=False)
peer_wide.tail(3)

canon,company,ticker,year,ad_exp,cash,cfo,cogs,cur_assets,cur_liab,equity,inventories,op_inc,revenue,sga,st_borrow,tot_liab
15,에이피알,278470,2022,0.0025,572.532616,316.307862,,1370.240201,762.665393,1002.579947,504.601611,392.284794,210.701557,2522.461647,215.0,72.465227
16,에이피알,278470,2023,,1253.345725,1078.409614,,2193.079957,822.964214,1969.488688,565.209724,1041.937249,230.052217,2912.546575,75.0,2873.094882
17,에이피알,278470,2024,,903.663544,791.241617,,2860.274454,1451.367586,3235.24074,1097.45677,1227.055444,479.474061,4208.941551,75.0,5651.435632
