# 40_categorical_ordinal — 범주형-서열형(일반)

In [None]:

%run 00_config.ipynb
import pandas as pd, numpy as np
from scipy.stats import spearmanr

# Load the survey data and obtain the weight series.
# NOTE(review): DATA_PATH and choose_weight are not defined in this cell —
# presumably they come from the `%run 00_config.ipynb` line above; confirm.
df = pd.read_excel(DATA_PATH)
w = choose_weight(df)

# Ordinal variables to analyse; names that are not columns of df are dropped.
CATEGORICAL_ORDINAL_VARS = [
    name
    for name in (
        "q1", "q2", "q3", "q4",
        "q5_1", "q5_2", "q5_3", "q5_4", "q5_5",
        "q7", "q8", "q12", "q13",
        "DM4", "DM10",
    )
    if name in df.columns
]

def wprop(series, w):
    """Return the weighted percentage share of each level of *series*.

    Missing values in *series* are dropped first, and *w* is restricted to
    the remaining index labels. The result is indexed by level, sorted by
    level, and expressed in percent of the total weight. An empty float
    Series is returned when the total weight is not positive.
    """
    observed = series.dropna()
    weights = w.loc[observed.index]
    total = weights.sum()
    if total <= 0:
        return pd.Series(dtype=float)
    # Sum the weights per level, then scale to a percentage of the total.
    shares = weights.groupby(observed).sum() / total * 100
    return shares.sort_index()

# Weighted and unweighted percentage distribution of each ordinal variable.
# Every (variable, level) pair becomes one row carrying both shares, so no
# groupby/sum merge step is needed afterwards.
rows = []
for col in CATEGORICAL_ORDINAL_VARS:
    weighted = wprop(df[col], w).round(2).to_dict()
    unweighted = (
        df[col].value_counts(dropna=True, normalize=True) * 100
    ).round(2).to_dict()
    # Merging the two dicts yields every level seen in either distribution.
    for lvl in {**unweighted, **weighted}:
        rows.append({
            "variable": col,
            "level": lvl,
            # NaN rather than 0 when no weighted share exists (wprop returns
            # an empty Series if the total weight is not positive); a 0 here
            # would read as a genuine 0% share.
            "weighted_%": weighted.get(lvl, np.nan),
            "unweighted_%": unweighted.get(lvl, np.nan),
        })

# Explicit columns keep the frame well-formed, and the sort valid, even when
# no rows were collected (e.g. none of the listed variables exist in df).
ord_summary = (
    pd.DataFrame(rows, columns=["variable", "level", "weighted_%", "unweighted_%"])
    .sort_values(["variable", "level"])
    .reset_index(drop=True)
)
ord_summary.to_csv(OUT_DIR/"categorical_ordinal_summary.csv", index=False, encoding="utf-8-sig")

# Spearman rank correlation of every ordinal variable with the two target
# items (q3 and q7), computed on rows where both values are present.
# Targets missing from df are filtered out once, up front.
targets = {
    tname: tvar
    for tname, tvar in {"SAT_CITY": "q3", "SAT_COUNCIL": "q7"}.items()
    if tvar in df.columns
}
rows = []
# CATEGORICAL_ORDINAL_VARS was already restricted to columns of df where it
# is defined, so no per-variable existence check is needed here.
for v in CATEGORICAL_ORDINAL_VARS:
    for tname, tvar in targets.items():
        m = df[v].notna() & df[tvar].notna()
        n = int(m.sum())
        if n < 3:
            # Fewer than 3 complete pairs: report NaN instead of a correlation.
            rho, p = (np.nan, np.nan)
        else:
            rho, p = spearmanr(df.loc[m, v], df.loc[m, tvar])
        rows.append({"var": v, "target": tname, "spearman_rho": rho, "p_value": p, "n": n})

# Explicit columns so that sorting and the CSV header still work when no
# variable/target pair was available and `rows` is empty.
ord_corr = pd.DataFrame(
    rows, columns=["var", "target", "spearman_rho", "p_value", "n"]
).sort_values(["target", "var"])
ord_corr.to_csv(OUT_DIR/"categorical_ordinal_spearman.csv", index=False, encoding="utf-8-sig")

# Last expression of the cell: preview the first 10 rows of the summary table.
ord_summary.head(10)
