<a href="https://colab.research.google.com/github/kospi-2025/EVT/blob/main/%5Bcode%5Dbest_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd

# Root of the raw GitHub content that hosts this project's input files.
base_url = "https://raw.githubusercontent.com/kospi-2025/EVT/main/"

# Ticker metadata: left-pad every id to six characters, then derive the
# Yahoo-style symbol by appending the ".KS" suffix to the padded id.
df = pd.read_csv(base_url + "source_data/" + "ticker_info.csv").assign(
    id=lambda frame: frame["id"].astype(str).str.zfill(6),
    Yahoo_Ticker=lambda frame: frame["id"] + ".KS",
)

# Lookup tables keyed by the Yahoo-style symbol.
ticker_to_name = {symbol: name for symbol, name in zip(df["Yahoo_Ticker"], df["name"])}
ticker_to_sector = {symbol: sector for symbol, sector in zip(df["Yahoo_Ticker"], df["sector"])}

# First CSV column becomes the index and is parsed as dates.
logDD = pd.read_csv(base_url + "[csv]logDD.csv", parse_dates=True, index_col=0)

In [3]:
!pip install -q lmoments3
!pip install -q pyextremes

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.5/46.5 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.4/47.4 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import lmoments3 as lm

# Example: logDD = pd.Series([...])
def theoretical_tau4_from_tau3(tau3):
    """Map an L-skewness value to the L-kurtosis implied by the GPD relation.

    Evaluates ``tau4 = tau3 * (5 * tau3 + 1) / (tau3 + 5)``, the relation
    between L-kurtosis and L-skewness for the generalized Pareto distribution.

    Parameters
    ----------
    tau3 : float or array-like supporting arithmetic
        L-skewness value(s).

    Returns
    -------
    Same type as the arithmetic on ``tau3`` produces
        The corresponding theoretical L-kurtosis value(s).
    """
    numerator = tau3 * (5 * tau3 + 1)
    denominator = tau3 + 5
    return numerator / denominator

def compute_l_moments_ratio(excesses):
    """Estimate the sample L-skewness and L-kurtosis of ``excesses``.

    Parameters
    ----------
    excesses : array-like
        Sample handed unchanged to ``lm.lmom_ratios``.

    Returns
    -------
    tuple
        ``(tau3, tau4)``: the third and fourth entries of
        ``lm.lmom_ratios(excesses, nmom=4)``. If the estimator raises an
        ordinary exception, ``(np.nan, np.nan)`` is returned instead so the
        caller can skip this sample.
    """
    try:
        # With nmom=4 the result has four entries; indices 2 and 3 are the
        # ratios used here as L-skewness and L-kurtosis.
        lmr = lm.lmom_ratios(excesses, nmom=4)
        return lmr[2], lmr[3]
    except Exception:
        # Best effort: only ordinary errors are converted to NaN.
        # KeyboardInterrupt / SystemExit are deliberately NOT caught, so a
        # long-running scan can still be interrupted.
        return np.nan, np.nan

def find_best_thresholds(series, quantiles=np.linspace(0.01, 0.99, 99), top_n=5, min_excesses=10):
    """Rank candidate thresholds by how well their excesses match the GPD L-moment relation.

    For each quantile level the threshold ``u`` is the corresponding quantile of
    ``series``; the excesses are the values strictly above ``u`` minus ``u``.
    The sample L-skewness/L-kurtosis of the excesses are compared against the
    theoretical L-kurtosis from ``theoretical_tau4_from_tau3`` using the
    squared relative deviation ``((tau4_emp / tau4_theory) - 1) ** 2``.

    Parameters
    ----------
    series : pandas.Series
        Observations; NaN values are dropped and the index is sorted first.
    quantiles : iterable of float
        Quantile levels to try as thresholds.
    top_n : int
        Number of best (lowest-error) candidates to return.
    min_excesses : int
        Candidates with fewer excesses than this are skipped (default 10).

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows sorted by ascending ``error`` with columns
        ``quantile, threshold, tau3_emp, tau4_emp, tau4_theory, error``.
        When no candidate qualifies, an empty frame with these columns is
        returned (rather than raising ``KeyError`` on the missing column).
    """
    columns = ["quantile", "threshold", "tau3_emp", "tau4_emp", "tau4_theory", "error"]

    clean = series.dropna().sort_index()
    candidates = []

    for q in quantiles:
        u = clean.quantile(q)
        excess = clean[clean > u] - u
        if len(excess) < min_excesses:
            continue

        tau3_emp, tau4_emp = compute_l_moments_ratio(excess)
        if np.isnan(tau3_emp) or np.isnan(tau4_emp):
            continue

        tau4_theory = theoretical_tau4_from_tau3(tau3_emp)
        # A zero theoretical value makes the relative error undefined
        # (division by zero), so the candidate cannot be ranked.
        if tau4_theory == 0:
            continue

        error = ((tau4_emp / tau4_theory) - 1) ** 2
        if not np.isfinite(error):
            continue

        candidates.append({
            "quantile": q,
            "threshold": u,
            "tau3_emp": tau3_emp,
            "tau4_emp": tau4_emp,
            "tau4_theory": tau4_theory,
            "error": error
        })

    # Passing `columns` guarantees the "error" column exists even when
    # `candidates` is empty, so the sort below never fails.
    ranked = pd.DataFrame(candidates, columns=columns)
    return ranked.sort_values("error").reset_index(drop=True).head(top_n)


In [5]:
from tqdm import tqdm

# The candidate quantile grid is identical for every ticker, so build it once.
quantile_grid = np.linspace(0.01, 0.99, 99)

all_results = []

for ticker in tqdm(logDD.columns, desc="Processing tickers"):
    series = logDD[ticker]

    try:
        df_ticker = find_best_thresholds(series, quantiles=quantile_grid, top_n=5)
    except Exception as e:
        # Best effort: report the failing ticker and continue with the rest.
        print(f"Error processing {ticker}: {e}")
        continue

    # Nothing to record for a ticker without any usable threshold candidate.
    if df_ticker.empty:
        continue

    # assign() returns a new frame, so no defensive copy is needed.
    df_ticker = df_ticker.assign(
        Ticker=ticker,
        Name=ticker_to_name.get(ticker, ""),
        Sector=ticker_to_sector.get(ticker, ""),
    )
    all_results.append(df_ticker)

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with an explicit message before touching the output file instead.
if not all_results:
    raise ValueError("No ticker produced any threshold candidates; nothing to save.")

# Combine the per-ticker tables into one frame.
df_all = pd.concat(all_results, ignore_index=True)

# Persist the combined table.
df_all.to_csv("[csv]best_5.csv", index=False)

Processing tickers: 100%|██████████| 192/192 [02:14<00:00,  1.43it/s]


In [6]:
# Reload the file written by the previous cell and display it as the cell output.
best_5_csv = "[csv]best_5.csv"
pd.read_csv(best_5_csv)

Unnamed: 0,quantile,threshold,tau3_emp,tau4_emp,tau4_theory,error,Ticker,Name,Sector
0,0.82,0.026448,0.447241,0.267386,0.265706,0.000040,000080.KS,HiteJinro,Consumer_Staples
1,0.83,0.027458,0.449929,0.265902,0.268281,0.000079,000080.KS,HiteJinro,Consumer_Staples
2,0.53,0.013495,0.421963,0.246248,0.242020,0.000305,000080.KS,HiteJinro,Consumer_Staples
3,0.54,0.013794,0.422081,0.246530,0.242129,0.000330,000080.KS,HiteJinro,Consumer_Staples
4,0.84,0.028376,0.451138,0.264450,0.269442,0.000343,000080.KS,HiteJinro,Consumer_Staples
...,...,...,...,...,...,...,...,...,...
955,0.54,0.020398,0.375415,0.201478,0.200933,0.000007,402340.KS,SK Square,IT
956,0.55,0.021111,0.376473,0.200824,0.201830,0.000025,402340.KS,SK Square,IT
957,0.69,0.027758,0.370743,0.198485,0.196992,0.000057,402340.KS,SK Square,IT
958,0.68,0.027457,0.372910,0.197190,0.198816,0.000067,402340.KS,SK Square,IT
