<a href="https://colab.research.google.com/github/kospi-2025/EVT/blob/main/EVT10_EVT_full.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import time
import os


In [2]:
import pandas as pd
import numpy as np

# 1. Load ticker_info.csv from GitHub
ticker_info_url = "https://raw.githubusercontent.com/kospi-2025/EVT/main/source_data/ticker_info.csv"
df = pd.read_csv(ticker_info_url)
# Left-pad ids with zeros to six characters (they may have been parsed as integers).
df['id'] = df['id'].astype(str).str.zfill(6)
# Build the ticker symbol by appending the ".KS" suffix to the padded id.
df['Yahoo_Ticker'] = df['id'] + ".KS"

# 2. Manually add the ^KS200 index row (it has no 'id', so that column is NaN after concat)
df_index = pd.DataFrame({
    "Yahoo_Ticker": ["^KS200"],
    "name": ["KOSPI 200 Index"],
    "sector": ["Index"]
})

df_info = pd.concat([df, df_index], ignore_index=True)

# 3. Build lookup dictionaries: ticker -> name and ticker -> sector
ticker_to_name = dict(zip(df_info["Yahoo_Ticker"], df_info["name"]))
ticker_to_sector = dict(zip(df_info["Yahoo_Ticker"], df_info["sector"]))

# 4. Build the list of sectors (unique, non-null values; includes "Index" added above)
sectors = df_info["sector"].dropna().unique()

# 5. Load one CSV per sector; the file name is the sector name plus ".csv"
base_url = "https://raw.githubusercontent.com/kospi-2025/EVT/main/source_data/"
sector_data = {}

for sec in sectors:
    file_name = sec + ".csv"
    url = f"{base_url}{file_name}"
    try:
        # Two header rows -> MultiIndex columns; first column is parsed as a date index.
        df_sector = pd.read_csv(url, header=[0, 1], index_col=0, parse_dates=True)
        sector_data[sec] = df_sector
        print(f"✅ Loaded {sec}")
    except Exception as e:
        # Best effort: a sector that fails to load is reported and simply left out.
        print(f"❌ Failed to load {sec}: {e}")

# ----------------------------------------------------------------------
# Combine all sectors and derive the log-drawdown table
# ----------------------------------------------------------------------

# Put all sector frames side by side and sort the columns.
temp = pd.concat(sector_data.values(), axis=1).sort_index(axis=1)

# Tickers excluded from the analysis.
tickers_to_drop = ["000660.KS", "032640.KS"]

# Column level 1 is matched against ticker symbols here.
# NOTE(review): presumably level 0 is the price field ("Low", "Close", ...) and
# level 1 the ticker — confirm against the CSV layout.
data = temp.loc[:, ~temp.columns.get_level_values(1).isin(tickers_to_drop)]
# log(Low_t / Close_{t-1}) per column. `.where(x < 0)` binds tighter than the
# leading unary minus, so only negative log ratios are kept (the rest become NaN)
# and the kept values are then negated, i.e. stored as positive numbers.
logDD = -np.log(data["Low"]/data["Close"].shift(1)).where(lambda x: x < 0)

✅ Loaded Communication_Services
✅ Loaded Constructions
✅ Loaded Consumer_Discretionary
✅ Loaded Consumer_Staples
✅ Loaded Energy_Chemicals
✅ Loaded Financials
✅ Loaded Health_Care
✅ Loaded Heavy_Industries
✅ Loaded Industrials
✅ Loaded IT
✅ Loaded Steels_Materials
✅ Loaded Index


In [3]:
# ===========================================
# 📦 1. Setup
# ===========================================
!pip install -q lmoments3

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.stats import genpareto
import lmoments3 as lm
from lmoments3 import distr

# ===========================================
# 📈 2. Helper Functions
# ===========================================

# 2.1 Basic Statistics
def basic_statistics(exceedances):
    """Summarise a sample of exceedances with its first four moments.

    Parameters
    ----------
    exceedances : array-like exposing ``.mean()`` and ``.std()``
        The sample to describe.

    Returns
    -------
    dict
        Keys ``"mean"``, ``"std"``, ``"skewness"`` and ``"kurtosis"``.
        Mean and standard deviation come from the object's own methods
        (so the ddof convention is whatever that object uses); skewness
        and kurtosis come from ``scipy.stats``.
    """
    summary = {}
    summary["mean"] = exceedances.mean()
    summary["std"] = exceedances.std()
    summary["skewness"] = stats.skew(exceedances)
    # fisher=False requests Pearson kurtosis (no subtraction of 3).
    summary["kurtosis"] = stats.kurtosis(exceedances, fisher=False)
    return summary

# 2.2 L-moment Estimation
def fit_gpd_lmoment(exceedances):
    """Fit a generalized Pareto distribution by the method of L-moments.

    Parameters
    ----------
    exceedances : array-like
        Sample passed to ``lm.lmom_ratios`` and ``lm.distr.gpa.lmom_fit``.

    Returns
    -------
    tuple
        ``(xi, beta, l1, l2, tau3, tau4)``: the fit's ``'c'`` and ``'scale'``
        entries followed by the four values returned by
        ``lm.lmom_ratios(..., nmom=4)``.

    Raises
    ------
    ValueError
        If any step fails; the original error text is embedded in the message.

    Notes
    -----
    NOTE(review): only the ``'c'`` and ``'scale'`` entries of the fit result
    are read. If the fit also estimates a location, it is dropped here —
    confirm this is intended.
    """
    try:
        ratios = lm.lmom_ratios(exceedances, nmom=4)
        fitted = lm.distr.gpa.lmom_fit(exceedances)
        l1, l2, tau3, tau4 = ratios
        return fitted['c'], fitted['scale'], l1, l2, tau3, tau4
    except Exception as e:
        raise ValueError(f"L-moment fitting failed: {e}")

# 2.3 MLE Estimation
def fit_gpd_mle(exceedances):
    """Fit a generalized Pareto distribution by maximum likelihood.

    The location is held fixed at 0 (``floc=0``), so only shape and scale
    are estimated.

    Parameters
    ----------
    exceedances : array-like
        Sample to fit.

    Returns
    -------
    tuple
        ``(xi, beta)`` — the fitted shape and scale.
    """
    shape, _fixed_loc, scale = genpareto.fit(exceedances, floc=0)
    return shape, scale

# 2.4 Anderson-Darling statistic
def ad_statistic(exceedances, xi, beta):
    """Anderson-Darling statistic of a sample against a GPD(xi, loc=0, beta).

    Computes ``-n - mean((2i - 1) * (log F_(i) + log(1 - F_(n+1-i))))`` where
    ``F_(i)`` is the model CDF at the i-th smallest observation.

    Parameters
    ----------
    exceedances : array-like
        Sample to test; it is sorted internally.
    xi : float
        GPD shape parameter.
    beta : float
        GPD scale parameter.

    Returns
    -------
    float
        The Anderson-Darling statistic.
    """
    ordered = np.sort(exceedances)
    size = len(ordered)
    # Keep CDF values strictly inside (0, 1) so neither logarithm can blow up.
    cdf_vals = np.clip(
        genpareto.cdf(ordered, c=xi, loc=0, scale=beta),
        1e-10,
        1 - 1e-10,
    )
    odd_weights = 2 * np.arange(1, size + 1) - 1
    # Lower-tail term in ascending order, upper-tail term in descending order.
    log_terms = np.log(cdf_vals) + np.log(1 - cdf_vals[::-1])
    return -size - np.mean(odd_weights * log_terms)

# 2.5 Bootstrap AD p-value
def bootstrap_ad_pvalue(exceedances, xi, beta, B=500, random_state=None):
    """Parametric-bootstrap p-value for the Anderson-Darling statistic.

    Draws ``B`` synthetic samples of the same size from GPD(xi, loc=0, beta),
    computes the AD statistic of each against the same (xi, beta), and
    reports the fraction that exceed the observed statistic.

    Parameters
    ----------
    exceedances : array-like
        Observed sample.
    xi : float
        GPD shape parameter.
    beta : float
        GPD scale parameter.
    B : int, default 500
        Number of bootstrap replicates.
    random_state : None, int or numpy.random.Generator, optional
        Seed or generator for reproducible simulation. ``None`` (default)
        keeps the previous behaviour of drawing from SciPy's default
        random state.

    Returns
    -------
    tuple
        ``(real_ad, p_value)``. ``p_value`` is NaN when no replicate could
        be evaluated (for example ``B == 0``).

    Notes
    -----
    NOTE(review): the parameters are not re-estimated on each synthetic
    sample; every replicate is scored against the supplied (xi, beta).
    """
    real_ad = ad_statistic(exceedances, xi, beta)
    n = len(exceedances)

    # Build ONE generator up front. Passing an integer seed directly to each
    # rvs() call would reseed every time and produce B identical samples.
    rng = None if random_state is None else np.random.default_rng(random_state)

    bootstrap_ads = []
    for _ in range(B):
        synthetic = genpareto.rvs(c=xi, loc=0, scale=beta, size=n, random_state=rng)
        try:
            bootstrap_ads.append(ad_statistic(synthetic, xi, beta))
        except Exception:
            # Best effort: skip a replicate that cannot be scored, but let
            # KeyboardInterrupt / SystemExit propagate so the loop stays
            # interruptible.
            continue

    if not bootstrap_ads:
        # No usable replicates: report NaN explicitly rather than via a
        # mean-of-empty-array warning.
        return real_ad, np.nan

    p_value = np.mean(np.asarray(bootstrap_ads) > real_ad)
    return real_ad, p_value

# 2.6 Log-likelihood for GPD
def log_likelihood_gpd(data, xi, beta):
    """Log-likelihood of ``data`` under a GPD(xi, loc=0, beta).

    Parameters
    ----------
    data : array-like
        Observations.
    xi : float
        GPD shape parameter.
    beta : float
        GPD scale parameter.

    Returns
    -------
    float
        Sum of the pointwise log-densities, or ``-inf`` when ``beta <= 0``.
    """
    scale_is_invalid = beta <= 0
    if scale_is_invalid:
        return -np.inf
    pointwise = genpareto.logpdf(data, c=xi, loc=0, scale=beta)
    return np.sum(pointwise)

# 2.7 AIC and BIC
def compute_aic_bic(logL, k, n):
    """Akaike and Bayesian information criteria for a fitted model.

    Parameters
    ----------
    logL : float
        Log-likelihood of the fitted model.
    k : int
        Number of estimated parameters.
    n : int
        Number of observations.

    Returns
    -------
    tuple
        ``(aic, bic)`` with ``aic = 2k - 2 logL`` and
        ``bic = k log(n) - 2 logL``.
    """
    twice_loglik = 2 * logL
    return 2 * k - twice_loglik, k * np.log(n) - twice_loglik

# ===========================================
# ⚡ 3. Master Analysis Function
# ===========================================

def analyze_one_ticker_upgraded(series, quantiles=[0.7,0.8,0.9,0.95,0.99], B=500, use_zero=True):
    """Run the peaks-over-threshold GPD analysis for one series.

    For every threshold, the exceedances ``x - threshold`` (for ``x >
    threshold``) are summarised, fitted with a GPD by L-moments and by MLE,
    and each fit is scored with a bootstrap Anderson-Darling test, its
    log-likelihood, and AIC/BIC.

    Parameters
    ----------
    series : pandas.Series
        Input values; NaNs are dropped before analysis.
    quantiles : sequence of float
        Quantile levels of the data used as thresholds. The default list is
        never mutated.
    B : int, default 500
        Number of bootstrap replicates per AD test.
    use_zero : bool, default True
        When true, a fixed threshold of 0 is analysed first; its row carries
        ``Quantile == 0``.

    Returns
    -------
    pandas.DataFrame
        One row per threshold that had at least 10 exceedances and fitted
        without error. Empty (no columns) if nothing qualified, including
        when ``series`` has no non-NaN values.
    """
    results = []
    data = series.dropna().values
    total_n = len(data)

    # Nothing to analyse; also avoids calling np.quantile on an empty array.
    if total_n == 0:
        return pd.DataFrame(results)

    # Carry each threshold together with its quantile label. Deriving the
    # label from the loop index mislabels every row when use_zero is False
    # (the first quantile would be reported as 0 and the rest shifted by one).
    labelled_thresholds = [(q, np.quantile(data, q)) for q in quantiles]
    if use_zero:
        labelled_thresholds = [(0, 0)] + labelled_thresholds

    for q_label, threshold in labelled_thresholds:
        exceedances = data[data > threshold] - threshold
        n_exceed = len(exceedances)

        # Too few points for a meaningful fit: skip this threshold silently.
        if n_exceed < 10:
            continue

        try:
            # Basic Stats
            basic = basic_statistics(exceedances)

            # L-moment fitting
            xi_L, beta_L, l1, l2, tau3, tau4 = fit_gpd_lmoment(exceedances)

            # MLE fitting
            xi_MLE, beta_MLE = fit_gpd_mle(exceedances)

            # AD + bootstrap (L-moment fit)
            ad_L, pval_L = bootstrap_ad_pvalue(exceedances, xi_L, beta_L, B=B)

            # AD + bootstrap (MLE fit)
            ad_MLE, pval_MLE = bootstrap_ad_pvalue(exceedances, xi_MLE, beta_MLE, B=B)

            # Log-likelihoods and AIC/BIC (k = 2: shape and scale)
            ll_L = log_likelihood_gpd(exceedances, xi_L, beta_L)
            ll_MLE = log_likelihood_gpd(exceedances, xi_MLE, beta_MLE)
            aic_L, bic_L = compute_aic_bic(ll_L, 2, n_exceed)
            aic_MLE, bic_MLE = compute_aic_bic(ll_MLE, 2, n_exceed)

            # Record everything
            result = {
                "Total_n": total_n,
                "Quantile": q_label,
                "Threshold": threshold,
                "n_exceedances": n_exceed,
                "mean": basic["mean"],
                "std": basic["std"],
                "skewness": basic["skewness"],
                "kurtosis": basic["kurtosis"],
                "L1": l1,
                "L2": l2,
                "tau3_Lskewness": tau3,
                "tau4_Lkurtosis": tau4,
                "GPD_shape_Lmoment": xi_L,
                "GPD_scale_Lmoment": beta_L,
                "GPD_shape_MLE": xi_MLE,
                "GPD_scale_MLE": beta_MLE,
                "AD_stat_Lmoment": ad_L,
                "Bootstrap_pval_Lmoment": pval_L,
                "AD_stat_MLE": ad_MLE,
                "Bootstrap_pval_MLE": pval_MLE,
                "LogL_Lmoment": ll_L,
                "LogL_MLE": ll_MLE,
                "AIC_Lmoment": aic_L,
                "BIC_Lmoment": bic_L,
                "AIC_MLE": aic_MLE,
                "BIC_MLE": bic_MLE
            }
            results.append(result)

        except Exception as e:
            # Best effort: report the failing threshold and move on to the next.
            print(f"Skipping threshold {threshold} due to error: {e}")
            continue

    return pd.DataFrame(results)

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.5/46.5 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:

# Driver: requires logDD, ticker_to_sector, ticker_to_name and
# analyze_one_ticker_upgraded from the cells above.
quantiles = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99]
B = 500

ticker_results = []
for ticker in logDD.columns:
    print(f"Analyzing {ticker}...")
    series = logDD[ticker]
    # Analyse the column, then tag every resulting row with its identity.
    # Unknown tickers fall back to sector "KOSPI200" and to the ticker itself
    # as the display name.
    df_ticker = analyze_one_ticker_upgraded(
        series, quantiles=quantiles, B=B, use_zero=True
    ).assign(
        Ticker=ticker,
        Sector=ticker_to_sector.get(ticker, "KOSPI200"),
        Name=ticker_to_name.get(ticker, ticker),
    )
    ticker_results.append(df_ticker)


# Stack the per-ticker tables into a single frame with a fresh index
final_table = pd.concat(ticker_results, ignore_index=True)

# Persist the combined table to disk
final_table.to_csv("evt_analysis_full.csv", index=False)

Analyzing 000080.KS...
Analyzing 000100.KS...
Analyzing 000120.KS...
Analyzing 000150.KS...
Analyzing 000210.KS...
Analyzing 000240.KS...
Analyzing 000270.KS...
Analyzing 000720.KS...
Analyzing 000810.KS...
Analyzing 000880.KS...
Analyzing 001040.KS...
Analyzing 001120.KS...
Analyzing 001430.KS...
Analyzing 001440.KS...
Analyzing 001450.KS...
Analyzing 001680.KS...
Analyzing 001740.KS...
Analyzing 001800.KS...
Analyzing 002380.KS...
Analyzing 002710.KS...
Analyzing 002790.KS...
Analyzing 002840.KS...
Analyzing 003030.KS...
Analyzing 003090.KS...
Analyzing 003230.KS...
Analyzing 003240.KS...
Analyzing 003490.KS...
Analyzing 003550.KS...
Analyzing 003620.KS...
Analyzing 003670.KS...
Analyzing 004000.KS...
Analyzing 004020.KS...
Analyzing 004170.KS...
Analyzing 004370.KS...
Analyzing 004490.KS...
Analyzing 004990.KS...
Analyzing 005070.KS...
Analyzing 005250.KS...
Analyzing 005300.KS...
Analyzing 005380.KS...
Analyzing 005420.KS...
Analyzing 005490.KS...
Analyzing 005830.KS...
Analyzing 0