<a href="https://colab.research.google.com/github/kospi-2025/EVT/blob/main/EVT22_LMO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LogDD

In [None]:
!pip install -q lmoments3
!pip install -q pyextremes

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats

import lmoments3 as lm

from scipy.stats import genpareto
from lmoments3 import distr
from pyextremes import plot_mean_residual_life
from pyextremes import plot_parameter_stability
from pyextremes import get_extremes

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/46.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.5/46.5 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.4/47.4 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import pandas as pd
import numpy as np

# 1. Load the ticker metadata table (ticker_info.csv) from GitHub.
ticker_info_url = "https://raw.githubusercontent.com/kospi-2025/EVT/main/source_data/ticker_info.csv"
df = pd.read_csv(ticker_info_url)
# Zero-pad ids to six characters; the Yahoo ticker is the padded id plus ".KS".
df['id'] = df['id'].astype(str).str.zfill(6)
df['Yahoo_Ticker'] = df['id'] + ".KS"

# 2. Append the ^KS200 index by hand (it is added here as its own row).
df_index = pd.DataFrame([
    {"Yahoo_Ticker": "^KS200", "name": "KOSPI 200 Index", "sector": "Index"},
])
df_info = pd.concat([df, df_index], ignore_index=True)

# 3. Lookup tables: Yahoo ticker -> display name / sector.
ticker_to_name = {
    tkr: label
    for tkr, label in zip(df_info["Yahoo_Ticker"], df_info["name"])
}
ticker_to_sector = {
    tkr: group
    for tkr, group in zip(df_info["Yahoo_Ticker"], df_info["sector"])
}

# 4. Distinct sector labels (missing values dropped).
sectors = df_info["sector"].dropna().unique()

# 5. Fetch one CSV per sector; each file is read with a two-row column header
#    and a date index.
base_url = "https://raw.githubusercontent.com/kospi-2025/EVT/main/source_data/"
sector_data = {}

for sec in sectors:
    file_name = sec + ".csv"
    url = base_url + file_name
    try:
        df_sector = pd.read_csv(url, header=[0, 1], index_col=0, parse_dates=True)
    except Exception as e:
        # Best effort: report the failure and carry on with the other sectors.
        print(f"❌ Failed to load {sec}: {e}")
    else:
        sector_data[sec] = df_sector
        print(f"✅ Loaded {sec}")

#==================================

# Put all sector frames side by side and sort the column MultiIndex.
temp = pd.concat(list(sector_data.values()), axis=1)
temp = temp.sort_index(axis=1)

# Tickers excluded from the analysis (matched on column level 1).
tickers_to_drop = ["000660.KS", "032640.KS"]
drop_mask = temp.columns.get_level_values(1).isin(tickers_to_drop)
data = temp.loc[:, ~drop_mask]

# Log-return from the previous row's close to the current low. Only the negative
# returns are kept (everything else becomes NaN) and the sign is then flipped,
# so logDD holds positive drawdown magnitudes.
prev_close = data["Close"].shift(1)
log_ret = np.log(data["Low"] / prev_close)
logDD = -log_ret.where(log_ret < 0)

✅ Loaded Communication_Services
✅ Loaded Constructions
✅ Loaded Consumer_Discretionary
✅ Loaded Consumer_Staples
✅ Loaded Energy_Chemicals
✅ Loaded Financials
✅ Loaded Health_Care
✅ Loaded Heavy_Industries
✅ Loaded Industrials
✅ Loaded IT
✅ Loaded Steels_Materials
✅ Loaded Index


# LMO

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import lmoments3 as lm
from pyextremes import get_extremes

# Theoretical GPD curve on the L-moment ratio (tau3, tau4) diagram.
# NOTE(review): these closed forms match Hosking's GPD expressions, which are
# written for shape k = -xi. Sweeping the parameter over [-0.5, 1.0] therefore
# traces tau3 from 0.6 down to 0 (xi from 0.5 down to -1 in the scipy/pyextremes
# sign convention). Confirm this is the segment of the curve that is intended.
xi_vals = np.linspace(-0.5, 1.0, 500)
tau3_theory = (1 - xi_vals) / (3 + xi_vals)
tau4_theory = (1 - xi_vals) * (2 - xi_vals) / ((3 + xi_vals) * (4 + xi_vals))

# Settings
r_range = [1, 10]  # declustering windows, passed to get_extremes as f"{r}d"
markers = ['o', 'x', '^', 'D', 'v', '*', 's', 'P']

tickers = logDD.columns

# One PDF page per group of six tickers, laid out as a 2x3 grid. Each panel shows
# the theoretical GPD curve plus, for every declustering window r, the path of
# sample (tau3, tau4) as the POT threshold rises from the 51% to the 99% quantile.
with PdfPages("hosking_diagrams_all.pdf") as pdf:
    for i in range(0, len(tickers), 6):
        fig, axes = plt.subplots(2, 3, figsize=(15, 10))
        axes = axes.flatten()
        page_tickers = tickers[i:i+6]

        for j, ticker in enumerate(page_tickers):
            ax = axes[j]
            series = logDD[ticker].dropna().sort_index()
            # 29 evenly spaced thresholds between the 51% and 99% quantiles.
            u_range = np.linspace(series.quantile(0.51), series.quantile(0.99), 29)

            # Theoretical GPD curve
            ax.plot(tau3_theory, tau4_theory, 'k-', label='GPD Theory')

            for idx, r in enumerate(r_range):
                tau3_list = []
                tau4_list = []

                for u in u_range:
                    excesses = get_extremes(series, method="POT", threshold=u, r=f"{r}d")
                    try:
                        lmr = lm.lmom_ratios(excesses, nmom=4)
                        tau3, tau4 = lmr[2], lmr[3]
                    except Exception:
                        # Best effort: skip thresholds where the L-moment ratios
                        # cannot be computed. Unlike a bare `except`, this lets
                        # KeyboardInterrupt/SystemExit through so the long sweep
                        # can still be interrupted.
                        continue
                    else:
                        # Append both together so the two lists stay aligned.
                        tau3_list.append(tau3)
                        tau4_list.append(tau4)

                if tau3_list:
                    ax.plot(tau3_list, tau4_list,
                            marker=markers[idx % len(markers)],
                            linestyle='-', label=f'r = {r}', alpha=0.8)
                    # Highlight and label ("s") the first point of the path,
                    # i.e. the lowest threshold that produced a result.
                    ax.scatter(tau3_list[0], tau4_list[0],
                               color='red', edgecolor='black', zorder=5)
                    ax.annotate("s", (tau3_list[0], tau4_list[0]),
                                textcoords="offset points", xytext=(5, 5),
                                ha='left', fontsize=10)

            title = ticker_to_name.get(ticker, ticker)
            ax.set_title(f"{title} ({ticker})", fontsize=11)
            ax.set_xlabel(r'$\tau_3$')
            ax.set_ylabel(r'$\tau_4$')
            ax.grid(True)
            ax.legend(fontsize=8)

        # Remove the unused subplots on the final, partially filled page.
        for unused_ax in axes[len(page_tickers):]:
            fig.delaxes(unused_ax)

        plt.tight_layout()
        pdf.savefig(fig)
        plt.close(fig)