<a href="https://colab.research.google.com/github/kospi-2025/EVT/blob/main/%5Bcode%5DlogDD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Root URL under which the project's CSV inputs are fetched.
base_url = "https://raw.githubusercontent.com/kospi-2025/EVT/main/"

# Read the ticker codes as text rather than letting pandas infer a numeric
# dtype: if the column were inferred as float (for instance because of a single
# missing value), stringifying it would yield codes such as "5930.0", which
# zfill(6) leaves untouched, and a missing code would become "000nan".
df = pd.read_csv(base_url + "source_data/" + "ticker_info.csv", dtype={"id": str})
df['id'] = df['id'].str.strip().str.zfill(6)  # left-pad every code to six characters
df['Yahoo_Ticker'] = df['id'] + ".KS"

# Lookup tables keyed by the ".KS"-suffixed ticker string.
ticker_to_name = dict(zip(df["Yahoo_Ticker"], df["name"]))
ticker_to_sector = dict(zip(df["Yahoo_Ticker"], df["sector"]))

In [3]:
# One CSV per distinct, non-missing sector label found in the ticker table.
sectors = df["sector"].dropna().unique()
sector_data = {}

for sector_name in sectors:
    csv_url = base_url + "source_data/" + sector_name + ".csv"
    try:
        # Each sector file has a two-row column header and dates in its first column.
        sector_frame = pd.read_csv(csv_url, header=[0, 1], index_col=0, parse_dates=True)
    except Exception as e:
        # Best effort: report the failure and carry on with the remaining sectors.
        print(f"❌ Failed to load {sector_name}: {e}")
    else:
        sector_data[sector_name] = sector_frame
        print(f"✅ Loaded {sector_name}")

# Place every loaded sector side by side and order the combined columns.
temp = pd.concat(sector_data.values(), axis=1).sort_index(axis=1)


✅ Loaded Communication_Services
✅ Loaded Constructions
✅ Loaded Consumer_Discretionary
✅ Loaded Consumer_Staples
✅ Loaded Energy_Chemicals
✅ Loaded Financials
✅ Loaded Health_Care
✅ Loaded Heavy_Industries
✅ Loaded Industrials
✅ Loaded IT
✅ Loaded Steels_Materials


In [4]:
# Some tickers are excluded from the panel before the drawdowns are computed.
tickers_with_data_errors = ["000660.KS", "032640.KS"]
tickers_with_short_history = [  # less than 500 obs
    "278470.KS", "450080.KS", "454910.KS",
    "456040.KS", "457190.KS", "489790.KS",
]
tickers_to_drop = tickers_with_data_errors + tickers_with_short_history

# The second column level is matched against the ticker strings above.
is_dropped = temp.columns.get_level_values(1).isin(tickers_to_drop)
data = temp.loc[:, ~is_dropped]

# Log of each row's Low over the previous row's Close. Only negative values are
# kept (everything else becomes NaN) and the sign is flipped, so the stored
# figures are positive.
low_vs_prev_close = np.log(data["Low"] / data["Close"].shift(1))
logDD = -low_vs_prev_close.where(low_vs_prev_close < 0)
logDD.to_csv("[csv]logDD.csv")

In [5]:
# Read the exported CSV back, using its first column as the index, to inspect what was written.
pd.read_csv("[csv]logDD.csv", index_col=0)

Unnamed: 0_level_0,000080.KS,000100.KS,000120.KS,000150.KS,000210.KS,000240.KS,000270.KS,000720.KS,000810.KS,000880.KS,...,326030.KS,329180.KS,336260.KS,352820.KS,361610.KS,373220.KS,375500.KS,377300.KS,383220.KS,402340.KS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-03,,,,,,,,,,,...,,,,,,,,,,
2000-01-04,,,,,,,,,,,...,,,,,,,,,,
2000-01-05,7.470432e-02,0.049791,0.088853,9.845054e-08,9.289365e-02,0.057894,0.055959,6.773822e-02,0.050644,0.102279,...,,,,,,,,,,
2000-01-06,,0.070866,0.069843,6.108786e-02,8.149329e-02,0.056967,0.044125,8.233029e-02,0.095310,0.108683,...,,,,,,,,,,
2000-01-07,7.410793e-02,0.034869,0.012685,,2.790855e-02,,0.022815,,,0.013008,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-23,4.807721e-03,,0.006200,2.215600e-02,7.190784e-08,0.009091,0.016941,2.037069e-08,,,...,,,0.021215,0.006631,,0.035591,,0.013195,0.020352,
2024-12-24,1.962302e-08,0.041493,0.008521,9.990068e-03,4.447746e-03,0.029520,0.013972,5.763723e-03,0.010811,0.008905,...,0.018314,0.013072,0.026556,0.033057,0.033198,0.025389,0.010630,0.013048,0.035221,0.015941
2024-12-26,1.684755e-02,0.019015,0.014706,3.278982e-02,8.915316e-03,0.019814,0.009990,1.160552e-02,0.010870,0.010733,...,0.016402,,0.026024,0.003074,0.047423,0.018746,0.027694,0.034289,0.030248,0.011146
2024-12-27,1.769958e-02,0.028270,,2.566778e-02,2.803922e-02,0.039130,0.028587,2.159068e-02,0.024625,0.020466,...,0.006799,0.048119,0.023441,0.028536,0.024419,0.011679,0.011364,0.003861,,0.019976


In [6]:
n = 500  # fixed cut-off on the number of non-missing observations per column

# Keep only the columns whose count of non-missing values does not exceed n.
obs_per_ticker = logDD.count()
selected = logDD.loc[:, obs_per_ticker <= n]

In [7]:
# Display the columns that have at most n non-missing values.
selected

2000-01-03
2000-01-04
2000-01-05
2000-01-06
2000-01-07
...
2024-12-23
2024-12-24
2024-12-26
2024-12-27
2024-12-30
