In [2]:
!pip install yfinance tqdm pytz fredapi --quiet



In [3]:
# from google.colab import drive
# drive.mount('/content/drive')

In [4]:
import yfinance as yf
import pandas as pd
from tqdm import tqdm
import pytz

In [5]:
# ============================================
# 수집할 종목 리스트 정의
# ============================================
symbols = [
    "NVDA","AAPL","MSFT","AMZN","GOOGL","AVGO","GOOG","META","TSLA","BRK-B",
    "JPM","LLY","WMT","ORCL","V","MA","XOM","NFLX","JNJ","PLTR",
    "COST","BAC","ABBV","AMD","HD","PG","GE","CVX","KO","UNH",
    "IBM","CSCO","WFC","MU","CAT","MS","AXP","PM","RTX","GS",
    "TMUS","CRM","ABT","TMO","MRK","MCD","APP","LRCX","DIS","ISRG",
    "LIN","PEP","UBER","AMAT","QCOM","INTC","INTU","C","NOW","T"
]

In [6]:
# =============================
# 2. 시계열 데이터 수집 (뉴욕시간 기준)
# =============================
START = "2022-11-07"
END   = "2025-11-08"   # end는 제외 → 11/07까지 포함

In [7]:
print("[1/2] 시계열 데이터 수집 중 ...")
data = yf.download(symbols, start=START, end=END, interval="1d", auto_adjust=False)

[1/2] 시계열 데이터 수집 중 ...


[*********************100%***********************]  60 of 60 completed


In [8]:
# data.index = data.index.tz_localize("UTC").tz_convert("America/New_York")

# print("가져온 기간 (뉴욕 시간):")
# print(data.index.min(), "→", data.index.max())


# UTC로 지정 → 뉴욕시간으로 변환
data.index = pd.to_datetime(data.index, utc=True).tz_convert("America/New_York")

# '세션 날짜'를 계산: 전날 저녁 라벨 → 다음날 거래일로 이동
session_dates = (data.index + pd.Timedelta(days=1)).normalize()

# '09:30' (개장시각) 타임스탬프로 교체
session_times = session_dates + pd.Timedelta(hours=9, minutes=30)

# 이걸 인덱스로 지정
data.index = session_times

print("인덱스(라벨)를 실제 뉴욕 개장 시각(09:30)으로 조정 완료")
print(data.index.min(), "→", data.index.max())

인덱스(라벨)를 실제 뉴욕 개장 시각(09:30)으로 조정 완료
2022-11-07 09:30:00-05:00 → 2025-11-07 09:30:00-05:00


In [9]:
print("데이터 레벨명:", data.columns.names)   # ['Attributes', 'Symbols'] 형태
print("기간:", data.index.min().date(), "~", data.index.max().date())

데이터 레벨명: ['Price', 'Ticker']
기간: 2022-11-07 ~ 2025-11-07


In [10]:
# 필요한 컬럼 정리
ohlcv = data[["High", "Low", "Close", "Volume"]].copy()
ohlcv.columns = [f"{ticker}_{col}" for col, ticker in ohlcv.columns]

In [11]:
# 변동성(High-Low)
volatility = pd.DataFrame(index=ohlcv.index)
for sym in symbols:
    h, l = f"{sym}_High", f"{sym}_Low"
    if h in ohlcv.columns and l in ohlcv.columns:
        volatility[f"{sym}_Volatility"] = ohlcv[h] - ohlcv[l]

price_features = pd.concat([ohlcv, volatility], axis=1).sort_index()

In [12]:
price_features.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 754 entries, 2022-11-07 09:30:00-05:00 to 2025-11-07 09:30:00-05:00
Columns: 300 entries, AAPL_High to T_Volatility
dtypes: float64(240), int64(60)
memory usage: 1.7 MB


In [13]:
price_features.head()

Unnamed: 0_level_0,AAPL_High,ABBV_High,ABT_High,AMAT_High,AMD_High,AMZN_High,APP_High,AVGO_High,AXP_High,BAC_High,...,LIN_Volatility,PEP_Volatility,UBER_Volatility,AMAT_Volatility,QCOM_Volatility,INTC_Volatility,INTU_Volatility,C_Volatility,NOW_Volatility,T_Volatility
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-11-07 09:30:00-05:00,139.149994,148.800003,100.040001,95.120003,63.529999,92.099998,15.28,47.773998,147.539993,37.34,...,4.529999,1.789993,1.210001,3.020004,4.019997,0.58,13.019989,0.650002,13.829987,0.25
2022-11-08 09:30:00-05:00,141.429993,151.630005,100.260002,98.790001,64.959999,91.720001,15.8,48.880001,151.339996,37.419998,...,4.820007,3.309998,1.51,4.010002,4.970001,1.02,20.970001,0.899998,16.190002,0.43
2022-11-09 09:30:00-05:00,138.550003,150.570007,101.269997,96.610001,63.049999,89.480003,14.83,47.900002,149.470001,36.959999,...,5.540009,2.670013,1.564999,2.370003,3.240005,0.779999,11.519989,0.709999,7.629974,0.34
2022-11-10 09:30:00-05:00,146.869995,150.660004,104.440002,104.889999,68.800003,98.690002,16.4,50.716999,155.789993,38.220001,...,5.589996,4.360001,1.219999,5.409996,6.470001,1.18,15.75,2.199997,19.909973,0.389999
2022-11-11 09:30:00-05:00,150.009995,150.520004,105.910004,110.889999,73.32,101.190002,17.395,51.900002,158.880005,38.599998,...,4.77002,6.25,1.734999,6.099998,3.170006,1.759998,13.299988,2.130001,19.73999,0.349998


In [14]:
price_features.tail()

Unnamed: 0_level_0,AAPL_High,ABBV_High,ABT_High,AMAT_High,AMD_High,AMZN_High,APP_High,AVGO_High,AXP_High,BAC_High,...,LIN_Volatility,PEP_Volatility,UBER_Volatility,AMAT_Volatility,QCOM_Volatility,INTC_Volatility,INTU_Volatility,C_Volatility,NOW_Volatility,T_Volatility
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-11-03 09:30:00-05:00,270.850006,219.0,124.059998,239.789993,260.910004,258.600006,654.900024,374.850006,363.540009,53.639999,...,13.25,2.660004,3.57,5.789993,4.360001,1.400002,16.549988,2.589996,36.119995,0.559999
2025-11-04 09:30:00-05:00,271.48999,217.080002,125.849998,236.970001,257.380005,257.01001,630.539978,368.950012,362.540009,54.0,...,7.320007,2.869995,4.949997,7.199997,7.050003,1.619999,22.650024,3.860001,32.080017,0.269999
2025-11-05 09:30:00-05:00,271.700012,217.429993,126.709999,241.910004,259.649994,251.0,623.780029,364.869995,367.820007,53.07,...,4.790009,1.699997,2.920006,11.809998,9.109985,1.579998,8.870056,3.269997,20.320007,0.299999
2025-11-06 09:30:00-05:00,273.399994,219.679993,125.0,240.770004,253.509995,250.380005,656.299988,363.5,369.26001,53.75,...,5.899994,1.910004,1.949997,7.840012,8.449997,3.040001,18.789978,2.369995,28.98999,0.699999
2025-11-07 09:30:00-05:00,272.290009,220.990005,126.620003,232.520004,235.869995,244.899994,633.590027,354.5,370.0,53.580002,...,8.820007,2.400009,2.872002,9.139999,6.0,1.299999,21.140015,3.459999,22.549988,0.460001


In [15]:
# CSV 저장
price_features.to_csv("../raw_data/주식 데이터.csv")

### 기준금리 데이터 가져오기

In [16]:
import pandas as pd

path = "../raw_data/기준금리.xlsx"

# 1. 엑셀 불러오기
df = pd.read_excel(path)

# 2. '날짜' 컬럼을 datetime으로 변환 후 'YYYY-MM-DD' 형식으로 변경
df["날짜"] = pd.to_datetime(df["날짜"], errors="coerce").dt.strftime("%Y-%m-%d")

# 3. '날짜' → 'Date' 로 컬럼 이름 변경
df = df.rename(columns={"날짜": "Date"})

# 4. Date, 금리 컬럼만 남기고 싶으면
rate_df = df[["Date", "금리"]].copy()

# 5. Date 기준 오름차순 정렬
rate_df = rate_df.sort_values("Date")

print(rate_df.head())

          Date    금리
29  2022-03-17  0.50
28  2022-05-05  1.00
27  2022-06-16  1.75
26  2022-07-28  2.50
25  2022-09-22  3.25


In [17]:
rate_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 29 to 0
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    30 non-null     object 
 1   금리      30 non-null     float64
dtypes: float64(1), object(1)
memory usage: 720.0+ bytes


In [18]:
import pandas as pd

# 1. Date를 datetime으로 변환 (현재 'YYYY-MM-DD' 문자열 상태라고 가정)
rate_df["Date"] = pd.to_datetime(rate_df["Date"])

# 2. 날짜 범위 필터링 (2022-11-01 ~ 2025-11-07 포함)
start = pd.to_datetime("2022-11-01")
end   = pd.to_datetime("2025-11-07")

rate_df = rate_df[(rate_df["Date"] >= start) & (rate_df["Date"] <= end)]

# 3. Date 기준 오름차순 정렬
rate_df = rate_df.sort_values("Date")

# 4. 다시 'YYYY-MM-DD' 문자열로 쓰고 싶으면
rate_df["Date"] = rate_df["Date"].dt.strftime("%Y-%m-%d")

print(rate_df.head())

          Date    금리
24  2022-11-03  4.00
23  2022-12-15  4.50
22  2023-02-02  4.75
21  2023-03-23  5.00
20  2023-05-04  5.25


In [19]:
print(rate_df)

          Date    금리
24  2022-11-03  4.00
23  2022-12-15  4.50
22  2023-02-02  4.75
21  2023-03-23  5.00
20  2023-05-04  5.25
19  2023-06-15  5.25
18  2023-07-27  5.50
17  2023-09-21  5.50
16  2023-11-02  5.50
15  2023-12-14  5.50
14  2024-02-01  5.50
13  2024-03-21  5.50
12  2024-05-02  5.50
11  2024-06-13  5.50
10  2024-08-01  5.50
9   2024-09-19  5.00
8   2024-11-08  4.75
7   2024-12-19  4.50
6   2025-01-30  4.50
5   2025-03-20  4.50
4   2025-05-08  4.50
3   2025-06-19  4.50
2   2025-07-31  4.50
1   2025-09-18  4.25
0   2025-10-30  4.00


In [20]:
# 5. 필요하면 CSV로 저장
rate_df.to_csv("../raw_data/기준금리.csv", index=False, encoding="utf-8-sig")

### 환경변수 데이터 가져오기(단기채, 유가, 고용자수, 실업률 추가)

In [21]:
import pandas as pd
import yfinance as yf
from fredapi import Fred

# ============================================
# 공통 설정
# ============================================
FRED_API_KEY = "0cfb957db66cb7dd8622b000bf3941db"
fred = Fred(api_key=FRED_API_KEY)

START = "2022-01-01"
END   = "2025-11-08"   # end는 제외 → 11/07까지 포함

symbols = ['^VIX','DX-Y.NYB','TLT','GC=F','GLD','SHY']


# ============================================
# 1) yfinance: 가격 데이터 수집 + 가공
# ============================================
def get_price_features(symbols, start, end):
    print("[1/2] yfinance 시계열 데이터 수집 중 ...")
    data = yf.download(symbols, start=start, end=end, interval="1d", auto_adjust=False)

    # 인덱스를 UTC로 지정 → 뉴욕시간으로 변환
    data.index = pd.to_datetime(data.index, utc=True).tz_convert("America/New_York")

    # '세션 날짜'를 계산: 전날 저녁 라벨 → 다음날 거래일로 이동
    session_dates = (data.index + pd.Timedelta(days=1)).normalize()

    # '09:30' (개장시각) 타임스탬프로 교체
    session_times = session_dates + pd.Timedelta(hours=9, minutes=30)

    # 이걸 인덱스로 지정
    data.index = session_times

    print("인덱스(라벨)를 실제 뉴욕 개장 시각(09:30)으로 조정 완료")
    print(data.index.min(), "→", data.index.max())

    # # level0 컬럼 중 'Adj Close' → 'AdjClose' (공백 제거)
    # data = data.rename(columns={'Adj Close': 'AdjClose'}, level=0)

    print("데이터 레벨명:", data.columns.names)   # ['Attributes', 'Symbols'] 형태
    print("기간:", data.index.min().date(), "~", data.index.max().date())

    # 필요한 컬럼 정리 (OHLCV + 변동성)
    ohlcv = data[["High", "Low", "Close", "Volume"]].copy()
    ohlcv.columns = [f"{ticker}_{col}" for col, ticker in ohlcv.columns]

    # 변동성(High-Low)
    volatility = pd.DataFrame(index=ohlcv.index)
    for sym in symbols:
        h, l = f"{sym}_High", f"{sym}_Low"
        if h in ohlcv.columns and l in ohlcv.columns:
            volatility[f"{sym}_Volatility"] = ohlcv[h] - ohlcv[l]

    price_features = pd.concat([ohlcv, volatility], axis=1).sort_index()

    return price_features


# ============================================
# 2) FRED: 거시지표 + GDP 수집 + 정렬
# ============================================
def get_macro_features(start, end, target_index):
    """
    start, end: FRED observation 범위
    target_index: price_features.index (뉴욕 09:30, 거래일 기준)
    """
    print("[2/2] FRED 거시지표 수집 중 ...")

    # 2-1) WTI 현물 spot 가격 (DCOILWTICO, 일별)
    wti = fred.get_series(
        "DCOILWTICO",
        observation_start=start,
        observation_end=end
    ).to_frame("WTI_Spot")

    # 2-2) PAYEMS (비농업 고용자 수, 월별)
    payems = fred.get_series(
        "PAYEMS",
        observation_start=start,
        observation_end=end
    ).to_frame("PAYEMS")

    # 2-3) UNRATE (실업률, 월별)
    unrate = fred.get_series(
        "UNRATE",
        observation_start=start,
        observation_end=end
    ).to_frame("UNRATE")

    # # 2-4) US_GDP_Real (실질 GDP, 분기별, GDPC1)
    # gdp = fred.get_series(
    #     "GDPC1",
    #     observation_start=start,
    #     observation_end=end
    # ).to_frame("US_GDP_Real")

    # 인덱스를 datetime으로 통일
    # for df in [wti, payems, unrate, gdp]:
    for df in [wti, payems, unrate]:
        df.index = pd.to_datetime(df.index)

    # 하나로 합치기
    macro = pd.concat([wti, payems, unrate], axis=1)
    # macro = pd.concat([wti, payems, unrate, gdp], axis=1)

    # FRED 인덱스를 뉴욕 09:30으로 맞추기
    macro.index = macro.index.tz_localize("America/New_York") + pd.Timedelta(hours=9, minutes=30)

    # price_features.index(=뉴욕 09:30, 거래일 기준)에 맞춰 재인덱싱 + ffill
    macro_aligned = macro.reindex(target_index).ffill()

    return macro_aligned


# ============================================
# 3) 메인: 둘을 결합
# ============================================
price_features = get_price_features(symbols, START, END)
macro_aligned  = get_macro_features(START, END, price_features.index)

final_df = price_features.join(macro_aligned, how="left")

print("최종 컬럼 목록:")
print(final_df.columns)

print("\n샘플 데이터:")
print(final_df.head(10))


[1/2] yfinance 시계열 데이터 수집 중 ...


[*********************100%***********************]  6 of 6 completed


인덱스(라벨)를 실제 뉴욕 개장 시각(09:30)으로 조정 완료
2022-01-03 09:30:00-05:00 → 2025-11-07 09:30:00-05:00
데이터 레벨명: ['Price', 'Ticker']
기간: 2022-01-03 ~ 2025-11-07
[2/2] FRED 거시지표 수집 중 ...
최종 컬럼 목록:
Index(['DX-Y.NYB_High', 'GC=F_High', 'GLD_High', 'SHY_High', 'TLT_High',
       '^VIX_High', 'DX-Y.NYB_Low', 'GC=F_Low', 'GLD_Low', 'SHY_Low',
       'TLT_Low', '^VIX_Low', 'DX-Y.NYB_Close', 'GC=F_Close', 'GLD_Close',
       'SHY_Close', 'TLT_Close', '^VIX_Close', 'DX-Y.NYB_Volume',
       'GC=F_Volume', 'GLD_Volume', 'SHY_Volume', 'TLT_Volume', '^VIX_Volume',
       '^VIX_Volatility', 'DX-Y.NYB_Volatility', 'TLT_Volatility',
       'GC=F_Volatility', 'GLD_Volatility', 'SHY_Volatility', 'WTI_Spot',
       'PAYEMS', 'UNRATE'],
      dtype='object')

샘플 데이터:
                           DX-Y.NYB_High    GC=F_High    GLD_High   SHY_High  \
Date                                                                           
2022-01-03 09:30:00-05:00      96.330002  1830.099976  169.009995  85.459999   
2022-01-04 09:3

In [22]:
final_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 970 entries, 2022-01-03 09:30:00-05:00 to 2025-11-07 09:30:00-05:00
Data columns (total 33 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   DX-Y.NYB_High        969 non-null    float64
 1   GC=F_High            969 non-null    float64
 2   GLD_High             967 non-null    float64
 3   SHY_High             967 non-null    float64
 4   TLT_High             967 non-null    float64
 5   ^VIX_High            967 non-null    float64
 6   DX-Y.NYB_Low         969 non-null    float64
 7   GC=F_Low             969 non-null    float64
 8   GLD_Low              967 non-null    float64
 9   SHY_Low              967 non-null    float64
 10  TLT_Low              967 non-null    float64
 11  ^VIX_Low             967 non-null    float64
 12  DX-Y.NYB_Close       969 non-null    float64
 13  GC=F_Close           969 non-null    float64
 14  GLD_Close            967 non-null    floa

In [23]:
final_df

Unnamed: 0_level_0,DX-Y.NYB_High,GC=F_High,GLD_High,SHY_High,TLT_High,^VIX_High,DX-Y.NYB_Low,GC=F_Low,GLD_Low,SHY_Low,...,^VIX_Volume,^VIX_Volatility,DX-Y.NYB_Volatility,TLT_Volatility,GC=F_Volatility,GLD_Volatility,SHY_Volatility,WTI_Spot,PAYEMS,UNRATE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-03 09:30:00-05:00,96.330002,1830.099976,169.009995,85.459999,146.940002,18.540001,95.629997,1798.800049,168.000000,85.419998,...,0.0,1.980001,0.700005,2.660004,31.299927,1.009995,0.040001,75.99,,
2022-01-04 09:30:00-05:00,96.459999,1815.300049,169.720001,85.489998,144.130005,17.809999,96.029999,1800.000000,168.729996,85.440002,...,0.0,1.469999,0.430000,1.389999,15.300049,0.990005,0.049995,77.00,,
2022-01-05 09:30:00-05:00,96.330002,1824.599976,170.929993,85.459999,144.160004,20.170000,95.889999,1813.099976,168.899994,85.370003,...,0.0,3.590000,0.440002,1.449997,11.500000,2.029999,0.089996,77.83,,
2022-01-06 09:30:00-05:00,96.389999,1791.300049,167.750000,85.339996,143.440002,21.059999,96.040001,1787.099976,166.860001,85.290001,...,0.0,1.980000,0.349998,1.150009,4.200073,0.889999,0.049995,79.47,,
2022-01-07 09:30:00-05:00,96.300003,1797.000000,168.009995,85.320000,143.240005,20.799999,95.709999,1784.400024,166.860001,85.260002,...,0.0,2.230000,0.590004,1.680008,12.599976,1.149994,0.059998,79.00,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-11-03 09:30:00-05:00,99.989998,4020.000000,370.839996,82.750000,89.820000,18.830000,99.709999,3959.000000,367.540009,82.709999,...,0.0,1.830000,0.279999,0.400002,61.000000,3.299988,0.040001,61.79,159540.0,4.3
2025-11-04 09:30:00-05:00,100.260002,3995.399902,365.739990,82.779999,90.059998,20.480000,99.739998,3927.399902,361.390015,82.750000,...,0.0,2.580000,0.520004,0.320000,68.000000,4.349976,0.029999,61.38,159540.0,4.3
2025-11-05 09:30:00-05:00,100.360001,3983.500000,367.100006,82.760002,89.550003,20.020000,100.059998,3929.899902,364.649994,82.680000,...,0.0,2.710001,0.300003,0.670006,53.600098,2.450012,0.080002,60.40,159540.0,4.3
2025-11-06 09:30:00-05:00,100.110001,4007.500000,368.179993,82.820000,89.849998,20.309999,99.669998,3979.899902,364.700012,82.769997,...,0.0,2.670000,0.440002,0.320000,27.600098,3.479980,0.050003,60.24,159540.0,4.3


In [24]:
import datetime as dt

start_date = dt.date(2022, 11, 7)
end_date   = dt.date(2025, 11, 7)

mask = (
    (final_df.index.date >= start_date) &
    (final_df.index.date <= end_date)
)

final_df = final_df.loc[mask].copy()

print(final_df.index.min(), "→", final_df.index.max())
print(len(final_df))

2022-11-07 09:30:00-05:00 → 2025-11-07 09:30:00-05:00
757


In [25]:
final_df

Unnamed: 0_level_0,DX-Y.NYB_High,GC=F_High,GLD_High,SHY_High,TLT_High,^VIX_High,DX-Y.NYB_Low,GC=F_Low,GLD_Low,SHY_Low,...,^VIX_Volume,^VIX_Volatility,DX-Y.NYB_Volatility,TLT_Volatility,GC=F_Volatility,GLD_Volatility,SHY_Volatility,WTI_Spot,PAYEMS,UNRATE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-11-07 09:30:00-05:00,111.269997,1677.900024,156.570007,80.639999,94.860001,25.670000,110.050003,1672.900024,155.809998,80.599998,...,0.0,1.330000,1.219994,1.620003,5.000000,0.760010,0.040001,91.80,154210.0,3.6
2022-11-08 09:30:00-05:00,110.610001,1712.099976,159.869995,80.709999,94.959999,26.160000,109.370003,1703.199951,155.979996,80.639999,...,0.0,1.920000,1.239998,1.190002,8.900024,3.889999,0.070000,88.80,154210.0,3.6
2022-11-09 09:30:00-05:00,110.639999,1720.199951,160.350006,80.839996,95.019997,26.350000,109.449997,1706.000000,158.449997,80.699997,...,0.0,1.330000,1.190002,1.329994,14.199951,1.900009,0.139999,85.79,154210.0,3.6
2022-11-10 09:30:00-05:00,110.989998,1750.300049,163.509995,81.290001,98.430000,26.590000,107.709999,1708.199951,161.660004,81.160004,...,0.0,3.750000,3.279999,1.790001,42.100098,1.849991,0.129997,86.52,154210.0,3.6
2022-11-11 09:30:00-05:00,108.440002,1766.000000,164.580002,81.279999,98.370003,23.910000,106.279999,1757.599976,163.419998,81.169998,...,0.0,1.539999,2.160004,0.830002,8.400024,1.160004,0.110001,89.14,154210.0,3.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-11-03 09:30:00-05:00,99.989998,4020.000000,370.839996,82.750000,89.820000,18.830000,99.709999,3959.000000,367.540009,82.709999,...,0.0,1.830000,0.279999,0.400002,61.000000,3.299988,0.040001,61.79,159540.0,4.3
2025-11-04 09:30:00-05:00,100.260002,3995.399902,365.739990,82.779999,90.059998,20.480000,99.739998,3927.399902,361.390015,82.750000,...,0.0,2.580000,0.520004,0.320000,68.000000,4.349976,0.029999,61.38,159540.0,4.3
2025-11-05 09:30:00-05:00,100.360001,3983.500000,367.100006,82.760002,89.550003,20.020000,100.059998,3929.899902,364.649994,82.680000,...,0.0,2.710001,0.300003,0.670006,53.600098,2.450012,0.080002,60.40,159540.0,4.3
2025-11-06 09:30:00-05:00,100.110001,4007.500000,368.179993,82.820000,89.849998,20.309999,99.669998,3979.899902,364.700012,82.769997,...,0.0,2.670000,0.440002,0.320000,27.600098,3.479980,0.050003,60.24,159540.0,4.3


In [26]:
# 방법 1) drop 사용 (추천)
final_df = final_df.drop(columns=['DX-Y.NYB_High', 'GC=F_High', 'GLD_High', 'SHY_High', 'TLT_High',
       '^VIX_High', 'DX-Y.NYB_Low', 'GC=F_Low', 'GLD_Low', 'SHY_Low',
       'TLT_Low', '^VIX_Low','DX-Y.NYB_Volume','^VIX_Volume'])

In [27]:
final_df

Unnamed: 0_level_0,DX-Y.NYB_Close,GC=F_Close,GLD_Close,SHY_Close,TLT_Close,^VIX_Close,GC=F_Volume,GLD_Volume,SHY_Volume,TLT_Volume,^VIX_Volatility,DX-Y.NYB_Volatility,TLT_Volatility,GC=F_Volatility,GLD_Volatility,SHY_Volatility,WTI_Spot,PAYEMS,UNRATE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2022-11-07 09:30:00-05:00,110.120003,1676.500000,155.850006,80.610001,93.279999,24.350000,42.0,4425700.0,4391400.0,14522800.0,1.330000,1.219994,1.620003,5.000000,0.760010,0.040001,91.80,154210.0,3.6
2022-11-08 09:30:00-05:00,109.620003,1712.099976,159.449997,80.690002,94.300003,25.540001,23.0,11299600.0,7092100.0,20360600.0,1.920000,1.239998,1.190002,8.900024,3.889999,0.070000,88.80,154210.0,3.6
2022-11-09 09:30:00-05:00,110.550003,1710.099976,158.649994,80.820000,94.610001,26.090000,497.0,6040600.0,4587600.0,15637500.0,1.330000,1.190002,1.329994,14.199951,1.900009,0.139999,85.79,154210.0,3.6
2022-11-10 09:30:00-05:00,108.209999,1750.300049,163.479996,81.260002,98.250000,23.530001,325.0,9939900.0,6631600.0,45380900.0,3.750000,3.279999,1.790001,42.100098,1.849991,0.129997,86.52,154210.0,3.6
2022-11-11 09:30:00-05:00,106.290001,1766.000000,164.559998,81.239998,97.889999,22.520000,257.0,4622400.0,4246000.0,15943100.0,1.539999,2.160004,0.830002,8.400024,1.160004,0.110001,89.14,154210.0,3.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-11-03 09:30:00-05:00,99.870003,4000.300049,368.779999,82.720001,89.739998,17.170000,0.0,7254300.0,5418800.0,40752300.0,1.830000,0.279999,0.400002,61.000000,3.299988,0.040001,61.79,159540.0,4.3
2025-11-04 09:30:00-05:00,100.220001,3947.699951,362.320007,82.750000,89.940002,19.000000,657.0,11540100.0,6502000.0,26383800.0,2.580000,0.520004,0.320000,68.000000,4.349976,0.029999,61.38,159540.0,4.3
2025-11-05 09:30:00-05:00,100.199997,3980.300049,366.510010,82.690002,88.959999,18.010000,559.0,8357300.0,3952600.0,44643400.0,2.710001,0.300003,0.670006,53.600098,2.450012,0.080002,60.40,159540.0,4.3
2025-11-06 09:30:00-05:00,99.730003,3979.899902,366.070007,82.790001,89.760002,19.500000,650.0,6891100.0,3725200.0,33027400.0,2.670000,0.440002,0.320000,27.600098,3.479980,0.050003,60.24,159540.0,4.3


In [28]:
final_df.to_csv("../raw_data/external_environment_variable.csv")