In [None]:
import vectorbt as vbt
import pandas as pd
import numpy as np
from numba import njit
import os

ModuleNotFoundError: No module named 'pkg_resources'

1-1. Standardize Data

In [None]:
# Standardize every raw export in DATA/GOLD and write the result back over
# the same file: parse the whitespace-delimited columns, drop the unused
# 'del' column, index and sort by date, coerce the price/volume columns to
# numbers and discard rows that end up with missing values.
folder_path = 'DATA/GOLD'
numeric_columns = ['open', 'high', 'low', 'close', 'vol', 'spread']
# Header line that df.to_csv() produces below (index name + remaining columns).
standardized_header = 'date,open,high,low,close,vol,spread'

for file in sorted(os.listdir(folder_path)):
    file_path = os.path.join(folder_path, file)

    # os.listdir also returns sub-directories; read_csv cannot open those.
    if not os.path.isfile(file_path):
        continue

    # The output replaces the input, so on a second run the file is already
    # comma-separated. Re-parsing it as whitespace-delimited would mangle it,
    # so files that already carry the standardized header are left alone.
    with open(file_path, encoding='utf-8', errors='replace') as handle:
        first_line = handle.readline().strip()
    if first_line == standardized_header:
        continue

    df = pd.read_csv(
        file_path,
        sep=r'\s+',  # replacement for delim_whitespace=True (deprecated in pandas 2.2)
        names=['date', 'open', 'high', 'low', 'close', 'vol', 'del', 'spread'],
        header=0,  # the file's own header row is discarded in favour of `names`
    )
    df["date"] = pd.to_datetime(df["date"])
    df = df.drop(columns="del").set_index("date").sort_index()

    # Coerce first, then drop: errors='coerce' turns unparseable cells into
    # NaN, and those rows must be removed as well before saving.
    df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
    df = df.dropna()

    df.to_csv(file_path)


1-2. Add Features

In [None]:
def calculate_ta_features(df):
    """
    Calculate technical indicators for candlestick data.

    The input frame is not modified; a new, date-sorted frame with a fresh
    0..n-1 index is returned.

    Args:
        df (pd.DataFrame): DataFrame with columns: date, open, high, low,
            close, vol, spread. ``date`` may alternatively be the frame's
            index (named ``'date'``); it is then moved back into a column.

    Returns:
        pd.DataFrame: Copy of the data sorted by date, with the added columns
        EMA_20, EMA_50, EMA_100, EMA_200, RSI_14, ATR_14 plus whatever
        columns ``ta.adx`` returns.

    Raises:
        KeyError: If ``date`` is neither a column nor the index name.
    """
    # NOTE(review): `ta` is not imported in the visible cells -- presumably
    # `import pandas_ta as ta`; confirm it is in scope before calling this.

    # Accept frames that carry the date as their index.
    if 'date' not in df.columns and df.index.name == 'date':
        df = df.reset_index()

    # assign() builds a new frame, so the caller's 'date' column keeps its
    # original dtype instead of being overwritten in place.
    df = (
        df.assign(date=pd.to_datetime(df['date']))
        .sort_values('date')
        .reset_index(drop=True)
    )

    # Calculate EMAs
    for length in (20, 50, 100, 200):
        df[f'EMA_{length}'] = ta.ema(df['close'], length=length)

    # Calculate RSI (14-period)
    df['RSI_14'] = ta.rsi(df['close'], length=14)

    # Calculate ATR (14-period)
    df['ATR_14'] = ta.atr(df['high'], df['low'], df['close'], length=14)

    # Calculate ADX (14-period); the result is appended column-wise
    adx = ta.adx(df['high'], df['low'], df['close'], length=14)
    df = pd.concat([df, adx], axis=1)  # Adds ADX_14, DMP_14, DMN_14

    return df