# 必要なライブラリのインポート

In [None]:
import pandas as pd
from pathlib import Path
import glob
import os

# Show every column when DataFrames are printed (None removes the column limit)
pd.set_option("display.max_columns", None)

# パラメータ管理

In [None]:
# Folder that holds the OHLCV Parquet files
data_folder = Path("../data/ohlcv")

# Pick the most recent raw OHLCV Parquet file.
# The glob pattern also matches this notebook's own "*_atr.parquet" outputs, so
# those are filtered out. Otherwise a re-run would select the previous output as
# its input, overwrite it in place, and lose more leading rows to the ATR
# warm-up period on every run.
# NOTE: os.path.getctime is the creation time on Windows but the last
# metadata-change time on Unix.
list_of_files = [
    candidate
    for candidate in glob.glob(os.path.join(data_folder, 'df_ohlcv_*.parquet'))
    if not Path(candidate).stem.endswith('_atr')
]
if not list_of_files:
    raise FileNotFoundError(f"No ohlcv parquet files found in {data_folder}")
latest_file = max(list_of_files, key=os.path.getctime)
ohlcv_file_path = Path(latest_file)

# Build the output path: same folder, input stem plus an "_atr" suffix.
# The replace() is kept so output names stay identical to earlier runs.
output_filename = ohlcv_file_path.stem.replace('_atr', '') + '_atr.parquet'
output_ohlcv_file_path = data_folder / output_filename

# Look-back window (number of rows) used for the ATR moving average
window_size = 14

print(f"Input file: {ohlcv_file_path}")
print(f"Output file: {output_ohlcv_file_path}")

# ATRの計算

In [None]:
def calculate_atr(ohlcv_df: pd.DataFrame, window_size: int) -> pd.DataFrame:
    """Return a copy of *ohlcv_df* with an ``atr`` (Average True Range) column.

    The true range of each row is the largest of ``hi - lo``,
    ``|hi - previous cl|`` and ``|lo - previous cl|``; missing components
    (e.g. the first row, which has no previous close) are ignored. ATR is the
    simple moving average of the true range over ``window_size`` rows. Rows
    are processed in their existing order.

    Args:
        ohlcv_df: Input frame; must contain the columns ``op``, ``hi``,
            ``lo`` and ``cl``. It is not modified.
        window_size: Number of rows in the moving-average window.

    Returns:
        A new DataFrame with all input columns plus ``atr``. Rows whose ATR
        is NaN (the warm-up rows at the start) are removed.

    Raises:
        ValueError: If any of the required columns is missing.
    """
    # Validate that the expected column names (op, hi, lo, cl) are present
    required_cols = {'op', 'hi', 'lo', 'cl'}
    if not required_cols.issubset(ohlcv_df.columns):
        raise ValueError(f"Input DataFrame must contain columns: {required_cols}")

    # True Range. The components live in standalone Series rather than
    # scratch columns on the frame, so input columns that happen to be named
    # 'tr', 'tr1', 'tr2' or 'tr3' are neither overwritten nor dropped.
    prev_close = ohlcv_df['cl'].shift(1)
    true_range = pd.concat(
        [
            ohlcv_df['hi'] - ohlcv_df['lo'],
            (ohlcv_df['hi'] - prev_close).abs(),
            (ohlcv_df['lo'] - prev_close).abs(),
        ],
        axis=1,
    ).max(axis=1)  # row-wise max skips NaN components

    # ATR as a simple moving average of the true range
    df = ohlcv_df.copy()
    df['atr'] = true_range.rolling(window=window_size).mean()

    # Drop the warm-up rows where the window is not yet full
    return df.dropna(subset=['atr'])

# メイン処理

In [None]:
# Load the input OHLCV data from the Parquet file
ohlcv_df = pd.read_parquet(ohlcv_file_path)

# Compute ATR over the configured window
ohlcv_df_with_atr = calculate_atr(ohlcv_df, window_size)

# Write the result to the output Parquet file
ohlcv_df_with_atr.to_parquet(output_ohlcv_file_path)

saved_rows = len(ohlcv_df_with_atr)
print("Processing complete.")
print(f"{saved_rows} rows saved to {output_ohlcv_file_path}")

# Preview the first and last rows of the result
for banner, preview in (
    ("--- Head ---", ohlcv_df_with_atr.head()),
    ("--- Tail ---", ohlcv_df_with_atr.tail()),
):
    print(banner)
    print(preview)