In [5]:
import pandas as pd
import numpy as np
import warnings
import os

warnings.filterwarnings('ignore')

# --- Config ---
class ConfigG2:
    """Static settings for the Group 2 (CAD/AUD) pair backtest."""

    # Quarter labels to process; each label is substituted into a data file name.
    QUARTERS = [
        '2023_Q1',
        '2023_Q3',
        '2023_Q4',
        '2024_Q2',
        '2024_Q4',
        '2025_Q1',
        '2025_Q2',
    ]

    # Per-instrument numbers keyed by price column name.
    # NOTE(review): 'p' is used as a price multiplier when building the spread;
    # 'c' looks like a per-trade cost -- confirm against the contract specs.
    SPECS = {
        'CAD': {'p': 100000.0, 'c': 10.0},
        'AUD': {'p': 100000.0, 'c': 10.0},
    }

    # Positions are forced flat from EXIT_TIME (inclusive) up to RESTART_TIME
    # (exclusive).
    EXIT_TIME = pd.to_datetime("16:50").time()
    RESTART_TIME = pd.to_datetime("18:10").time()

    # Optimized parameters (for the quarterly reset).
    WINDOW_BETA = 300   # rolling window length for the hedge ratio
    WINDOW_Z = 120      # rolling window length for the spread z-score
    ENTRY = 2.5         # open a position when |z| exceeds this level
    EXIT = 0.0          # close the position once z crosses back over this level

def mySR(x, scale=252):
    """Return the annualized Sharpe ratio of the return series *x*.

    NaN entries are ignored. The standard deviation is the population one
    (``np.nanstd`` default, ``ddof=0``).

    Args:
        x: Array-like of periodic returns (list, ndarray, pandas Series, ...).
        scale: Number of periods per year used for annualization.

    Returns:
        ``sqrt(scale) * mean / std``, or ``0.0`` when the ratio is undefined:
        no observations, only NaN observations, or zero dispersion.
    """
    values = np.asarray(x, dtype=float)

    # With no usable observations np.nanstd returns NaN (plus a warning), which
    # would slip past the zero-std guard below and propagate NaN to callers.
    if values.size == 0 or np.isnan(values).all():
        return 0.0

    sd = np.nanstd(values)
    if sd == 0:
        return 0.0
    return np.sqrt(scale) * np.nanmean(values) / sd

# --- Path Finder ---
# Helper that searches a few likely locations for a quarter's data file.
def find_data_path(quarter, prefix='data2'):
    """Locate the parquet file for *quarter* by probing nearby directories.

    The file name is ``{prefix}_{quarter}.parquet``. Candidates are checked in
    order, relative to the current working directory: ``data/``, ``../data/``,
    ``../../data/``, ``../../../data/`` and finally the working directory
    itself.

    Args:
        quarter: Quarter label embedded in the file name.
        prefix: Leading part of the file name.

    Returns:
        The first candidate path that exists, or ``None`` if none does.
    """
    target = f'{prefix}_{quarter}.parquet'

    # Ordered from the local data folder outwards; '' means "next to the script".
    search_dirs = ('data/', '../data/', '../../data/', '../../../data/', '')
    candidates = (folder + target for folder in search_dirs)

    return next((candidate for candidate in candidates if os.path.exists(candidate)), None)

# --- Execution ---
# Backtest the CAD/AUD z-score pair strategy one quarter at a time and collect
# one row of summary statistics per quarter.
print("\n--- Calculating Group 2 (CAD/AUD) Stats ---")
results = []

# Relative data paths are resolved against this directory.
print(f"Current Working Directory: {os.getcwd()}")

# Read the per-instrument numbers once so the signal, the gross PnL and the
# costs all come from ConfigG2.SPECS rather than separately hard-coded literals.
cad_mult, cad_cost = ConfigG2.SPECS['CAD']['p'], ConfigG2.SPECS['CAD']['c']
aud_mult, aud_cost = ConfigG2.SPECS['AUD']['p'], ConfigG2.SPECS['AUD']['c']

for q in ConfigG2.QUARTERS:
    try:
        # Probe the usual locations for this quarter's file.
        file_path = find_data_path(q, prefix='data2')
        if file_path is None:
            raise FileNotFoundError(f"Could not find data file for {q} in standard locations.")

        # Echo the resolved path once, as a sanity check.
        if q == ConfigG2.QUARTERS[0]:
            print(f"Found data at: {file_path}")

        df = pd.read_parquet(file_path)
        df.set_index('datetime', inplace=True)

        # 1. Clean time: blank out every row in the 17:00-18:00 break.
        df.loc[df.between_time("17:00", "18:00").index] = np.nan

        # 2. Signals
        y = df['CAD'] * cad_mult
        x = df['AUD'] * aud_mult

        # Rolling slope of y on x (cov / var); 1.0 where it is not yet defined.
        min_b = max(50, int(ConfigG2.WINDOW_BETA * 0.1))
        x_roll = x.rolling(ConfigG2.WINDOW_BETA, min_periods=min_b)
        beta = (x_roll.cov(y) / x_roll.var()).fillna(1.0)

        spread = y - (beta * x)

        # Rolling z-score of the spread.
        min_z = max(10, int(ConfigG2.WINDOW_Z * 0.1))
        spread_roll = spread.rolling(ConfigG2.WINDOW_Z, min_periods=min_z)
        z_score = (spread - spread_roll.mean()) / spread_roll.std()

        # 3. Position loop
        # Rows inside [EXIT_TIME, RESTART_TIME) must carry no position. The
        # mask is built once and reused for both the loop and the PnL step.
        flat_window = np.array(
            [ConfigG2.EXIT_TIME <= t < ConfigG2.RESTART_TIME for t in df.index.time],
            dtype=bool,
        )

        pos_c, pos_a = np.zeros(len(df)), np.zeros(len(df))
        curr_c, curr_a = 0, 0

        for i, (is_flat, z) in enumerate(zip(flat_window, z_score.values)):
            if is_flat:
                # Forced flat; pos_c[i] / pos_a[i] are already zero.
                curr_c, curr_a = 0, 0
                continue

            # A NaN z-score carries the current position forward unchanged.
            if not np.isnan(z):
                if curr_c == 0:
                    if z > ConfigG2.ENTRY:
                        curr_c, curr_a = -1, 1      # spread rich: short CAD, long AUD
                    elif z < -ConfigG2.ENTRY:
                        curr_c, curr_a = 1, -1      # spread cheap: long CAD, short AUD
                elif curr_c == -1 and z <= ConfigG2.EXIT:
                    curr_c, curr_a = 0, 0
                elif curr_c == 1 and z >= -ConfigG2.EXIT:
                    curr_c, curr_a = 0, 0

            pos_c[i], pos_a[i] = curr_c, curr_a

        # 4. PnL
        # Lag positions by one bar so a signal at bar t earns the move of t+1.
        pc = pd.Series(pos_c, index=df.index).shift(1).fillna(0)
        pa = pd.Series(pos_a, index=df.index).shift(1).fillna(0)

        # The lag can push a position into the flat window; zero it again.
        pc[flat_window] = 0
        pa[flat_window] = 0

        gross = (pc * df['CAD'].diff() * cad_mult) + (pa * df['AUD'].diff() * aud_mult)

        # Absolute position change per bar and leg; charged at the per-leg cost.
        turn_c = pc.diff().abs()
        turn_a = pa.diff().abs()
        tc = (turn_c.fillna(0) * cad_cost) + (turn_a.fillna(0) * aud_cost)
        net = gross - tc

        # Daily net PnL, dropping days whose total is exactly zero.
        d_net = net.resample('D').sum()
        d_net = d_net[d_net != 0]
        total_pnl = d_net.sum()

        net_sr = mySR(d_net, 252)
        # Score: (SR - 0.5) * log(|PnL| / 1000), with the log term floored at 0.
        stat = (net_sr - 0.5) * max(0, np.log(abs(total_pnl / 1000))) if total_pnl != 0 else 0

        results.append({
            'Quarter': q,
            'Net SR': round(net_sr, 2),
            'Net PnL': round(total_pnl, 2),
            'Trades/Day': round((turn_c + turn_a).resample('D').sum().mean(), 2),
            'Stat': round(stat, 2)
        })

    except Exception as e:
        # Best-effort batch run: report the failing quarter and keep going.
        print(f"Skipping {q}: {e}")

# Display Table
df_res2 = pd.DataFrame(results)
print("\n=== Group 2 Results ===")
print(df_res2.to_markdown(index=False))
if not df_res2.empty:
    print(f"Total Stat: {df_res2['Stat'].sum():.2f}")


--- Calculating Group 2 (CAD/AUD) Stats ---
Current Working Directory: /Users/shintarou/QF/HFD/project/hfd_class/script/models/group2
Found data at: ../../../data/data2_2023_Q1.parquet

=== Group 2 Results ===
| Quarter   |   Net SR |   Net PnL |   Trades/Day |   Stat |
|:----------|---------:|----------:|-------------:|-------:|
| 2023_Q1   |    -2.66 |  -2561.29 |         3.2  |  -2.97 |
| 2023_Q3   |    -6.73 |  -5995.14 |         3.73 | -12.96 |
| 2023_Q4   |    -1.24 |   -880.64 |         3.42 |  -0    |
| 2024_Q2   |    -4.21 |  -2870.38 |         3.78 |  -4.97 |
| 2024_Q4   |    -5.23 |  -3501.21 |         3.52 |  -7.18 |
| 2025_Q1   |    -2.55 |  -1619.93 |         3.6  |  -1.47 |
| 2025_Q2   |    -4.95 |  -4741.33 |         3.3  |  -8.48 |
Total Stat: -38.03
