In [None]:
import pandas as pd

# Colab variant of the loader: assumes the parquet file has been uploaded to
# the /content/ directory. NOTE(review): a later cell reloads the same file
# from a local Windows path into the same `df_ohlcv` name; only one of the two
# cells can succeed in a given environment.
data_path = '/content/df_OHLCV_stocks_etfs.parquet'

df_ohlcv = pd.read_parquet(data_path, engine='pyarrow')

# Display the first few rows to confirm it's loaded correctly
# (`display` is provided by the IPython/Jupyter runtime).
display(df_ohlcv.head())

In [None]:
import pandas as pd

# Local Windows copy of the OHLCV parquet file.
data_path = r'c:\Users\ping\Files_win10\python\py311\stocks\data\df_OHLCV_stocks_etfs.parquet'
df_ohlcv = pd.read_parquet(data_path, engine='pyarrow')
# DataFrame.info() writes its report directly to stdout and returns None, so
# embedding it in an f-string prints a trailing "None". Print the label first,
# then let info() emit the report itself.
print('df_ohlcv.info():')
df_ohlcv.info()
df_ohlcv

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# ==============================================================================
# 1. IMPROVED DATA PIPELINE (With Z-Score Normalization)
# ==============================================================================

def get_rolling_z_score(data_matrix, window=60):
    """
    Convert each column of `data_matrix` into trailing rolling z-scores.

    For every row the column value is centred on the mean of the last `window`
    rows (current row included) and divided by the standard deviation of that
    same window, so downstream code sees "standard deviations from recent
    average" rather than raw magnitudes.

    Parameters
    ----------
    data_matrix : 2-D array-like accepted by `pd.DataFrame`.
    window : int, number of rows in the trailing window (default 60).

    Returns
    -------
    numpy.ndarray with the same shape as the input. Rows that do not yet have
    a full window, and cells whose window has exactly zero standard deviation,
    are 0.0.
    """
    frame = pd.DataFrame(data_matrix)

    # A single rolling object serves both statistics; min_periods=window keeps
    # the warm-up rows as NaN until a complete window is available.
    trailing = frame.rolling(window=window, min_periods=window)
    mu = trailing.mean()
    sigma = trailing.std()

    # The small epsilon keeps the division finite when sigma is tiny.
    scored = (frame - mu) / (sigma + 1e-8)

    # A window of identical values carries no signal: force those cells to 0.
    scored = scored.mask(sigma == 0, 0.0)

    # Remaining NaNs are the warm-up rows (and any NaN inputs); report them as 0.
    return scored.fillna(0.0).values

def prepare_robust_data(df_ohlcv, split_ratio=0.8, window=60):
    """
    Build normalized model inputs and raw-return targets, split chronologically.

    Parameters
    ----------
    df_ohlcv : DataFrame with an 'Adj Close' column and a MultiIndex; index
        level 0 is pivoted into columns (one column per level-0 key).
    split_ratio : float, fraction of rows (from the start) assigned to training.
    window : int, trailing window length passed to `get_rolling_z_score`
        (default 60, the value that was previously hard-coded).

    Returns
    -------
    (X_train, y_train, X_test, y_test) as numpy arrays. X_* hold rolling
    z-scores of the log returns; y_* hold the cleaned log returns themselves,
    which are what the reward is computed from.
    """
    print("--- ⚒⚒⚒ Preparing Level 2 Data (Normalized) ---")

    # 1. Pivot: index level 0 becomes the columns.
    closes = df_ohlcv['Adj Close'].unstack(level=0)
    print(f"    > Closes DataFrame shape: {closes.shape}")
    print(f"    > Closes has NaN: {closes.isnull().any().any()}")

    # 2. Log Returns
    log_returns_raw = np.log(closes / closes.shift(1))

    # Missing prices give NaN and zero prices give +/-inf; both are treated as
    # "no move" so they contribute neither signal nor reward.
    log_returns = log_returns_raw.replace([np.inf, -np.inf], np.nan).fillna(0.0)

    print(f"    > Log Returns DataFrame shape: {log_returns.shape}")
    print(f"    > Log Returns has NaN (after clean): {log_returns.isnull().any().any()}")
    print(f"    > Log Returns has Inf (after clean): {np.isinf(log_returns).any().any()}")

    # 3. NORMALIZE
    # The NORMALIZED data is the model input (X); the raw log returns are kept
    # for the reward calculation (y).
    returns_matrix = log_returns.values
    print(f"    > Calculating Rolling Z-Scores ({window}d)...")
    norm_features = get_rolling_z_score(returns_matrix, window=window)
    print(f"    > Normalized Features shape: {norm_features.shape}")
    print(f"    > Normalized Features has NaN: {np.isnan(norm_features).any()}")
    print(f"    > Normalized Features has Inf: {np.isinf(norm_features).any()}")

    # 4. Chronological split: the first `split_ratio` share of rows is training.
    split_idx = int(len(log_returns) * split_ratio)

    # Train Sets
    X_train = norm_features[:split_idx]
    y_train = returns_matrix[:split_idx]  # Rewards use the un-normalized returns

    # Test Sets
    X_test = norm_features[split_idx:]
    y_test = returns_matrix[split_idx:]

    return X_train, y_train, X_test, y_test

# ==============================================================================
# 2. IMPROVED AGENT (Gradient Clipping & Scaling)
# ==============================================================================

# ==============================================================================
# LONG ONLY AGENT (0% to 100% Invested)
# ==============================================================================

class LongOnlyAgent:
    """
    Linear long-only policy: position = sigmoid(states . weights), in [0, 1].

    0.0 means fully in cash and 1.0 means fully invested. Weights are updated
    by gradient ascent with momentum on the mean per-sample PnL
    (position * market return).
    """

    def __init__(self, input_dim, learning_rate=0.001):
        """
        Parameters
        ----------
        input_dim : int, number of features per sample (length of the weight vector).
        learning_rate : float, step size applied to the momentum-smoothed gradient.
        """
        self.weights = np.random.randn(input_dim) * 0.1
        self.lr = learning_rate
        self.velocity = np.zeros_like(self.weights)

    def _sigmoid(self, x):
        """
        Numerically stable logistic function mapping any real to [0.0, 1.0].

        0.0 = 100% Cash (Safe), 1.0 = 100% Long (Invested).
        """
        x = np.asarray(x, dtype=float)
        # exp(-|x|) is always <= 1, so it cannot overflow; the naive
        # 1 / (1 + exp(-x)) overflows for large negative x.
        decay = np.exp(-np.abs(x))
        result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        # Indexing with () turns a 0-d array back into a scalar and leaves
        # n-d arrays unchanged, matching what the naive formula returned.
        return result[()]

    def predict(self, states):
        """Return the position in [0, 1] for each row of `states`."""
        # 1. Compute Linear Logits
        logits = np.dot(states, self.weights)

        # 2. Apply Sigmoid (Forces output to be positive)
        return self._sigmoid(logits)

    def train_step(self, states, actions, market_returns):
        """
        Apply one momentum gradient-ascent step and return the batch's mean PnL.

        Parameters
        ----------
        states : array of shape (batch, input_dim) used to produce `actions`.
        actions : array of shape (batch,), output of `predict(states)`.
        market_returns : array of shape (batch,), realised return per sample.

        Returns
        -------
        float, mean of actions * market_returns (unscaled).
        """
        # REWARD LOGIC:
        # If Market Drops (-5%) and Action is 0.0 (Cash) -> Reward = 0.0
        # If Market Drops (-5%) and Action is 1.0 (Long) -> Reward = -5.0
        # Result: Agent learns that 0.0 > -5.0, so it goes to Cash during crashes.

        # Derivative of sigmoid expressed through its output: a * (1 - a).
        d_sigmoid = actions * (1 - actions)

        # Objective per sample: 100 * action * market_return (x100 keeps the
        # gradients from being tiny). Its gradient w.r.t. the weights is
        #   100 * market_return * sigmoid'(logit) * state.
        # The reward itself (action * return) must NOT be used as the
        # multiplier: that adds an extra `action` factor which shrinks the
        # update towards zero whenever the agent is near cash.
        per_sample_scale = d_sigmoid * market_returns * 100.0
        grads = states * per_sample_scale[:, None]
        mean_grad = np.mean(grads, axis=0)

        # Optimizer (Momentum), ascending the reward.
        self.velocity = (0.9 * self.velocity) + (0.1 * mean_grad)
        self.weights += self.lr * self.velocity

        return np.mean(actions * market_returns)

class BatchEnv:
    """
    Random mini-batch sampler over a (days x assets) feature/return matrix pair.

    Each sample is `lookback` consecutive rows of one column of X, paired with
    the value of y in the same column on the row immediately after the window.
    """

    def __init__(self, X, y, lookback=20):
        """
        Parameters
        ----------
        X : 2-D numpy array (n_days, n_assets) of normalized inputs.
        y : 2-D numpy array, indexed the same way, of raw returns (for PnL).
        lookback : int, number of consecutive rows in each input window.
        """
        self.X = X # Normalized Inputs
        self.y = y # Raw Returns (for PnL)
        self.lookback = lookback
        self.n_days, self.n_assets = X.shape

    def get_batch(self, batch_size=2048):
        """
        Draw `batch_size` random (window, next-row target) pairs.

        Returns
        -------
        (X_batch, y_batch) with shapes (batch_size, lookback) and (batch_size,).

        Raises
        ------
        ValueError if there are not enough rows for one window plus its target.
        """
        # A window starting at t needs rows t .. t+lookback-1 plus the target
        # row t+lookback, so valid starts are 0 .. n_days-lookback-1 inclusive.
        n_starts = self.n_days - self.lookback
        if n_starts < 1:
            raise ValueError(
                f"Need at least lookback + 1 = {self.lookback + 1} rows to form "
                f"a window and its target, got {self.n_days}."
            )

        asset_idx = np.random.randint(0, self.n_assets, size=batch_size)
        # randint's upper bound is exclusive, so n_starts includes the final
        # valid window (the previous bound of n_starts - 1 never sampled it).
        day_idx = np.random.randint(0, n_starts, size=batch_size)

        # Vectorised gather: row i of `window_rows` is t_i .. t_i+lookback-1.
        window_rows = day_idx[:, None] + np.arange(self.lookback)
        X_batch = self.X[window_rows, asset_idx[:, None]]
        y_batch = self.y[day_idx + self.lookback, asset_idx]

        return X_batch, y_batch

# ==============================================================================
# 3. RUNNER
# ==============================================================================

def run_level2_experiment(df_ohlcv, lookback=60, epochs=1500, batch_size=4096,
                          learning_rate=0.05, val_samples=10000, seed=None):
    """
    Train a LongOnlyAgent on normalized returns, validate it, and plot results.

    Parameters
    ----------
    df_ohlcv : DataFrame passed straight to `prepare_robust_data`.
    lookback : int, number of past rows the agent sees per sample (default 60).
    epochs : int, number of training batches (default 1500).
    batch_size : int, samples per training batch (default 4096).
    learning_rate : float, agent step size (default 0.05).
    val_samples : int, number of random samples drawn from the test split
        (default 10000).
    seed : optional int; when given, seeds NumPy's global RNG so the weight
        initialisation and batch sampling are repeatable.

    Returns
    -------
    None. Progress and summary statistics are printed and a two-panel figure
    (training curve, learned weights) is shown.
    """
    if seed is not None:
        np.random.seed(seed)

    # A. Data
    X_train, y_train, X_test, y_test = prepare_robust_data(df_ohlcv)

    # B. Setup
    env = BatchEnv(X_train, y_train, lookback=lookback)
    agent = LongOnlyAgent(input_dim=lookback, learning_rate=learning_rate)

    history = []

    print("\n✨ Starting Level 2 Training (Normalized)...")

    for i in range(epochs):
        X, y = env.get_batch(batch_size)
        actions = agent.predict(X)
        avg_pnl = agent.train_step(X, actions, y)
        history.append(avg_pnl)

        if i % 200 == 0:
            # Check if weights are moving
            w_norm = np.linalg.norm(agent.weights)
            print(f"Epoch {i:4d} | Batch PnL: {avg_pnl*100:.5f}% | Weight Mag: {w_norm:.4f}")

    # C. Test
    print("\n⚗⚗⚗ Running Validation...")
    test_env = BatchEnv(X_test, y_test, lookback=lookback)
    X_val, y_val = test_env.get_batch(val_samples)

    actions_val = agent.predict(X_val)
    results = actions_val * y_val
    final_score = np.mean(results)

    # D. Stats
    # NOTE(review): sigmoid positions are positive, so `results > 0` is true
    # exactly when the market return is positive (barring underflow to 0).
    # This "win rate" therefore measures the share of up-days in the sample,
    # not the agent's skill; zero-return samples count as non-wins.
    win_rate = np.mean(results > 0)
    print(f"FINAL TEST RESULT: Avg PnL: {final_score*100:.5f}%")
    print(f"Win Rate: {win_rate*100:.1f}% (Random is 50%)")
    print(f"LOOKBACK: {lookback}")
    print(f"LEARNING RATE: {agent.lr}")

    # ==========================================================================
    # E. VISUALIZATION (negative lookback axis)
    # ==========================================================================
    fig, (ax_curve, ax_weights) = plt.subplots(1, 2, figsize=(12, 5))

    # --- Left Chart: Training Curve ---
    # 100-epoch moving average; shrink the window for short runs so the curve
    # is not entirely NaN.
    smooth_window = max(1, min(100, len(history)))
    ax_curve.plot(pd.Series(history, dtype=float).rolling(smooth_window).mean())
    ax_curve.set_title(f"Training Curve (LR: {agent.lr})")
    ax_curve.set_ylabel("Avg PnL (Reward)")
    ax_curve.set_xlabel("Epochs")
    ax_curve.grid(True, alpha=0.3)

    # --- Right Chart: Strategy Weights ---
    # X-axis values run from -lookback to -1, so the most recent input row
    # (weight index lookback-1) is plotted at -1.
    days_lookback = np.arange(-lookback, 0)

    # Plot bars using the negative x-values
    ax_weights.bar(days_lookback, agent.weights)

    ax_weights.set_title("Learned Strategy Weights")
    ax_weights.set_ylabel("Weight Influence (Negative=Avoid, Positive=Buy)")
    ax_weights.set_xlabel("Days Lookback (Yesterday = -1)")

    # Force the X-axis to show every integer tick
    ax_weights.set_xticks(days_lookback)
    ax_weights.tick_params(axis='x', labelsize=8)

    # Add a zero line for clarity
    ax_weights.axhline(0, color='black', lw=1)
    ax_weights.grid(axis='y', alpha=0.3)

    fig.tight_layout()
    plt.show()

# Run the full experiment (data prep, training, validation, plots) on the
# OHLCV frame loaded by an earlier cell.
run_level2_experiment(df_ohlcv)

####################  
####################  

In [2]:
import pandas as pd
import numpy as np

# Notebook-wide pandas display settings.
# (Set 'display.max_rows' to None instead of 200 to display all rows.)
_display_options = {
    'display.max_rows': 200,
    'display.max_columns': None,
    'display.width': 1000,
    'display.max_colwidth': 50,
    'display.precision': 4,
}
for _option_name, _option_value in _display_options.items():
    pd.set_option(_option_name, _option_value)

In [3]:
def filter_to_anchor_dates(df: pd.DataFrame, anchor: str = 'SPY') -> pd.DataFrame:
    """
    Keep only the rows whose date also appears for the `anchor` key.

    Assumes a two-level MultiIndex where level 0 is the ticker and level 1 is
    the date.

    Parameters
    ----------
    df : DataFrame indexed by (ticker, date).
    anchor : level-0 key whose dates define the calendar to keep (default 'SPY').

    Returns
    -------
    DataFrame containing every row of `df` (sorted by index if it was not
    already) whose level-1 value is one of the anchor's dates.

    Raises
    ------
    KeyError if `anchor` is not present in index level 0.
    """
    # 1. Ensure Index is sorted for performance
    if not df.index.is_monotonic_increasing:
        df = df.sort_index()

    # 2. Extract the anchor's own date index (level 0 is dropped by .loc).
    try:
        anchor_dates = df.loc[anchor].index
    except KeyError as exc:
        raise KeyError(
            f"Anchor ticker {anchor!r} not found in index level 0; "
            "cannot derive the trading calendar."
        ) from exc

    print(f"Filtering to exact trading dates of {anchor} ({len(anchor_dates)} days)...")

    # 3. Keep rows only if their date (level 1) exists in the anchor's dates.
    on_anchor_calendar = df.index.get_level_values(1).isin(anchor_dates)
    return df.loc[on_anchor_calendar]

In [4]:
# Local Windows copy of the OHLCV parquet file.
data_path = r'c:\Users\ping\Files_win10\python\py311\stocks\data\df_OHLCV_stocks_etfs.parquet'
df_ohlcv = pd.read_parquet(data_path, engine='pyarrow')
# DataFrame.info() writes its report directly to stdout and returns None, so
# embedding it in an f-string prints a trailing "None". Print the label first,
# then let info() emit the report itself.
print('df_ohlcv.info():')
df_ohlcv.info()
df_ohlcv

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 9622377 entries, ('A', Timestamp('1999-11-18 00:00:00')) to ('^VIX', Timestamp('2025-12-17 00:00:00'))
Data columns (total 5 columns):
 #   Column     Dtype  
---  ------     -----  
 0   Adj Open   float64
 1   Adj High   float64
 2   Adj Low    float64
 3   Adj Close  float64
 4   Volume     int64  
dtypes: float64(4), int64(1)
memory usage: 404.5+ MB
df_ohlcv.info():
None


Unnamed: 0_level_0,Unnamed: 1_level_0,Adj Open,Adj High,Adj Low,Adj Close,Volume
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A,1999-11-18,27.2452,29.9398,23.9518,26.3470,74716433
A,1999-11-19,25.7108,25.7482,23.8396,24.1764,18198345
A,1999-11-22,24.7378,26.3470,23.9893,26.3470,7857768
A,1999-11-23,25.4488,26.1225,23.9518,23.9518,7138325
A,1999-11-24,24.0267,25.1120,23.9518,24.5881,5785607
...,...,...,...,...,...,...
^VIX,2025-12-11,16.8400,16.8800,14.8500,14.8500,0
^VIX,2025-12-12,14.9900,17.8500,14.8500,15.7400,0
^VIX,2025-12-15,16.5100,17.3500,15.9900,16.5000,0
^VIX,2025-12-16,17.2800,17.6100,16.1900,16.4800,0


In [5]:
# Restrict every ticker to the dates on which the default anchor (SPY) has rows.
df_ohlcv = filter_to_anchor_dates(df=df_ohlcv)
# info() prints its own report and returns None, so call it directly instead
# of interpolating its return value into an f-string.
print('df_ohlcv.info():')
df_ohlcv.info()
df_ohlcv

Filtering to exact trading dates of SPY (8279 days)...
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 8288650 entries, ('A', Timestamp('1999-11-18 00:00:00')) to ('^VIX', Timestamp('2025-12-17 00:00:00'))
Data columns (total 5 columns):
 #   Column     Dtype  
---  ------     -----  
 0   Adj Open   float64
 1   Adj High   float64
 2   Adj Low    float64
 3   Adj Close  float64
 4   Volume     int64  
dtypes: float64(4), int64(1)
memory usage: 348.5+ MB
df_ohlcv.info():
None


Unnamed: 0_level_0,Unnamed: 1_level_0,Adj Open,Adj High,Adj Low,Adj Close,Volume
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A,1999-11-18,27.2452,29.9398,23.9518,26.3470,74716433
A,1999-11-19,25.7108,25.7482,23.8396,24.1764,18198345
A,1999-11-22,24.7378,26.3470,23.9893,26.3470,7857768
A,1999-11-23,25.4488,26.1225,23.9518,23.9518,7138325
A,1999-11-24,24.0267,25.1120,23.9518,24.5881,5785607
...,...,...,...,...,...,...
^VIX,2025-12-11,16.8400,16.8800,14.8500,14.8500,0
^VIX,2025-12-12,14.9900,17.8500,14.8500,15.7400,0
^VIX,2025-12-15,16.5100,17.3500,15.9900,16.5000,0
^VIX,2025-12-16,17.2800,17.6100,16.1900,16.4800,0


In [6]:
# Quick structural overview of the filtered frame.
print(f'df_ohlcv.shape: {df_ohlcv.shape}\n')
print(f'df_ohlcv.head():\n{df_ohlcv.head()}\n')
print(f'df_ohlcv.tail():\n{df_ohlcv.tail()}\n')
# info() prints its own report and returns None, so call it directly instead
# of interpolating its return value into an f-string.
print('df_ohlcv.info():')
df_ohlcv.info()

df_ohlcv.shape: (8288650, 5)

df_ohlcv.head():
                   Adj Open  Adj High  Adj Low  Adj Close    Volume
Ticker Date                                                        
A      1999-11-18   27.2452   29.9398  23.9518    26.3470  74716433
       1999-11-19   25.7108   25.7482  23.8396    24.1764  18198345
       1999-11-22   24.7378   26.3470  23.9893    26.3470   7857768
       1999-11-23   25.4488   26.1225  23.9518    23.9518   7138325
       1999-11-24   24.0267   25.1120  23.9518    24.5881   5785607

df_ohlcv.tail():
                   Adj Open  Adj High  Adj Low  Adj Close  Volume
Ticker Date                                                      
^VIX   2025-12-11     16.84     16.88    14.85      14.85       0
       2025-12-12     14.99     17.85    14.85      15.74       0
       2025-12-15     16.51     17.35    15.99      16.50       0
       2025-12-16     17.28     17.61    16.19      16.48       0
       2025-12-17     16.45     18.00    15.82      17.62      

In [7]:
# 1. Grab the "Master Calendar" from SPY
#    (taken from the long-format frame, before unstacking)
spy_dates = df_ohlcv.loc['SPY'].index

# 2. Unstack the data (index level 0, the tickers, moves to columns)
closes = df_ohlcv['Adj Close'].unstack(level=0)

# 3. Force the index to match SPY exactly using .reindex()
#    - Any SPY date missing from the unstacked index is added as an all-NaN row
#      (tickers with no data on a date, e.g. 'A' before its first row, are NaN)
#    - Any date on which SPY has no row is removed
#    NOTE(review): if df_ohlcv was already filtered to SPY's dates, this
#    reindex presumably changes nothing beyond the row order; confirm.
closes = closes.reindex(spy_dates)
print(f"    > Closes DataFrame shape: {closes.shape}")
print(f"    > Closes has NaN: {closes.isnull().any().any()}")

    > Closes DataFrame shape: (8279, 1605)
    > Closes has NaN: True


In [8]:
# 2. Log Returns: natural log of each close divided by the previous row's close.
log_returns_raw = np.log(closes / closes.shift(1))

# Build the cleaned copy: +/-inf and NaN entries all become 0.0, leaving
# log_returns_raw untouched for inspection.
log_returns = log_returns_raw.replace([np.inf, -np.inf], np.nan).fillna(0.0)

print(f"    > Log Returns DataFrame shape: {log_returns.shape}")
print(f"    > Log Returns has NaN (after clean): {log_returns.isnull().any().any()}")
print(f"    > Log Returns has Inf (after clean): {np.isinf(log_returns).any().any()}")

  result = func(self.values, **kwargs)


    > Log Returns DataFrame shape: (8279, 1605)
    > Log Returns has NaN (after clean): False
    > Log Returns has Inf (after clean): False


In [10]:
# Inspect the pivoted close-price matrix (one column per ticker).
print(f'closes.shape:{closes.shape}')
print(f'closes:\n{closes}')

closes.shape:(8279, 1605)
closes:
Ticker           A       AA    AAL     AAON      AAPL    ABBV  ABEV    ABNB       ABT     ABVX   ACGL    ACI    ACM     ACN     ACWI     ACWX      ADBE    ADC       ADI      ADM       ADP      ADSK   ADT    AEE     AEG    AEIS       AEM       AEP     AER      AES       AFG       AFL   AFRM     AG      AGCO     AGG    AGI   AGNC    AHR       AIG    AIQ     AIRR       AIT     AIZ       AJG   AKAM   AKRE     AL    ALAB      ALB    ALC    ALGN     ALL    ALLE   ALLY    ALNY   ALSN     ALV     AM      AMAT  AMCR       AMD       AME     AMG      AMGN      AMH   AMKR   AMLP     AMP   AMRZ     AMT   AMTM    AMX    AMZN        AN    ANET       AON      AOS      APA       APD    APG       APH   APLD     APO      APP    APPF   APTV     AR     ARCC    ARE    ARES    ARGX   ARKK     ARM   ARMK   ARWR     AS     ASML    ASND     ASR   ASTS    ASX     ATI       ATO     ATR     AU   AUR    AVAV     AVB     AVDE     AVDV     AVEM    AVGO     AVLV   AVTR     AVUS     AV

In [9]:
# Inspect the log returns before cleaning (NaN/inf entries still present).
print(f'log_returns_raw.shape:{log_returns_raw.shape}')
print(f'log_returns_raw:\n{log_returns_raw}')

log_returns_raw.shape:(8279, 1605)
log_returns_raw:
Ticker           A      AA     AAL    AAON    AAPL    ABBV    ABEV    ABNB     ABT    ABVX    ACGL     ACI     ACM     ACN    ACWI    ACWX    ADBE     ADC     ADI     ADM     ADP    ADSK     ADT     AEE     AEG    AEIS     AEM     AEP     AER     AES     AFG     AFL    AFRM      AG    AGCO     AGG     AGI    AGNC     AHR     AIG     AIQ    AIRR     AIT     AIZ     AJG    AKAM    AKRE      AL    ALAB     ALB     ALC    ALGN     ALL    ALLE    ALLY    ALNY    ALSN     ALV      AM    AMAT    AMCR         AMD     AME     AMG    AMGN     AMH    AMKR    AMLP     AMP    AMRZ     AMT    AMTM     AMX        AMZN      AN    ANET     AON     AOS     APA     APD     APG     APH    APLD     APO     APP    APPF    APTV      AR    ARCC     ARE    ARES    ARGX    ARKK     ARM    ARMK    ARWR      AS    ASML    ASND     ASR    ASTS     ASX     ATI     ATO     ATR      AU     AUR    AVAV     AVB    AVDE    AVDV    AVEM    AVGO    AVLV    AVTR    AVUS  

In [17]:
# Inspect the cleaned log returns (NaN/inf entries replaced with 0.0).
print(f'log_returns.shape:{log_returns.shape}')
print(f'log_returns:\n{log_returns}')

log_returns.shape:(8279, 1605)
log_returns:
Ticker           A      AA     AAL    AAON    AAPL    ABBV    ABEV    ABNB     ABT    ABVX    ACGL     ACI     ACM     ACN    ACWI    ACWX    ADBE     ADC     ADI     ADM     ADP    ADSK     ADT     AEE     AEG    AEIS     AEM     AEP     AER     AES     AFG     AFL    AFRM      AG    AGCO     AGG     AGI    AGNC     AHR     AIG     AIQ    AIRR     AIT     AIZ     AJG    AKAM    AKRE      AL    ALAB     ALB     ALC    ALGN     ALL    ALLE    ALLY    ALNY    ALSN     ALV      AM    AMAT    AMCR         AMD     AME     AMG    AMGN     AMH    AMKR    AMLP     AMP    AMRZ     AMT    AMTM     AMX        AMZN      AN    ANET     AON     AOS     APA     APD     APG     APH    APLD     APO     APP    APPF    APTV      AR    ARCC     ARE    ARES    ARGX    ARKK     ARM    ARMK    ARWR      AS    ASML    ASND     ASR    ASTS     ASX     ATI     ATO     ATR      AU     AUR    AVAV     AVB    AVDE    AVDV    AVEM    AVGO    AVLV    AVTR    AVUS        AV

In [22]:
# Inspect the underlying NumPy array of the cleaned log returns, then one row
# of it (row 1 holds one value per column/ticker).
print(f'log_returns.values.shape:{log_returns.values.shape}\n')
print(f'log_returns.values:\n{log_returns.values}\n')
print(f'log_returns.values[1].shape: {log_returns.values[1].shape}\n')
log_returns.values[1]

log_returns.values.shape:(8279, 1605)

log_returns.values:
[[ 0.          0.          0.         ...  0.          0.
   0.        ]
 [ 0.         -0.01155584  0.         ...  0.00778044  0.
  -0.00727276]
 [ 0.          0.00851118  0.         ...  0.00069798  0.
  -0.00650938]
 ...
 [ 0.00656489 -0.01418773  0.02443172 ...  0.00116727  0.0055451
   0.04715514]
 [-0.01007907  0.02324928  0.04215183 ... -0.00856586 -0.00604866
  -0.00121286]
 [-0.01834382  0.00599574 -0.03047855 ... -0.00394972 -0.00634416
   0.0668871 ]]

log_returns.values[1].shape: (1605,)



array([ 0.        , -0.01155584,  0.        , ...,  0.00778044,
        0.        , -0.00727276])

In [25]:
# Step-by-step replay of the body of get_rolling_z_score with a tiny window,
# so the intermediate frames (df, roll_mean, roll_std) can be inspected by hand.
window = 3

df = pd.DataFrame(log_returns.values)
# Calculate rolling mean and std (NaN until `window` rows are available)
roll_mean = df.rolling(window=window, min_periods=window).mean()
roll_std = df.rolling(window=window, min_periods=window).std()

# Handle cases where standard deviation is zero (all values in window are the same)
# This prevents division by zero and resulting NaNs/Infs.
# Where roll_std is 0, z-score should be 0.
zero_std_mask = (roll_std == 0)

# For non-zero std, calculate z-score
z_scores = (df - roll_mean) / (roll_std + 1e-8)

# Apply mask: where std was zero, set z_score to 0. Otherwise use calculated z_score
z_scores[zero_std_mask] = 0.0

# Fill NaN (the first window - 1 rows, which lack a full window) with 0
z_scores.fillna(0.0).values

array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.94580227,  0.        , ..., -0.49437753,
         0.        , -0.47921223],
       ...,
       [ 0.90242467, -0.46712218,  1.09304251, ..., -0.04523596,
         0.40256273,  0.49074074],
       [-0.0944243 ,  1.13384004,  0.94294874, ..., -0.90099776,
        -0.59636764, -1.13691523],
       [-0.87158063,  0.05212189, -1.12265108, ..., -0.03429019,
        -0.59900267,  0.83556153]])

In [34]:
# Spot-check: the raw log return at row 2, column 1 of the scratch frame.
df.loc[2,1]

0.008511179103322804

In [28]:
# Spot-check: rolling mean at row 2, column 1 (with window = 3 this averages rows 0-2).
roll_mean.loc[2,1]

-0.0010148881815274253