In [None]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
from pprint import pprint
from IPython.display import display, Markdown

# --- 1. PANDAS & IPYTHON OPTIONS ---
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 3000*5)
pd.set_option('display.float_format', '{:.6f}'.format)
%load_ext autoreload
%autoreload 2

# --- 2. PROJECT PATH CONFIGURATION ---
NOTEBOOK_DIR = Path.cwd()
PARENT_DIR = NOTEBOOK_DIR.parent
ROOT_DIR = NOTEBOOK_DIR.parent.parent  # Adjust if your notebook is in a 'notebooks' subdirectory
DATA_DIR = ROOT_DIR / 'data'
SRC_DIR = ROOT_DIR / 'src'

# Add 'src' to the Python path to import custom modules
if str(SRC_DIR) not in sys.path:
    sys.path.append(str(SRC_DIR))

# --- 3. IMPORT CUSTOM MODULES ---
import utils
import plotting_utils

# --- 4. INITIAL_CAPITAL ---
INITIAL_CAPITAL = 100000

# --- 5. RISK FREE ANNUAL RATE ---
RISK_FREE_ANNUAL_RATE = 0.04

# --- 6. VERIFICATION ---
print("--- Path Configuration ---")
print(f"✅ Project Root: {ROOT_DIR}")
print(f"✅ Parent Dir:   {PARENT_DIR}")
print(f"✅ Notebook Dir: {NOTEBOOK_DIR}")
print(f"✅ Data Dir:     {DATA_DIR}")
print(f"✅ Source Dir:   {SRC_DIR}")
assert all([ROOT_DIR.exists(), DATA_DIR.exists(), SRC_DIR.exists()]), "A key directory was not found!"

print("\n--- Module Verification ---")
print(f"✅ Successfully imported 'utils' and 'plotting_utils'.")

In [None]:
# Read the 'df_adj_close.parquet' table from the project data directory.
df = pd.read_parquet(DATA_DIR.joinpath('df_adj_close.parquet'))

In [None]:
# Sanity-check the loaded table: its printed repr, the column/dtype/memory
# summary from .info(), and the names of its index levels.
print(f'df:\n{df}')
print("\ndf.info():")
df.info()
print(f'\ndf.index.names:\n{df.index.names}')

In [None]:
# Row-over-row percentage change of every column in df, followed by two steps:
#   * dropna() removes EVERY row that still contains a NaN in any column --
#     that always includes the first row (no prior row to compare against),
#     but also any other row with a missing value.
#   * a constant 'CASH' column of zeros is added.
returns = (
    df.pct_change()
    .dropna()
    .assign(CASH=0)
)

In [None]:
# Sanity-check the returns table: its printed repr, the column/dtype/memory
# summary from .info(), and the names of its index levels.
print(f'returns:\n{returns}')
print("\nreturns.info():")
returns.info()
print(f'\nreturns.index.names:\n{returns.index.names}')

In [None]:
# --- Sliding-window geometry, measured in rows of `returns` ---
SLIDING_WINDOW_WIDTH = 300  # rows in each window
SLIDING_WINDOW_STEP = 30    # rows the window start advances between consecutive windows

# Split position inside each window.
# NOTE(review): these two names read inverted relative to how they are used.
# The papermill cell takes `.iloc[:n_test_rows]` as the TRAINING frame and
# `.iloc[n_test_rows:]` as the TEST frame, so `n_test_rows` (270) is really the
# number of training rows and `n_train_rows` (30) the number of test rows.
# The names are kept unchanged because other cells reference them.
n_test_rows = SLIDING_WINDOW_WIDTH - SLIDING_WINDOW_STEP
n_train_rows = SLIDING_WINDOW_STEP

# Guard: the step must stay below 30% of the window width.
assert SLIDING_WINDOW_STEP < int(0.3 * SLIDING_WINDOW_WIDTH), "SLIDING_WINDOW_STEP must be less than 0.3 * SLIDING_WINDOW_WIDTH"
print("SLIDING_WINDOW Assertion passed!")

In [None]:
# Cut `returns` into overlapping windows. Window starts advance by
# SLIDING_WINDOW_STEP rows and stop early enough that a full window still fits,
# so every chunk holds exactly SLIDING_WINDOW_WIDTH rows. If `returns` is
# shorter than one window the list is empty.
rolling_chunks = [
    returns.iloc[first_row:first_row + SLIDING_WINDOW_WIDTH]
    for first_row in range(0, len(returns) - SLIDING_WINDOW_WIDTH + 1, SLIDING_WINDOW_STEP)
]

# One summary line per chunk: 1-based number, shape, and first/last index label.
# The index labels must support .strftime (e.g. timestamps).
for i, chunk in enumerate(rolling_chunks):
    print(f"Chunk {i+1:<4} shape: {chunk.shape} | First index: {chunk.index[0].strftime('%Y-%m-%d')} | Last index: {chunk.index[-1].strftime('%Y-%m-%d')}")


In [None]:
# The leading 75% of the chunks (rounded down) are used for training;
# the trailing remainder is held back for verification.
n_train_chunks = int(0.75 * len(rolling_chunks))
train_chunks, reserve_chunks = rolling_chunks[:n_train_chunks], rolling_chunks[n_train_chunks:]

# Report the split sizes.
print(f'Number of training dataframes : {n_train_chunks}')
print(f'number of rolling_chunks dataframes in train_chunks: {len(train_chunks)}')
print(f'number of rolling_chunks dataframes in reserve_chunks:  {len(reserve_chunks)}')

In [None]:
# train_file

In [None]:
import papermill as pm
import pandas as pd
from pathlib import Path  # Use pathlib for cleaner path handling

# Execute 'working_5b2.ipynb' once per training chunk via papermill. Each chunk
# is split into a train frame and a test frame, both are written to parquet,
# and their paths are handed to the executed notebook as parameters.
#
# Assumes NOTEBOOK_DIR, train_chunks and n_test_rows are already defined by
# earlier cells.

# Scratch folder next to this notebook for the parquet files and the executed
# notebooks. The segments are joined WITHOUT a leading "/": pathlib discards
# everything to the left of an absolute segment, so NOTEBOOK_DIR / "/temp/..."
# pointed at the filesystem (or drive) root instead of a folder under
# NOTEBOOK_DIR.
TEMP_DIR = NOTEBOOK_DIR / "temp"
TEMP_DIR.mkdir(parents=True, exist_ok=True)

# NOTE(review): the [0:1] slice limits the run to the first training chunk --
# presumably a debugging limit; widen the slice to process every chunk.
for i, train_chunk in enumerate(train_chunks[0:1]):
    # n_test_rows is the split position inside the chunk: rows before it form
    # the training frame, the remaining rows form the test frame.
    returns_train = train_chunk.iloc[:n_test_rows]
    returns_test = train_chunk.iloc[n_test_rows:]

    # Define the full file paths with extensions
    train_file = TEMP_DIR / f"returns_train_chunk_{i}.parquet"
    test_file = TEMP_DIR / f"returns_test_chunk_{i}.parquet"

    # Save DataFrames to disk with the correct extension
    returns_train.to_parquet(train_file)
    returns_test.to_parquet(test_file)

    # Executed copy of the notebook for this chunk
    out_notebook = TEMP_DIR / f"_pm_out_working_5b2_chunk_{i}.ipynb"

    # Execute the notebook, passing the full paths as strings
    pm.execute_notebook(
        str(NOTEBOOK_DIR / "working_5b2.ipynb"),
        str(out_notebook),
        parameters={
            "returns_train_path": str(train_file),
            "returns_test_path": str(test_file),
        },
        kernel_name="python3",
    )