In [1]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
from pprint import pprint
from IPython.display import display, Markdown

# --- 1. PANDAS & IPYTHON OPTIONS ---
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 3000*5)
pd.set_option('display.float_format', '{:.6f}'.format)
%load_ext autoreload
%autoreload 2

# --- 2. PROJECT PATH CONFIGURATION ---
NOTEBOOK_DIR = Path.cwd()
PARENT_DIR = NOTEBOOK_DIR.parent
ROOT_DIR = NOTEBOOK_DIR.parent.parent  # Adjust if your notebook is in a 'notebooks' subdirectory
DATA_DIR = ROOT_DIR / 'data'
SRC_DIR = ROOT_DIR / 'src'

# Add 'src' to the Python path to import custom modules
if str(SRC_DIR) not in sys.path:
    sys.path.append(str(SRC_DIR))

# # --- 3. IMPORT CUSTOM MODULES ---
import utils
import plotting_utils

# --- 4. INITIAL_CAPITAL ---
INITIAL_CAPITAL = 100000

# --- 5. RISK FREE ANNUAL RATE ---
RISK_FREE_ANNUAL_RATE = 0.04

# --- 6. VERIFICATION ---
print("--- Path Configuration ---")
print(f"✅ Project Root: {ROOT_DIR}")
print(f"✅ Parent Dir:   {PARENT_DIR}")
print(f"✅ Notebook Dir: {NOTEBOOK_DIR}")
print(f"✅ Data Dir:     {DATA_DIR}")
print(f"✅ Source Dir:   {SRC_DIR}")
assert all([ROOT_DIR.exists(), DATA_DIR.exists(), SRC_DIR.exists()]), "A key directory was not found!"

print("\n--- Module Verification ---")
print(f"✅ Successfully imported 'utils' and 'plotting_utils'.")

--- Path Configuration ---
✅ Project Root: c:\Users\ping\Files_win10\python\py311\stocks
✅ Parent Dir:   c:\Users\ping\Files_win10\python\py311\stocks\notebooks_PyPortfOpt
✅ Notebook Dir: c:\Users\ping\Files_win10\python\py311\stocks\notebooks_PyPortfOpt\_working
✅ Data Dir:     c:\Users\ping\Files_win10\python\py311\stocks\data
✅ Source Dir:   c:\Users\ping\Files_win10\python\py311\stocks\src

--- Module Verification ---
✅ Successfully imported 'utils' and 'plotting_utils'.


In [2]:
# Load the price table (presumably adjusted closes, going by the file name)
# from the project's data directory.
df = pd.read_parquet(DATA_DIR.joinpath('df_adj_close.parquet'))

In [3]:
# Text preview of the frame, followed by the heading for the schema summary.
print(f'df:\n{df}', "\ndf.info():", sep='\n')
# DataFrame.info() prints its report itself, so it is called as a bare statement.
df.info()
# Names of the row-index levels.
print(f'\ndf.index.names:\n{df.index.names}')

df:
Ticker              A        AA       AAL      AAON       AAPL       ABBV     ABEV        ABT      ACGL        ACM        ACN       ACWI      ACWX       ADBE       ADC        ADI       ADM        ADP       ADSK        AEE      AEG        AEM        AEP        AER       AES        AFG        AFL       AGCO       AGG       AGI     AGNC       AIG      AIRR        AIT        AIZ        AJG      AKAM        AL       ALB       ALGN       ALK        ALL       ALLE      ALLY       ALNY      ALSN        ALV       AMAT     AMCR        AMD        AME       AMGN       AMH      AMLP        AMP        AMT       AMX       AMZN         AN       ANET        AON       AOS       APA        APD        APH        APO      APTV        AR      ARCC       ARE       ARES      ARKK      ARMK        ARW       ASML        ASR       ASX       ATI        ATO        ATR        AU       AVAV        AVB       AVGO        AVY        AWI        AWK       AXON        AXP       AXS      AXTA        AYI       AZN      

In [4]:
# Row-over-row percentage change of every column in df. The first row of
# pct_change() is always NaN because it has no predecessor.
#
# NOTE: dropna() uses the default how="any", so it removes EVERY row that
# contains at least one NaN, not only that first row.
#
# CASH is a synthetic zero-return column. It is written as 0.0 (not 0) so that
# it is float64 like the other return columns rather than int64.
returns = (
    df
    .pct_change()
    .dropna()
    .assign(CASH=0.0)
)

In [5]:
# Text preview of the returns frame, followed by the heading for the schema summary.
print(f'returns:\n{returns}', "\nreturns.info():", sep='\n')
# DataFrame.info() prints its report itself, so it is called as a bare statement.
returns.info()
# Names of the row-index levels.
print(f'\nreturns.index.names:\n{returns.index.names}')

returns:
Ticker             A        AA       AAL      AAON      AAPL      ABBV      ABEV       ABT      ACGL       ACM       ACN      ACWI      ACWX      ADBE       ADC       ADI       ADM       ADP      ADSK       AEE       AEG       AEM       AEP       AER       AES       AFG       AFL      AGCO       AGG       AGI      AGNC       AIG      AIRR       AIT       AIZ       AJG      AKAM        AL       ALB      ALGN       ALK       ALL      ALLE      ALLY      ALNY      ALSN       ALV      AMAT      AMCR       AMD       AME      AMGN       AMH      AMLP       AMP       AMT       AMX      AMZN        AN      ANET       AON       AOS       APA       APD       APH       APO      APTV        AR      ARCC       ARE      ARES      ARKK      ARMK       ARW      ASML       ASR       ASX       ATI       ATO       ATR        AU      AVAV       AVB      AVGO       AVY       AWI       AWK      AXON       AXP       AXS      AXTA       AYI       AZN       AZO         B        BA      BABA       BAC 

In [6]:
# Rolling-window geometry: every window spans SLIDING_WINDOW_WIDTH rows and
# consecutive windows start SLIDING_WINDOW_STEP rows apart.
SLIDING_WINDOW_WIDTH, SLIDING_WINDOW_STEP = 300, 30

# NOTE(review): with the values above n_test_rows is the LARGER share (270)
# and n_train_rows the smaller one (30). Confirm that these names match how
# the cells that consume them actually use the two parts of a window.
n_train_rows = SLIDING_WINDOW_STEP
n_test_rows = SLIDING_WINDOW_WIDTH - n_train_rows

# The step must stay strictly below 30% of the window width.
max_allowed_step = int(0.3 * SLIDING_WINDOW_WIDTH)
assert SLIDING_WINDOW_STEP < max_allowed_step, "SLIDING_WINDOW_STEP must be less than 0.3 * SLIDING_WINDOW_WIDTH"
print("SLIDING_WINDOW Assertion passed!")

SLIDING_WINDOW Assertion passed!


In [7]:
# Start offsets of the windows, SLIDING_WINDOW_STEP rows apart. The upper
# bound only admits offsets where a full window still fits inside `returns`,
# so every chunk has exactly SLIDING_WINDOW_WIDTH rows (there is no short tail).
window_offsets = range(0, len(returns) - SLIDING_WINDOW_WIDTH + 1, SLIDING_WINDOW_STEP)

# rolling_chunks: one DataFrame slice of `returns` per window, in offset order.
rolling_chunks = [
    returns.iloc[offset : offset + SLIDING_WINDOW_WIDTH]
    for offset in window_offsets
]

# Report the 1-based position, shape and first/last index date of each chunk.
for i, chunk in enumerate(rolling_chunks):
    print(f"Chunk {i+1:<4} shape: {chunk.shape} | First index: {chunk.index[0].strftime('%Y-%m-%d')} | Last index: {chunk.index[-1].strftime('%Y-%m-%d')}")


Chunk 1    shape: (300, 1216) | First index: 2015-01-05 | Last index: 2016-03-14
Chunk 2    shape: (300, 1216) | First index: 2015-02-18 | Last index: 2016-04-26
Chunk 3    shape: (300, 1216) | First index: 2015-04-01 | Last index: 2016-06-08
Chunk 4    shape: (300, 1216) | First index: 2015-05-14 | Last index: 2016-07-21
Chunk 5    shape: (300, 1216) | First index: 2015-06-26 | Last index: 2016-09-01
Chunk 6    shape: (300, 1216) | First index: 2015-08-10 | Last index: 2016-10-14
Chunk 7    shape: (300, 1216) | First index: 2015-09-22 | Last index: 2016-11-28
Chunk 8    shape: (300, 1216) | First index: 2015-11-03 | Last index: 2017-01-11
Chunk 9    shape: (300, 1216) | First index: 2015-12-16 | Last index: 2017-02-24
Chunk 10   shape: (300, 1216) | First index: 2016-02-01 | Last index: 2017-04-07
Chunk 11   shape: (300, 1216) | First index: 2016-03-15 | Last index: 2017-05-22
Chunk 12   shape: (300, 1216) | First index: 2016-04-27 | Last index: 2017-07-05
Chunk 13   shape: (300, 1216

In [8]:
# Split the list of windows in list order: the leading 75% (rounded down)
# are used for training, the rest are held back for verification.
n_train_chunks = int(len(rolling_chunks) * 0.75)
train_chunks, reserve_chunks = (
    rolling_chunks[:n_train_chunks],   # dataframes for training
    rolling_chunks[n_train_chunks:],   # dataframes reserved for verification
)

print(
    f'Number of training dataframes : {n_train_chunks}',
    f'number of rolling_chunks dataframes in train_chunks: {len(train_chunks)}',
    f'number of rolling_chunks dataframes in reserve_chunks:  {len(reserve_chunks)}',
    sep='\n',
)

Number of training dataframes : 59
number of rolling_chunks dataframes in train_chunks: 59
number of rolling_chunks dataframes in reserve_chunks:  20


In [None]:
import papermill as pm

# How many training chunks to push through papermill. 1 keeps this a quick
# smoke test; set to None to execute every chunk in train_chunks.
MAX_CHUNKS_TO_RUN = 1

# Row at which each window is cut. It is derived here from the window
# constants rather than taken from a helper variable, so the split cannot
# silently flip if those helper names are ever changed: the leading rows are
# the fitting sample, the trailing SLIDING_WINDOW_STEP rows are held out.
n_fit_rows = SLIDING_WINDOW_WIDTH - SLIDING_WINDOW_STEP

# Scratch directory for the per-chunk parquet files and executed notebooks.
# exist_ok=True makes re-running this cell safe.
temp_data_dir = NOTEBOOK_DIR / "temp"
temp_data_dir.mkdir(exist_ok=True)

for i, train_chunk in enumerate(train_chunks[:MAX_CHUNKS_TO_RUN]):
    returns_train = train_chunk.iloc[:n_fit_rows]
    returns_test = train_chunk.iloc[n_fit_rows:]
    assert len(returns_train) > 0 and len(returns_test) > 0, (
        f"Chunk {i}: train/test split produced an empty frame "
        f"({len(returns_train)} train rows, {len(returns_test)} test rows)"
    )

    # Hand the data to the child notebook via files: papermill parameters
    # carry the file paths, not the DataFrames themselves.
    train_file = temp_data_dir / f"returns_train_chunk_{i}.parquet"
    test_file = temp_data_dir / f"returns_test_chunk_{i}.parquet"
    returns_train.to_parquet(train_file)
    returns_test.to_parquet(test_file)

    # NOTE(review): the executed copy is named "...hrp_highest_sharpe..." while
    # the notebook being run is "pm_run_5b_all_lower_sharpe" - confirm the
    # output name is still the intended one.
    out_notebook = temp_data_dir / f"_pm_out_5b_hrp_highest_sharpe_{i}.ipynb"

    # NOTE(review): earlier runs logged "Passed unknown parameter" for both
    # names below. papermill emits that when the target notebook does not
    # declare them in a cell tagged `parameters` - TODO confirm the target
    # notebook defines and actually reads returns_train_path / returns_test_path.
    pm.execute_notebook(
        "pm_run_5b_all_lower_sharpe.ipynb",
        out_notebook,
        parameters={
            "returns_train_path": str(train_file),  # full path as a string
            "returns_test_path": str(test_file),    # full path as a string
        },
        kernel_name="python3",
    )

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

Passed unknown parameter: returns_train_path
Passed unknown parameter: returns_test_path


Executing:   0%|          | 0/23 [00:00<?, ?cell/s]