In [70]:
import numpy as np
import pandas as pd

from portfolio_tester.analytics.metrics import cagr, twrr_annualized, max_drawdown
from portfolio_tester.config import Asset, Portfolio, SamplerConfig, SimConfig, Goal
from portfolio_tester.data.cache import key_path
from portfolio_tester.data.fetchers import fetch_prices_monthly, prep_returns_and_macro, fetch_fred_series
from portfolio_tester.engine.simulator import MonteCarloSimulator
from portfolio_tester.sampling.bootstrap import ReturnSampler

In [71]:
# 1) Portfolio (MVP)
# Each row is (ticker, fund name, fraction); the third field is presumably the
# allocation weight — the five fractions add up to 1.0.
_holdings = [
    ("VTI", "Vanguard Total Stock Market ETF", 0.30),
    ("TLT", "iShares 20+ Year Treasury Bond ETF", 0.40),
    ("IEF", "iShares 7-10 Year Treasury Bond ETF", 0.15),
    ("GSG", "iShares S&P GSCI Commodity-Indexed Trust", 0.075),
    ("GLD", "SPDR Gold Shares", 0.075),
]
p = Portfolio([Asset(symbol, label, share) for symbol, label, share in _holdings])

In [72]:
# 2) Configs
sim_cfg = SimConfig(
    horizon_months=30 * 12,
    n_sims=100,  # start with 100 sims
    starting_balance=1_000_000,
)
# NOTE(review): block_years is passed together with mode="single_month"; whether it
# has any effect in that mode is not visible from this notebook — confirm.
sam_cfg = SamplerConfig(
    mode="single_month",
    block_years=1,
    seed=42,
)

# Stated intent: withdraw $4,000/mo starting in 1 year, for 30 years,
# inflation-indexed (real).
# NOTE(review): amount is 0 below, which does not match the $4,000/mo intent; as
# written the goal presumably moves no money, which would explain the 100% survival
# rate and identical CAGR/TWRR medians in the recorded summary — confirm the
# intended amount (and its sign convention) before trusting those numbers.
goals = [
    Goal(
        "Retirement Withdrawals",
        amount=0,
        start_month=12,
        frequency=12,
        repeats=30 * 12,
        real=True,
    ),
]

In [73]:
# 3) Data
# Monthly prices for the portfolio's tickers, then the three series derived from them.
tickers = p.tickers()
prices_m = fetch_prices_monthly(tickers)
_prepared = prep_returns_and_macro(prices_m)
rets_m, infl_m, rf_m = _prepared


In [74]:
# quick alignment check
# Recomputes the returns/macro series from prices_m and compares their indexes.
rets, infl, rf = prep_returns_and_macro(prices_m)
ref_index = rets.index

infl_matches = ref_index.equals(infl.index)
rf_matches = ref_index.equals(rf.index)
print("indexes equal:", infl_matches and rf_matches)
print("rets index min/max:", ref_index.min(), ref_index.max())
print("infl missing:", infl.isna().any(), "rf missing:", rf.isna().any())

# show any index differences (dates present in a macro series but not in rets)
for label, macro in (("infl", infl), ("rf", rf)):
    print(f"extra in {label}:", macro.index.difference(ref_index))

indexes equal: True
rets index min/max: 2006-08-31 00:00:00 2025-10-31 00:00:00
infl missing: False rf missing: False
extra in infl: DatetimeIndex([], dtype='datetime64[ns]', name='Date', freq=None)
extra in rf: DatetimeIndex([], dtype='datetime64[ns]', name='Date', freq=None)


In [75]:
# Fetch the FRED series with id "CPIAUCSL" on its own.
# NOTE(review): `cpitest` is not referenced by any later cell visible here —
# presumably an ad-hoc look at the raw CPI input; confirm or remove.
cpitest = fetch_fred_series("CPIAUCSL")

In [76]:
# Merge prices, returns, inflation and rf into one DataFrame for inspection.
# Uses prices_m, rets_m, infl_m, rf_m from the data cell; `pd` comes from the
# imports cell at the top of the notebook rather than being re-imported here.
merged = pd.concat(
    [
        prices_m.reindex(rets_m.index).add_prefix("price_"),  # prices aligned to rets index
        rets_m.add_prefix("ret_"),                            # returns (rets_m)
        infl_m.rename("inflation_m"),                         # CPI monthly pct-change
        rf_m.rename("rf_m"),                                  # monthly RF
    ],
    axis=1,
)

# quick checks
print("indexes equal:", rets_m.index.equals(infl_m.index) and rets_m.index.equals(rf_m.index))
print("merged shape:", merged.shape)
print("NaNs per column:\n", merged.isna().sum())

# rows with any NaN (if any)
rows_with_nan = merged[merged.isna().any(axis=1)]
print("\nRows with any NaN (first 10):")
display(rows_with_nan.head(10))

# sample of merged table
# NOTE(review): in the recorded run inflation_m is exactly 0.0 on the first two and
# last two rows, rf_m is 0.0 on the first row and repeats the same value on the last
# two rows — worth checking whether these are fill artifacts from
# prep_returns_and_macro rather than real observations.
print("\nHead:")
display(merged.head())
print("\nTail:")
display(merged.tail())

# index differences between prices and returns (first 10 dates each way)
print("\nExtra months in prices_m not in rets_m:", prices_m.index.difference(rets_m.index)[:10])
print("Extra months in rets_m not in prices_m:", rets_m.index.difference(prices_m.index)[:10])

indexes equal: True
merged shape: (231, 12)
NaNs per column:
 price_VTI      0
price_TLT      0
price_IEF      0
price_GSG      0
price_GLD      0
ret_VTI        0
ret_TLT        0
ret_IEF        0
ret_GSG        0
ret_GLD        0
inflation_m    0
rf_m           0
dtype: int64

Rows with any NaN (first 10):


Unnamed: 0_level_0,price_VTI,price_TLT,price_IEF,price_GSG,price_GLD,ret_VTI,ret_TLT,ret_IEF,ret_GSG,ret_GLD,inflation_m,rf_m
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1



Head:


Unnamed: 0_level_0,price_VTI,price_TLT,price_IEF,price_GSG,price_GLD,ret_VTI,ret_TLT,ret_IEF,ret_GSG,ret_GLD,inflation_m,rf_m
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2006-08-31,45.430836,48.595963,50.731098,47.130001,62.290001,0.022987,0.030562,0.017635,-0.07133,-0.013775,0.0,0.0
2006-09-30,46.457561,49.52652,51.343735,42.07,59.470001,0.0226,0.019149,0.012076,-0.107363,-0.045272,0.0,0.003923
2006-10-31,48.097599,49.945847,51.620174,41.0,60.240002,0.035302,0.008467,0.005384,-0.025434,0.012948,-0.004438,0.00401
2006-11-30,49.218037,51.098686,52.297146,43.049999,64.389999,0.023295,0.023082,0.013114,0.05,0.068891,0.000495,0.004026
2006-12-31,49.809414,49.721226,51.630035,40.07,63.209999,0.012015,-0.026957,-0.012756,-0.069222,-0.018326,0.005446,0.003955



Tail:


Unnamed: 0_level_0,price_VTI,price_TLT,price_IEF,price_GSG,price_GLD,ret_VTI,ret_TLT,ret_IEF,ret_GSG,ret_GLD,inflation_m,rf_m
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2025-06-30,303.087463,86.952774,94.55632,22.040001,304.829987,0.051634,0.02664,0.01602,0.041096,0.004051,0.00287,0.003458
2025-07-31,310.028168,85.96183,93.994728,22.799999,302.959991,0.0229,-0.011396,-0.005939,0.034483,-0.006135,0.001966,0.003474
2025-08-31,317.317902,85.972748,95.543907,22.77,318.070007,0.023513,0.000127,0.016482,-0.001316,0.049875,0.003825,0.00337
2025-09-30,328.170013,89.059998,96.166,22.969999,355.470001,0.034199,0.03591,0.006511,0.008783,0.117584,0.0,0.003209
2025-10-31,331.01001,91.43,97.400002,23.34,378.790009,0.008654,0.026611,0.012832,0.016108,0.065603,0.0,0.003209



Extra months in prices_m not in rets_m: DatetimeIndex(['2001-06-30', '2001-07-31', '2001-08-31', '2001-09-30',
               '2001-10-31', '2001-11-30', '2001-12-31', '2002-01-31',
               '2002-02-28', '2002-03-31'],
              dtype='datetime64[ns]', name='Date', freq=None)
Extra months in rets_m not in prices_m: DatetimeIndex([], dtype='datetime64[ns]', name='Date', freq=None)


In [77]:
# Merge rets, infl, rf for inspection, once as-is and once reindexed to rets_m.
# Uses rets_m, infl_m, rf_m from the data cell; `pd` comes from the imports cell at
# the top of the notebook rather than being re-imported here.
# NOTE(review): in the recorded run the three indexes are already identical, so the
# two frames below come out the same; the "original" view only differs if the
# series returned by prep_returns_and_macro are not aligned.

# original (no reindexing) so any misalignment shows up as NaN rows
merged_orig = pd.concat(
    [
        rets_m.add_prefix("ret_"),
        infl_m.rename("inflation_m"),
        rf_m.rename("rf_m"),
    ],
    axis=1,
)

# aligned to rets_m (what prep_returns_and_macro currently does, per the original note)
merged_aligned = pd.concat(
    [
        rets_m.add_prefix("ret_"),
        infl_m.reindex(rets_m.index).rename("inflation_m"),
        rf_m.reindex(rets_m.index).rename("rf_m"),
    ],
    axis=1,
)

# quick checks
print("indexes equal (infl==rets and rf==rets):",
      rets_m.index.equals(infl_m.index) and rets_m.index.equals(rf_m.index))
for label, frame in (("rets", rets_m), ("infl", infl_m), ("rf", rf_m)):
    print(f"{label} index min/max:", frame.index.min(), frame.index.max())

print("\nNaNs per column (original merged):\n", merged_orig.isna().sum())
print("\nNaNs per column (aligned to rets_m):\n", merged_aligned.isna().sum())

print("\nRows with any NaN in original merged (first 10):")
display(merged_orig[merged_orig.isna().any(axis=1)].head(10))

# For each macro series, list up to 20 dates that exist on one side only.
print("\nIndex differences (what months are missing/extra):")
for label, frame in (("infl", infl_m), ("rf", rf_m)):
    print(f"in {label} not in rets:", frame.index.difference(rets_m.index)[:20])
    print(f"in rets not in {label}:", rets_m.index.difference(frame.index)[:20])

# sample views
print("\nAligned head:")
display(merged_aligned.head())
print("\nAligned tail:")
display(merged_aligned.tail())

indexes equal (infl==rets and rf==rets): True
rets index min/max: 2006-08-31 00:00:00 2025-10-31 00:00:00
infl index min/max: 2006-08-31 00:00:00 2025-10-31 00:00:00
rf index min/max: 2006-08-31 00:00:00 2025-10-31 00:00:00

NaNs per column (original merged):
 ret_VTI        0
ret_TLT        0
ret_IEF        0
ret_GSG        0
ret_GLD        0
inflation_m    0
rf_m           0
dtype: int64

NaNs per column (aligned to rets_m):
 ret_VTI        0
ret_TLT        0
ret_IEF        0
ret_GSG        0
ret_GLD        0
inflation_m    0
rf_m           0
dtype: int64

Rows with any NaN in original merged (first 10):


Unnamed: 0_level_0,ret_VTI,ret_TLT,ret_IEF,ret_GSG,ret_GLD,inflation_m,rf_m
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1



Index differences (what months are missing/extra):
in infl not in rets: DatetimeIndex([], dtype='datetime64[ns]', name='Date', freq=None)
in rets not in infl: DatetimeIndex([], dtype='datetime64[ns]', name='Date', freq=None)
in rf not in rets: DatetimeIndex([], dtype='datetime64[ns]', name='Date', freq=None)
in rets not in rf: DatetimeIndex([], dtype='datetime64[ns]', name='Date', freq=None)

Aligned head:


Unnamed: 0_level_0,ret_VTI,ret_TLT,ret_IEF,ret_GSG,ret_GLD,inflation_m,rf_m
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2006-08-31,0.022987,0.030562,0.017635,-0.07133,-0.013775,0.0,0.0
2006-09-30,0.0226,0.019149,0.012076,-0.107363,-0.045272,0.0,0.003923
2006-10-31,0.035302,0.008467,0.005384,-0.025434,0.012948,-0.004438,0.00401
2006-11-30,0.023295,0.023082,0.013114,0.05,0.068891,0.000495,0.004026
2006-12-31,0.012015,-0.026957,-0.012756,-0.069222,-0.018326,0.005446,0.003955



Aligned tail:


Unnamed: 0_level_0,ret_VTI,ret_TLT,ret_IEF,ret_GSG,ret_GLD,inflation_m,rf_m
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2025-06-30,0.051634,0.02664,0.01602,0.041096,0.004051,0.00287,0.003458
2025-07-31,0.0229,-0.011396,-0.005939,0.034483,-0.006135,0.001966,0.003474
2025-08-31,0.023513,0.000127,0.016482,-0.001316,0.049875,0.003825,0.00337
2025-09-30,0.034199,0.03591,0.006511,0.008783,0.117584,0.0,0.003209
2025-10-31,0.008654,0.026611,0.012832,0.016108,0.065603,0.0,0.003209


In [78]:
# 4) Sample paths
# The sampler is built from the return and inflation series only (rf_m is not passed).
sampler = ReturnSampler(rets_m, infl_m)
_n_months, _n_paths = sim_cfg.horizon_months, sim_cfg.n_sims
R_paths, CPI_paths = sampler.sample(_n_months, _n_paths, sam_cfg)

In [79]:
# Shape of the sampled return paths. The recorded output (100, 360, 5) lines up with
# n_sims=100, horizon_months=30*12 and the five portfolio assets.
R_paths.shape

(100, 360, 5)

In [80]:
# Shape of the sampled CPI paths. The recorded output (100, 360) lines up with
# n_sims=100 and horizon_months=30*12.
CPI_paths.shape

(100, 360)

In [81]:
# 5) Run simulation
# NOTE(review): rebalance_every_months is not set in the SimConfig(...) call in the
# configs cell, so it presumably comes from a SimConfig default — confirm the value.
sim = MonteCarloSimulator(
    weights=p.weights_vector(),
    starting_balance=sim_cfg.starting_balance,
    rebalance_every_months=sim_cfg.rebalance_every_months,
)
out = sim.run_with_cashflows(R_paths, CPI_paths, goals)

In [82]:
# 6) Simple summary
def pct(x):
    """Format a fraction as a percentage string with one decimal (0.5 -> '50.0%')."""
    return f"{100*x:.1f}%"


# Last column of the balances array: treated here as one end balance per simulated path.
end_balances = out["balances"][:, -1]

# A failure_month of -1 is counted as a surviving path.
surv = (out["failure_month"] == -1).mean()
cagr_vals = cagr(out["balances"], sim_cfg.horizon_months)
twrr_vals = twrr_annualized(out["twrr_monthly"])
mdd_vals = max_drawdown(out["balances"])

# The header reports the number of paths actually summarised instead of a
# hard-coded "100", so it stays correct when n_sims changes.
print(f"=== Monte Carlo Summary ({end_balances.shape[0]} sims) ===")
print(f"Survival rate: {pct(surv)}")
print(f"End balance (nominal) median: ${np.median(end_balances):,.0f}")
# nanmedian: the CAGR/TWRR medians skip NaN entries.
print(f"CAGR median: {np.nanmedian(cagr_vals):.2%}")
print(f"TWRR median: {np.nanmedian(twrr_vals):.2%}")
print(f"Max Drawdown median: {np.median(mdd_vals):.1%}")
print("Percentiles (10/50/90) - End Balance:",
        [f"${v:,.0f}" for v in np.percentile(end_balances, [10,50,90])])

=== Monte Carlo Summary (100 sims) ===
Survival rate: 100.0%
End balance (nominal) median: $6,907,942
CAGR median: 6.65%
TWRR median: 6.65%
Max Drawdown median: -18.7%
Percentiles (10/50/90) - End Balance: ['$3,602,111', '$6,907,942', '$11,548,723']


In [83]:
# diagnostics: run where tickers and prices_m are defined
price_index = prices_m.index
returned_cols = list(prices_m.columns)
nan_counts = prices_m.isna().sum()

print("shape:", prices_m.shape)
print("first/last index:", price_index.min(), price_index.max())
print("columns returned:", returned_cols)
print("missing tickers:", set(tickers).difference(returned_cols))
print("NaN counts per column:\n", nan_counts)
# Per the original note, months that are all-NaN were dropped by the fetcher, so they
# cannot be seen in prices_m; to inspect them, look at the raw download or the cache file.

shape: (293, 5)
first/last index: 2001-06-30 00:00:00 2025-10-31 00:00:00
columns returned: ['VTI', 'TLT', 'IEF', 'GSG', 'GLD']
missing tickers: set()
NaN counts per column:
 VTI     0
TLT    13
IEF    13
GSG    61
GLD    41
dtype: int64


In [84]:
# Locate the on-disk cache entry for the price download and peek at its first rows.
# `key_path` and `pd` come from the imports cell at the top of the notebook.
# The key mirrors "<tickers joined by |>|<start>|<end>"; this presumably matches how
# the fetcher builds its key (the recorded run found the file) — confirm if it changes.
cache_start, cache_end = None, None  # use actual start/end if provided to the fetcher
key = "|".join(tickers) + f"|{cache_start}|{cache_end}"
path = key_path("prices_yf", key)

cache_exists = path.exists()
print("cache path:", path)
print("exists?", cache_exists)
if cache_exists:
    print(pd.read_csv(path, index_col=0, parse_dates=True).head())

cache path: data_cache/prices_yf_5d45f30857e9b55d.csv
exists? True
                  VTI  TLT  IEF  GSG  GLD
Date                                     
2001-06-30  36.763496  NaN  NaN  NaN  NaN
2001-07-31  36.060757  NaN  NaN  NaN  NaN
2001-08-31  33.949303  NaN  NaN  NaN  NaN
2001-09-30  31.001385  NaN  NaN  NaN  NaN
2001-10-31  31.638067  NaN  NaN  NaN  NaN
