# Task 4: Portfolio Optimization with MPT (TSLA, BND, SPY)
This notebook builds an Efficient Frontier using:
- TSLA expected return from Task 3 forecast (fallback to historical if not available)
- BND & SPY expected returns from historical average (annualized)
- Covariance matrix from historical daily returns (annualized)
Then it finds the Maximum Sharpe and Minimum Volatility portfolios, plots the frontier, and saves outputs.

In [None]:
# Ensure project root on path
import os, sys, glob
from pathlib import Path
# Project root: the working directory, or its parent when the notebook is
# launched from inside a folder named 'notebooks'.
_launch_dir = Path.cwd()
ROOT = _launch_dir.resolve()
if _launch_dir.name == 'notebooks':
    ROOT = ROOT.parents[0]

# Put the root on sys.path (once) so the `src.*` imports below resolve.
_root_str = str(ROOT)
if _root_str not in sys.path:
    sys.path.append(_root_str)
ROOT


In [None]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from src.constants.config import TICKERS, START_DATE, END_DATE, INTERVAL, AUTO_ADJUST
from src.utils.data_loader import fetch_yfinance_data, merge_adjusted_close
from src.utils.preprocessing import fill_missing_dates, handle_missing

from pypfopt import EfficientFrontier


## 1) Build clean prices and daily returns

In [None]:
def build_prices():
    """Download, merge, and clean the price table for the configured tickers.

    The download is attempted with the configured ``AUTO_ADJUST`` setting
    first; if merging the ``'Adj Close'`` column produces nothing, it is
    retried once with ``auto_adjust=False``.

    Returns:
        The merged prices after ``fill_missing_dates``,
        ``handle_missing(method='interpolate')``, and removal of columns that
        are entirely NaN.

    Raises:
        RuntimeError: if neither attempt yields any merged prices.
    """
    merged = None
    for adjust_flag in (AUTO_ADJUST, False):
        downloaded = fetch_yfinance_data(
            TICKERS, START_DATE, END_DATE, interval=INTERVAL, auto_adjust=adjust_flag
        )
        merged = merge_adjusted_close(downloaded, column='Adj Close')
        if merged is not None and not merged.empty:
            break  # first attempt that produced data wins

    if merged is None or merged.empty:
        raise RuntimeError('No price data merged. Check network or date range.')

    cleaned = handle_missing(fill_missing_dates(merged), method='interpolate')
    return cleaned.dropna(axis=1, how='all')

prices = build_prices()
# Simple period-over-period returns; rows with a missing value in any column are dropped.
pct_moves = prices.pct_change()
retn = pct_moves.dropna(axis=0, how='any')
(prices.tail(), retn.tail())


## 2) Expected returns (annualized)
- TSLA from forecast CSV if available (Task 3)
- BND, SPY from historical average

In [None]:
def load_tsla_expected_return_from_forecast() -> float | None:
    path_glob = str(ROOT / 'data' / 'processed' / 'tsla_task3_forecasts_*m.csv')
    files = sorted(glob.glob(path_glob))
    if not files:
        return None
    latest = files[-1]
    df = pd.read_csv(latest, parse_dates=[0], index_col=0)
    col = 'LSTM_mean' if 'LSTM_mean' in df.columns else 'ARIMA_mean' if 'ARIMA_mean' in df.columns else None
    if col is None:
        return None
    s = pd.to_numeric(df[col], errors='coerce').dropna().astype(float)
    if len(s) < 2:
        return None
    daily_returns = s.pct_change().dropna()
    if daily_returns.empty:
        return None
    return float(daily_returns.mean() * 252.0)

# Historical mean return per asset, scaled by 252.
hist_means_annual = retn.mean().mul(252.0)
mu = hist_means_annual.copy()

# Replace TSLA's historical figure only when a forecast-based estimate was found.
tsla_mu_forecast = load_tsla_expected_return_from_forecast()
if 'TSLA' in mu.index and tsla_mu_forecast is not None:
    mu.at['TSLA'] = tsla_mu_forecast
mu


## 3) Covariance matrix (annualized)
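The covariance is estimated from the most recent `WINDOW_YEARS` years of daily returns (252 rows per year); if fewer rows are available, the full history is used. Expected returns above are still computed from the full history.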

In [None]:
WINDOW_YEARS = 3
window_rows = int(252 * WINDOW_YEARS)
# Keep only the trailing window when the history is longer than it; otherwise use everything.
retn_cov = retn.iloc[-window_rows:] if len(retn) > window_rows else retn
S = retn_cov.cov() * 252.0
(mu, S)


## 4) Efficient Frontier, Max Sharpe, Min Volatility

In [None]:
RISK_FREE_RATE = 0.02  # annual
# Restrict to the three target tickers that actually have an expected return.
assets = [ticker for ticker in ('TSLA', 'BND', 'SPY') if ticker in mu.index]
mu_vec = mu.loc[assets]
S_mat = S.loc[assets, assets]

# Maximum-Sharpe portfolio: optimize, read its performance, then tidy the weights.
ef = EfficientFrontier(mu_vec, S_mat)
w_sharpe = ef.max_sharpe(risk_free_rate=RISK_FREE_RATE)
perf_sharpe = ef.portfolio_performance(verbose=False, risk_free_rate=RISK_FREE_RATE)
ret_s, vol_s, sr_s = perf_sharpe
weights_sharpe = ef.clean_weights()

# Minimum-volatility portfolio on a fresh optimizer instance.
ef2 = EfficientFrontier(mu_vec, S_mat)
w_minv = ef2.min_volatility()
perf_minv = ef2.portfolio_performance(verbose=False, risk_free_rate=RISK_FREE_RATE)
ret_m, vol_m, sr_m = perf_minv
weights_minv = ef2.clean_weights()

(weights_sharpe, (ret_s, vol_s, sr_s), weights_minv, (ret_m, vol_m, sr_m))


## 5) Plot Efficient Frontier and mark key portfolios

In [None]:
# Sample the efficient frontier by scanning 50 target returns between the
# smallest and largest single-asset expected return.
ret_min = float(min(mu_vec))
ret_max = float(max(mu_vec))
targets = np.linspace(ret_min, ret_max, 50)
risks, returns = [], []
skipped = []  # (target, reason) for every target the optimizer rejected
for t in targets:
    try:
        # A fresh optimizer is built for every target, as in the cells above.
        ef_t = EfficientFrontier(mu_vec, S_mat)
        ef_t.efficient_return(target_return=float(t))
        r, v, _ = ef_t.portfolio_performance(verbose=False)
    except Exception as exc:
        # Still best-effort (one bad target must not abort the plot), but the
        # failure is recorded instead of vanishing silently.
        skipped.append((float(t), f'{type(exc).__name__}: {exc}'))
        continue
    returns.append(r)
    risks.append(v)

if skipped:
    first_target, first_reason = skipped[0]
    print(
        f'Skipped {len(skipped)} of {len(targets)} target returns; '
        f'first failure at target={first_target:.4f}: {first_reason}'
    )
if not risks:
    print('No frontier points could be solved; only the two portfolio markers are drawn.')

fig, ax = plt.subplots(figsize=(10,6))
ax.plot(risks, returns, label='Efficient Frontier')
ax.scatter([vol_s], [ret_s], marker='*', s=200, color='C1', label='Max Sharpe')
ax.scatter([vol_m], [ret_m], marker='o', s=120, color='C2', label='Min Volatility')
ax.set_xlabel('Annualized Volatility')
ax.set_ylabel('Annualized Return')
ax.set_title('Efficient Frontier — TSLA/BND/SPY')
ax.legend()
fig.tight_layout()
(ROOT / 'reports' / 'figures').mkdir(parents=True, exist_ok=True)
fig_path = ROOT / 'reports' / 'figures' / 'task4_efficient_frontier.png'
# Save through the figure object so the file is always this figure.
fig.savefig(fig_path, dpi=150)
plt.show()
fig_path


## 6) Summaries and recommendation

In [None]:
portfolio_labels = ['Max Sharpe', 'Min Volatility']

# One row per portfolio: performance figures from the optimizer cell.
summary = pd.DataFrame(
    {
        'Portfolio': portfolio_labels,
        'Return': [ret_s, ret_m],
        'Volatility': [vol_s, vol_m],
        'Sharpe': [sr_s, sr_m],
    }
)
# One row per portfolio, one column per asset weight.
weights_df = pd.DataFrame([weights_sharpe, weights_minv], index=portfolio_labels)

# Persist both tables under data/processed, creating the folder if needed.
processed_dir = ROOT / 'data' / 'processed'
processed_dir.mkdir(parents=True, exist_ok=True)
summary.to_csv(processed_dir / 'task4_portfolio_summary.csv', index=False)
weights_df.to_csv(processed_dir / 'task4_portfolio_weights.csv')
(summary, weights_df)


### Recommendation
Adjust based on your risk preference. By default, we recommend the Max Sharpe portfolio for highest risk-adjusted return.