In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
from tqdm.notebook import tqdm
import plotly.graph_objects as go
import math
from itertools import combinations_with_replacement
from collections import Counter
from scipy import signal
import plotly.express as px
import plotly.io as pio
import numpy as np
from scipy.interpolate import interpn
pio.templates.default = "plotly_dark"
GREEN = "LimeGreen"
RED = "crimson"

plt.style.use('fivethirtyeight')
np.random.seed(777)

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [3]:
def plot_returns(returns):
    normalized_returns = returns / returns.iloc[0]

    fig = go.Figure()

    traces = []
    for column in normalized_returns.columns:
        traces.append(
            go.Scatter(
                x=normalized_returns.index,
                y=normalized_returns[column],
                mode="lines",
                name=column,
            )
        )

    layout = go.Layout(
        title="Normalized Returns Over Time",
        xaxis_title="Date",
        yaxis_title="Normalized Returns",
        width=1000,
        height=500,
        hovermode="x",
    )

    # Create the figure
    fig = go.Figure(data=traces, layout=layout)
    fig.show()


def adjust_start_date(stockData, start_date):
    start = pd.to_datetime(start_date, utc=True)
    for stock in stockData:
        stock_start = stockData[stock].dropna().index.min()
        if stock_start > start:
            start = stock_start
            print(
                f"{stock} data starts at {start:%Y-%m-%d}. Setting start date to {start:%Y-%m-%d}"
            )
    return stockData.loc[stockData.index >= start]


def get_returns_and_covariance(stocks, plot=True, **kwargs):
    stockData = yf.download(stocks, **kwargs)["Adj Close"]
    stockData = adjust_start_date(stockData, kwargs["start"])

    if plot:
        plot_returns(stockData)
    returns = stockData.pct_change()
    meanReturns = returns.mean()
    covMatrix = returns.cov()
    return meanReturns, covMatrix


def portfolio_annualised_performance(weights, meanReturns, covMatrix):
    returns = np.sum(meanReturns * weights) * 252
    std = np.sqrt(np.dot(weights.T, np.dot(covMatrix, weights))) * np.sqrt(252)
    return std, returns


def n_combinations(n, r):
    result = math.factorial(n + r - 1) // (math.factorial(r) * math.factorial(n - 1))
    return result


def get_n_weight_combinations(stocks, num_portfolios):
    n_parts = len(stocks)
    while n_combinations(len(stocks), n_parts) < num_portfolios:
        n_parts += 1
    n_parts += 1
    portfolio_weights = []
    for combination in combinations_with_replacement(stocks, n_parts):
        counts = Counter(combination)
        portfolio_weights.append(np.array([counts[s] / n_parts for s in stocks]))
    return portfolio_weights


def get_portfolio_performances(numPortfolios, meanReturns, covMatrix, riskFreeRate=0):
    portfolio_weights = get_n_weight_combinations(meanReturns.index, numPortfolios)
    actual_num_portfolios = len(portfolio_weights)
    results = np.zeros((3, actual_num_portfolios))
    allocations = []
    for i in tqdm(range(actual_num_portfolios)):
        portfolio_std_dev, portfolio_return = portfolio_annualised_performance(
            portfolio_weights[i], meanReturns, covMatrix
        )
        allocations.append(
            {
                asset: weight
                for asset, weight in zip(meanReturns.index, portfolio_weights[i])
            }
        )
        results[0, i] = portfolio_std_dev
        results[1, i] = portfolio_return
        results[2, i] = (portfolio_return - riskFreeRate) / portfolio_std_dev
    return results, allocations


def find_max_sharpe_portfolio(results, verbose=False):
    selected = results["sharpe_ratio"] == results["sharpe_ratio"].max()

    if verbose:
        print("-" * 80)
        print("Annualised Return:", round(results.loc[selected, "return"].iloc[0], 2))
        print(
            "Annualised Volatility:",
            round(results.loc[selected, "volatility"].iloc[0], 2),
        )
        print("\n")
        allocations = {
            k: round(100 * v, 2)
            for k, v in results.loc[selected, "allocations"].iloc[0].items()
        }
        display(pd.DataFrame(allocations, index=["Allocation"]))
    results.loc[selected, "max_sharpe"] = True
    return results


def find_min_volatility_portfolio(results, verbose=False):
    selected = results["volatility"] == results["volatility"].min()

    if verbose:
        print("-" * 80)
        print("Minimum Volatility Portfolio Allocation\n")
        print("Annualised Return:", round(results.loc[selected, "return"].iloc[0], 2))
        print(
            "Annualised Volatility:",
            round(results.loc[selected, "volatility"].iloc[0], 2),
        )
        print("\n")
        allocations = {
            k: round(100 * v, 2)
            for k, v in results.loc[selected, "allocations"].iloc[0].items()
        }
        display(pd.DataFrame(allocations, index=["Allocation"]))
    results.loc[selected, "min_volatility"] = True
    return results


def find_efficient_portfolios(results, n_bins):
    min_std_dev = results.loc[results["min_volatility"] == True, "volatility"].min()
    next_smallest = results.loc[results["volatility"] > min_std_dev, "volatility"].min()

    max_std_dev = results.loc[results["max_sharpe"] == True, "volatility"].max()
    next_largest = results.loc[results["volatility"] < max_std_dev, "volatility"].max()

    if not np.isnan(next_smallest) and not np.isnan(next_largest):
        bins = np.linspace(next_smallest, next_largest, n_bins + 1)
        results["vol_bin"] = pd.cut(results["volatility"], bins)

        results["efficient"] = results.groupby("vol_bin", observed=True)[
            "return"
        ].transform(lambda x: x == x.max())

    results.loc[
        (results["min_volatility"] == True) | (results["max_sharpe"] == True),
        "efficient",
    ] = True

    return results


def plot_allocations(results):
    efficient_portfolios = results.loc[results["efficient"] == True].copy()
    if "vol_bin" in efficient_portfolios.columns:
        efficient_portfolios["volatility"] = (
            efficient_portfolios["vol_bin"]
            .apply(lambda x: x.mid)
            .astype(float)
            .fillna(efficient_portfolios["volatility"])
        )
    efficient_portfolios["volatility_str"] = efficient_portfolios["volatility"].apply(
        lambda x: f"{x:.2%}"
    )

    efficient_portfolios["allocations"] = efficient_portfolios["allocations"].apply(
        lambda x: list(x.items())
    )
    efficient_portfolios = efficient_portfolios.explode("allocations")
    efficient_portfolios["asset"] = efficient_portfolios["allocations"].str[0]
    efficient_portfolios["weight"] = efficient_portfolios["allocations"].str[1]

    allocations = px.bar(
        efficient_portfolios,
        x="volatility_str",
        y="weight",
        labels="volatility",
        color="asset",
    )

    min_volatility = go.Scatter(
        x=efficient_portfolios.loc[
            efficient_portfolios["min_volatility"] == True, "volatility_str"
        ].iloc[:1],
        y=[1.1],
        mode="markers",
        marker=dict(symbol="star", color=GREEN, size=10),
        name="Min Volatility",
        hoverinfo="none",
    )
    max_sharpe = go.Scatter(
        x=efficient_portfolios.loc[
            efficient_portfolios["max_sharpe"] == True, "volatility_str"
        ].iloc[-1:],
        y=[1.1],
        mode="markers",
        marker=dict(symbol="star", color=RED, size=10),
        name="Max Sharpe Ratio",
        hoverinfo="none",
    )

    layout = go.Layout(
        title="Efficient Portfolio Allocations",
        xaxis=dict(
            title="Volatility",
            categoryorder="array",
            categoryarray=efficient_portfolios["volatility_str"],
        ),
        yaxis=dict(title="Allocations"),
        width=1000,
        height=400,
        barmode="stack",
        hovermode="x",
    )
    fig = go.Figure(
        data=[*allocations.data],
        layout=layout,
    )
    fig.update_traces(hovertemplate="%{y:.2%}")
    fig.add_trace(min_volatility)
    fig.add_trace(max_sharpe)

    fig.show()


def get_label(row):
    _return = row["return"]
    volatility = row["volatility"]
    sharpe_ratio = row["sharpe_ratio"]
    allocations = [
        f"&nbsp;<i>{k}</i> - {v*100:.0f}%"
        for k, v in row["allocations"].items()
        if v != 0
    ]
    allocations = "<br>".join(allocations)
    return (
        f"<b>Volatility : </b>{volatility*100:.2f}%<br>"
        f"<b>Return : </b>{_return*100:.2f}%<br>"
        f"<b>Sharpe ratio : </b>{sharpe_ratio*100:.2f}%<br>"
        "<b>Allocations : </b><br>"
        f"{allocations}"
    )


def get_kde(x, y, bins=20):
    data, x_e, y_e = np.histogram2d(x, y, bins=bins, density=True)
    z = interpn(
        (0.5 * (x_e[1:] + x_e[:-1]), 0.5 * (y_e[1:] + y_e[:-1])),
        data,
        np.vstack([x, y]).T,
        method="splinef2d",
        bounds_error=False,
    )
    z[np.where(np.isnan(z))] = np.min(z)
    return z


def plot_efficient_frontier(results):
    if len(results) > 10_000:
        density = get_kde(results["volatility"], results["return"])
        weights = 1 / density
        sampled = results.sample(weights=weights, n=10_000)
        sampled_results = pd.concat(
            [results.loc[results["efficient"] == True], sampled]
        )
    else:
        sampled_results = results

    labels = sampled_results.apply(get_label, axis=1)
    simulated_portfolios = go.Scatter(
        x=sampled_results["volatility"],
        y=sampled_results["return"],
        mode="markers",
        text=labels,
        hoverinfo="text",
        marker=dict(
            color=sampled_results["sharpe_ratio"],
            colorscale="plasma",
            size=10,
            opacity=0.8,
            colorbar=dict(title="Sharpe Ratio"),
        ),
        name="Simulated Portfolios",
    )
    efficient_rows = results.loc[results["efficient"] == True]
    efficient_frontier = go.Scatter(
        x=efficient_rows["volatility"],
        y=signal.savgol_filter(
            efficient_rows["return"],
            len(efficient_rows),
            min(5, len(efficient_rows) - 1),
        ),
        line=dict(color="white", width=5),
        mode="lines",
        name="Efficient Frontier",
    )

    max_sharpe = go.Scatter(
        x=results.loc[results["max_sharpe"] == True, "volatility"],
        y=results.loc[results["max_sharpe"] == True, "return"],
        mode="markers",
        marker=dict(symbol="star", color=RED, size=20),
        name="Max Sharpe Ratio",
        hoverinfo="skip",
    )

    min_volatility = go.Scatter(
        x=results.loc[results["min_volatility"] == True, "volatility"],
        y=results.loc[results["min_volatility"] == True, "return"],
        mode="markers",
        marker=dict(symbol="star", color=GREEN, size=20),
        name="Min Volatility",
        hoverinfo="skip",
    )
    layout = go.Layout(
        title="Simulated Portfolio Performance",
        xaxis=dict(title="annualised volatility", tickformat=".2%"),
        yaxis=dict(title="annualised returns", tickformat=".2%"),
        width=1000,
        height=700,
    )

    fig = go.Figure(
        data=[simulated_portfolios, efficient_frontier, max_sharpe, min_volatility],
        layout=layout,
    )
    fig.add_hline(
        y=risk_free_rate,
        line_width=3,
        line_dash="dash",
        line_color="LightGrey",
        annotation_text="Risk Free Rate",
    )
    fig.update_layout(legend_orientation="h")

    fig.show()
    return None


def simulate_efficient_frontier(
    mean_returns, cov_matrix, num_portfolios, risk_free_rate, plot
):
    results, allocations = get_portfolio_performances(
        num_portfolios, mean_returns, cov_matrix, risk_free_rate
    )
    results = pd.DataFrame(results.T, columns=["volatility", "return", "sharpe_ratio"])
    results["allocations"] = allocations
    results = results.sort_values(by="volatility")

    results = find_max_sharpe_portfolio(results, verbose=plot)
    results = find_min_volatility_portfolio(results, verbose=plot)
    results = find_efficient_portfolios(
        results,
        n_bins=30,
    )
    if plot:
        plot_efficient_frontier(results)
        plot_allocations(results)
    return results


def plot_multi_period_results(results_df):
    for column in ["std_dev", "returns", "sharpe_ratio"]:
        fig = px.line(
            results_df,
            x=results_df.index,
            y=column,
            title=f"Optimal portfolio {column}",
            width=1000,
            height=300,
        )
        fig.show()

    allocation_df = results_df[["allocations"]]
    allocation_df = allocation_df["allocations"].apply(pd.Series)
    fig = px.line(
        allocation_df,
        x=allocation_df.index,
        y=allocation_df.columns.tolist(),
        title="Optimal Portfolio Allocations",
        width=1000,
        height=500,
    )
    fig.show()


num_portfolios = 25_0
risk_free_rate = 0.04

stocks = [
    "VTI",  # Total stock Market - Large cap
    "TLT",  #
    "GLD",  #
    "VNQ",  #
    "QQQ",  #
    "BND",  #
]


all_results = []
for today in [
    f"{year}-{month}-01"
    for year in ["2020", "2021", "2022", "2023", "2024"]
    for month in ["01", "04", "07", "10"]
]:
    mean_returns, cov_matrix = get_returns_and_covariance(
        stocks, plot=False, start="2015-01-01", end=today
    )
    results = simulate_efficient_frontier(
        mean_returns, cov_matrix, num_portfolios, risk_free_rate, plot=False
    )
    max_sharpe = results.loc[results["max_sharpe"] == True].iloc[0]

    mean_returns, cov_matrix = get_returns_and_covariance(
        stocks, plot=False, start=today, end="2024-09-30"
    )
    portfolio_std_dev, portfolio_return = portfolio_annualised_performance(
        np.array(list(max_sharpe["allocations"].values())), mean_returns, cov_matrix
    )

    all_results.append(
        dict(
            today=today,
            std_dev=portfolio_std_dev,
            returns=portfolio_return,
            allocations=max_sharpe["allocations"],
            sharpe_ratio=(portfolio_return - risk_free_rate) / portfolio_std_dev,
        )
    )
results_df = pd.DataFrame(all_results)
results_df.set_index("today", inplace=True)

plot_multi_period_results(results_df)

[*********************100%***********************]  6 of 6 completed

BND data starts at 2015-01-02. Setting start date to 2015-01-02





  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed
[****************      33%                       ]  2 of 6 completed

BND data starts at 2020-01-02. Setting start date to 2020-01-02


[*********************100%***********************]  6 of 6 completed


BND data starts at 2015-01-02. Setting start date to 2015-01-02


  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed
[*********************100%***********************]  6 of 6 completed


BND data starts at 2015-01-02. Setting start date to 2015-01-02


  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed
[*********************100%***********************]  6 of 6 completed


BND data starts at 2015-01-02. Setting start date to 2015-01-02


  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed
[*********************100%***********************]  6 of 6 completed

BND data starts at 2015-01-02. Setting start date to 2015-01-02





  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed


BND data starts at 2021-01-04. Setting start date to 2021-01-04


[*********************100%***********************]  6 of 6 completed

BND data starts at 2015-01-02. Setting start date to 2015-01-02





  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed
[*********************100%***********************]  6 of 6 completed

BND data starts at 2015-01-02. Setting start date to 2015-01-02





  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed
[*********************100%***********************]  6 of 6 completed


BND data starts at 2015-01-02. Setting start date to 2015-01-02


  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed
[*********************100%***********************]  6 of 6 completed

BND data starts at 2015-01-02. Setting start date to 2015-01-02





  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed


BND data starts at 2022-01-03. Setting start date to 2022-01-03


[*********************100%***********************]  6 of 6 completed

BND data starts at 2015-01-02. Setting start date to 2015-01-02





  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed
[*********************100%***********************]  6 of 6 completed

BND data starts at 2015-01-02. Setting start date to 2015-01-02





  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed
[*********************100%***********************]  6 of 6 completed

BND data starts at 2015-01-02. Setting start date to 2015-01-02





  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed
[                       0%                       ]

BND data starts at 2022-10-03. Setting start date to 2022-10-03


[*********************100%***********************]  6 of 6 completed

BND data starts at 2015-01-02. Setting start date to 2015-01-02





  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed


BND data starts at 2023-01-03. Setting start date to 2023-01-03


[*********************100%***********************]  6 of 6 completed

BND data starts at 2015-01-02. Setting start date to 2015-01-02





  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed


BND data starts at 2023-04-03. Setting start date to 2023-04-03


[*********************100%***********************]  6 of 6 completed

BND data starts at 2015-01-02. Setting start date to 2015-01-02





  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed
[                       0%                       ]

BND data starts at 2023-07-03. Setting start date to 2023-07-03


[*********************100%***********************]  6 of 6 completed

BND data starts at 2015-01-02. Setting start date to 2015-01-02





  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed


BND data starts at 2023-10-02. Setting start date to 2023-10-02


[*********************100%***********************]  6 of 6 completed

BND data starts at 2015-01-02. Setting start date to 2015-01-02





  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed


BND data starts at 2024-01-02. Setting start date to 2024-01-02


[*********************100%***********************]  6 of 6 completed

BND data starts at 2015-01-02. Setting start date to 2015-01-02





  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed
[*********************100%***********************]  6 of 6 completed

BND data starts at 2015-01-02. Setting start date to 2015-01-02





  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  6 of 6 completed
[*********************100%***********************]  6 of 6 completed


BND data starts at 2015-01-02. Setting start date to 2015-01-02


  0%|          | 0/792 [00:00<?, ?it/s]

[*********************100%***********************]  2 of 6 completed

6 Failed downloads:
[**********************50%                       ]  3 of 6 completed['VTI', 'TLT', 'VNQ', 'BND', 'GLD', 'QQQ']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2024-10-01 -> 2024-09-30) (Yahoo error = "Invalid input - start date cannot be after end date. startDate = 1727755200, endDate = 1727668800")')
  avg = a.mean(axis, **keepdims_kw)
  ret = um.true_divide(
  base_cov = np.cov(mat.T, ddof=ddof)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)


In [7]:
def get_returns_and_covariance(stocks, plot=True, **kwargs):
    stockData = yf.download(stocks, **kwargs)["Adj Close"]
    stockData = adjust_start_date(stockData, kwargs["start"])
    print(stockData)
    if plot:
        plot_returns(stockData)
    returns = stockData.pct_change()
    meanReturns = returns.mean()
    covMatrix = returns.cov()
    return meanReturns, covMatrix

meanReturns, covMatrix = get_returns_and_covariance(["VTI", "BND"], start="2024-05-01", end="2024-09-30")
meanReturns

[*********************100%***********************]  2 of 2 completed

Ticker                           BND         VTI
Date                                            
2024-05-01 00:00:00+00:00  69.632545  246.353592
2024-05-02 00:00:00+00:00  69.908264  248.906631
2024-05-03 00:00:00+00:00  70.262787  251.857025
2024-05-06 00:00:00+00:00  70.331726  254.559082
2024-05-07 00:00:00+00:00  70.469582  254.787582
...                              ...         ...
2024-09-23 00:00:00+00:00  75.015137  281.292206
2024-09-24 00:00:00+00:00  75.104851  282.000031
2024-09-25 00:00:00+00:00  74.825722  281.072876
2024-09-26 00:00:00+00:00  74.825722  282.329010
2024-09-27 00:00:00+00:00  75.045036  282.049988

[104 rows x 2 columns]





Ticker
BND    0.000732
VTI    0.001351
dtype: float64