# Beyond Mean-Variance Optimization: Finding Optimal Portfolios with Hierarchical Risk Parity

## Setup

In [None]:
import requests
import pandas as pd

from skfolio import RiskMeasure, Population
from skfolio.optimization import (
    MeanRisk,
    ObjectiveFunction,
    EqualWeighted,
    HierarchicalRiskParity,
)
from skfolio.preprocessing import prices_to_returns
from skfolio.cluster import HierarchicalClustering, LinkageMethod
from skfolio.distance import MutualInformation

# plotting
from plotly.io import show
import seaborn as sns
import matplotlib.pyplot as plt

# settings
# Matplotlib style sheet; the "seaborn-v0_8-" prefixed style names are the ones
# shipped with matplotlib 3.6 and later.
plt.style.use("seaborn-v0_8-colorblind")
# Default (width, height) in inches for matplotlib figures created afterwards.
plt.rcParams["figure.figsize"] = (16, 8)

# api key
from api_keys import FMP_API_KEY

## Downloading data

In [None]:
# Ticker universe requested from the price API (15 large US tech companies).
TICKERS = [
    "AAPL", "MSFT", "NVDA", "GOOGL", "AMZN",
    "META", "AVGO", "TSLA", "ORCL", "NFLX",
    "AMD", "QCOM", "CRM", "ADBE", "CSCO",
]
# First date (ISO format) of the requested price history.
START_DATE = "2023-01-01"

In [None]:
def get_adj_close_price(symbol, start_date, timeout=30):
    """Download the adjusted close price history of one ticker from FMP.

    Parameters
    ----------
    symbol : str
        Ticker symbol; it is also used as the name of the returned column.
    start_date : str
        Earliest date to request, sent to the API as the ``from`` query
        parameter.
    timeout : float, default 30
        Seconds ``requests`` waits for the server before raising a timeout
        error, so a stalled connection cannot hang the notebook forever.

    Returns
    -------
    pandas.DataFrame
        Single column named ``symbol`` holding the ``adjClose`` values,
        indexed by a ``DatetimeIndex`` sorted in ascending order.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    requests.RequestException
        For connection problems or when ``timeout`` is exceeded.
    ValueError
        If the response carries no ``"historical"`` records (for example an
        unknown symbol or an API error payload).
    """
    hist_price_url = (
        f"https://financialmodelingprep.com/api/v3/historical-price-full/{symbol}"
    )
    # Let requests build and escape the query string instead of formatting it by hand.
    response = requests.get(
        hist_price_url,
        params={"from": start_date, "apikey": FMP_API_KEY},
        timeout=timeout,
    )
    if not response.ok:
        # Build the error by hand: raise_for_status() would embed the full URL,
        # API key included, in the message printed in the notebook output.
        raise requests.HTTPError(
            f"FMP request for {symbol!r} failed with HTTP status {response.status_code}",
            response=response,
        )

    r_json = response.json()
    historical = r_json.get("historical") if isinstance(r_json, dict) else None
    if not historical:
        # Fail with an explicit message rather than an opaque KeyError further down.
        raise ValueError(
            f"No historical prices returned for {symbol!r}: {str(r_json)[:200]}"
        )

    df = pd.DataFrame(historical).set_index("date").sort_index()
    df.index = pd.to_datetime(df.index)
    return df[["adjClose"]].rename(columns={"adjClose": symbol})

In [None]:
# One single-column price frame per ticker, in TICKERS order.
price_df_list = [get_adj_close_price(symbol, START_DATE) for symbol in TICKERS]

# Join every remaining frame onto the first one by date index.
first_prices, *other_prices = price_df_list
prices_df = first_prices.join(other_prices)

prices_df.plot(title="Stock prices of the 15 largest US tech companies")

In [None]:
# Convert the price table into a returns table with skfolio's helper (default settings).
returns_df = prices_to_returns(prices_df)
# Last expression of the cell: displays the returns table.
returns_df

In [None]:
# Chronological split. Label slices with .loc include both end points, so the
# two consecutive boundary dates keep the train and test sets disjoint.
X_train = returns_df.loc[:"2024-08-31"]
X_test = returns_df.loc["2024-09-01":]

## Baseline portfolios

In [None]:
# Baseline 1: equally weighted (1/n) portfolio
eq_w_model = EqualWeighted(portfolio_params={"name": "Equally weighted"}).fit(X_train)

# Evaluate the fitted model on the training and the test returns
eq_w_train, eq_w_test = eq_w_model.predict(X_train), eq_w_model.predict(X_test)

In [None]:
# Baseline 2: minimum variance portfolio
min_var_model = MeanRisk(
    risk_measure=RiskMeasure.VARIANCE,
    objective_function=ObjectiveFunction.MINIMIZE_RISK,
    portfolio_params={"name": "Min Variance"},
).fit(X_train)

# Evaluate the fitted model on the training and the test returns
min_var_train, min_var_test = (
    min_var_model.predict(X_train),
    min_var_model.predict(X_test),
)

## Hierarchical Risk Parity

In [None]:
# HRP with variance as the risk measure and otherwise default estimators
hrp1_model = HierarchicalRiskParity(
    risk_measure=RiskMeasure.VARIANCE,
    portfolio_params={"name": "HRP-variance"},
).fit(X_train)

# Evaluate the fitted model on the training and the test returns
hrp1_train, hrp1_test = hrp1_model.predict(X_train), hrp1_model.predict(X_test)

In [None]:
# Contribution plot of the training portfolio for the variance measure.
hrp1_train.plot_contribution(measure=RiskMeasure.VARIANCE)

In [None]:
# Contribution plot of the test portfolio for the CVaR measure.
# NOTE(review): the training-set cell plots RiskMeasure.VARIANCE and the model is
# fitted on variance; confirm that CVaR is the intended measure here.
hrp1_test.plot_contribution(measure=RiskMeasure.CVAR)

In [None]:
# Dendrogram of the fitted clustering estimator, with the heatmap switched off.
hrp1_model.hierarchical_clustering_estimator_.plot_dendrogram(heatmap=False)

In [None]:
# Same dendrogram, this time with the method's default arguments.
hrp1_model.hierarchical_clustering_estimator_.plot_dendrogram()

In [None]:
# HRP variant: variance risk measure, clustering switched to single linkage
single_linkage_clustering = HierarchicalClustering(linkage_method=LinkageMethod.SINGLE)
hrp2_model = HierarchicalRiskParity(
    risk_measure=RiskMeasure.VARIANCE,
    hierarchical_clustering_estimator=single_linkage_clustering,
    portfolio_params={"name": "HRP-variance-single-linkage"},
).fit(X_train)

# Evaluate the fitted model on the training and the test returns
hrp2_train, hrp2_test = hrp2_model.predict(X_train), hrp2_model.predict(X_test)

hrp2_model.hierarchical_clustering_estimator_.plot_dendrogram(heatmap=True)

In [None]:
# HRP variant: variance risk measure, distances estimated with mutual information
mutual_information_distance = MutualInformation()
hrp3_model = HierarchicalRiskParity(
    risk_measure=RiskMeasure.VARIANCE,
    distance_estimator=mutual_information_distance,
    portfolio_params={"name": "HRP-variance-mutual-information"},
).fit(X_train)

# Evaluate the fitted model on the training and the test returns
hrp3_train, hrp3_test = hrp3_model.predict(X_train), hrp3_model.predict(X_test)

hrp3_model.hierarchical_clustering_estimator_.plot_dendrogram(heatmap=True)

## Comparison

In [None]:
# Group the five portfolios so they can be compared side by side:
# one population for the training period, one for the test period.
population_train = Population(
    [
        eq_w_train,
        min_var_train,
        hrp1_train,
        hrp2_train,
        hrp3_train,
    ]
)
population_test = Population(
    [
        eq_w_test,
        min_var_test,
        hrp1_test,
        hrp2_test,
        hrp3_test,
    ]
)

In [None]:
# Composition plot for the portfolios of the training population.
population_train.plot_composition()

In [None]:
# Cumulative returns of every portfolio over the training period.
population_train.plot_cumulative_returns()

In [None]:
# Cumulative returns of every portfolio over the test period.
population_test.plot_cumulative_returns()

In [None]:
# Display-formatted summary table of the test portfolios (shown in a later cell).
summary = population_test.summary()

# Rank the portfolios on raw numbers: the default summary holds formatted
# strings, and sorting those is lexicographic (e.g. "10.20" sorts before "2.50").
sharpe_ratios = population_test.summary(formatted=False).loc["Annualized Sharpe Ratio"]
sharpe_ratios.astype(float).sort_values()

In [None]:
# Last expression of the cell: displays the full summary table of the test portfolios.
summary