Skip to content

Commit

Permalink
Merge pull request #20 from skfolio/fix/issue-11/business-days-in-year
Browse files Browse the repository at this point in the history
[BUG] Default trading days in a year amended from 255 to 252
  • Loading branch information
HugoDelatte committed Jan 22, 2024
2 parents b369e87 + 0c2c59e commit 8334c7b
Show file tree
Hide file tree
Showing 29 changed files with 500 additions and 105 deletions.
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ Randomized Search of the L2 Norm
randomized_search = RandomizedSearchCV(
estimator=MeanRisk(),
cv=WalkForward(train_size=255, test_size=60),
cv=WalkForward(train_size=252, test_size=60),
param_distributions={
"l2_coef": loguniform(1e-3, 1e-1),
},
Expand Down
2 changes: 2 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
),
)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# -- Project information -----------------------------------------------------

project = "skfolio"
Expand Down
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ Randomized Search of the L2 Norm
randomized_search = RandomizedSearchCV(
estimator=MeanRisk(),
cv=WalkForward(train_size=255, test_size=60),
cv=WalkForward(train_size=252, test_size=60),
param_distributions={
"l2_coef": loguniform(1e-3, 1e-1),
},
Expand Down
8 changes: 4 additions & 4 deletions docs/user_guide/data_preparation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ It is not uncommon to witness the following steps [1]_:
#. Take the daily prices :math:`S_{t}, S_{t+1}, ...,` for all the n securities
#. Transform the daily prices to daily logarithmic returns
#. Estimate the expected returns vector :math:`\mu` and covariance matrix :math:`\Sigma` from the daily logarithmic returns
#. Determine the investment horizon, for example k = 255 days
#. Determine the investment horizon, for example k = 252 days
#. Project the expected returns and covariance to the horizon using the square-root rule: :math:`\mu_{k} ≡ k \times \mu` and :math:`\Sigma_{k} ≡ k \times \Sigma`
#. Compute the mean-variance efficient frontier :math:`\max_{w} \Biggl\{ w^T \mu - \lambda \times w^T \Sigma w \Biggr\}`

Expand Down Expand Up @@ -82,10 +82,10 @@ Example for stocks
#. Take the prices :math:`S_{t}, S_{t+1}, ...,` (for example daily) for all the n securities
#. Transform the daily prices to daily logarithmic returns. Note that the linear return is also a market invariant for stocks; however, the logarithmic return simplifies steps 3) and 4).
#. Estimate the joint distribution of market invariants by fitting parametrically the daily logarithmic returns to a multivariate normal distribution: estimate the joint distribution parameters :math:`\mu^{Log}_{daily}` and :math:`\Sigma^{Log}_{daily}`
#. Project the distribution of invariants to the time period of investment (for example one year i.e. 255 business days). Because logarithmic returns are additive across time, we have:
#. Project the distribution of invariants to the time period of investment (for example one year i.e. 252 business days). Because logarithmic returns are additive across time, we have:

* .. math:: \mu^{Log}_{yearly} = 255 \times \mu^{Log}_{daily}
* .. math:: \Sigma^{Log}_{yearly} = 255 \times \Sigma^{Log}_{daily}
* .. math:: \mu^{Log}_{yearly} = 252 \times \mu^{Log}_{daily}
* .. math:: \Sigma^{Log}_{yearly} = 252 \times \Sigma^{Log}_{daily}
#. Compute the distribution of linear returns at the investment horizon. Using the characteristic function of the normal distribution, and the pricing function :math:`S_{yearly} = S_{0} e^{R^{Log}_{yearly}}`, we get:

* .. math:: \mathbb{E}(S_{yearly}) = \pmb{s}_{0} \circ exp\Biggl(\pmb{\mu}^{Log}_{yearly} + \frac{1}{2} diag\Biggl(\pmb{\Sigma}^{Log}_{yearly}\Biggr)\Biggr)
Expand Down
2 changes: 1 addition & 1 deletion examples/1_mean_risk/plot_3_efficient_frontier.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
# 15%, 20%, 25%, 30% and 35% (annualized):
model = MeanRisk(
risk_measure=RiskMeasure.VARIANCE,
min_return=np.array([0.15, 0.20, 0.25, 0.30, 0.35]) / 255,
min_return=np.array([0.15, 0.20, 0.25, 0.30, 0.35]) / 252,
portfolio_params=dict(name="Variance"),
)

Expand Down
2 changes: 1 addition & 1 deletion examples/1_mean_risk/plot_6_transaction_costs.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
* Duration = 1 month (21 business days):
* 1 month expected return A ≈ -0.8%
* 1 month expected return B ≈ 0.1%
* Duration = 1 year (255 business days):
* Duration = 1 year (252 business days):
* 1 year expected return A ≈ 1.5%
* 1 year expected return B ≈ 1.3%
Expand Down
8 changes: 4 additions & 4 deletions examples/1_mean_risk/plot_7_management_fees.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@
# * JPMorgan: 1% p.a.
#
# The MF are expressed per annum, so we need to convert them into daily MF.
# We suppose 255 trading days in a year:
management_fees = {"AAPL": 0.03 / 255, "GE": 0.06 / 255, "JPM": 0.01 / 255}
# Same as management_fees = np.array([0.03, 0.06, 0.01]) / 255
# We suppose 252 trading days in a year:
management_fees = {"AAPL": 0.03 / 252, "GE": 0.06 / 252, "JPM": 0.01 / 252}
# Same as management_fees = np.array([0.03, 0.06, 0.01]) / 252

model_mf = MeanRisk(
objective_function=ObjectiveFunction.MAXIMIZE_UTILITY,
Expand All @@ -101,7 +101,7 @@

# %%
# As explained above, we transform the yearly MF into a daily MF:
management_fees = np.array([0.03, 0.06, 0.01]) / 255
management_fees = np.array([0.03, 0.06, 0.01]) / 252

# %%
# First, we train the model without MF and test it with MF.
Expand Down
18 changes: 9 additions & 9 deletions examples/1_mean_risk/plot_8_regularization.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
model = MeanRisk(
risk_measure=RiskMeasure.VARIANCE,
min_weights=-1,
max_variance=0.3**2 / 255,
max_variance=0.3**2 / 252,
efficient_frontier_size=30,
portfolio_params=dict(name="Mean-Variance", tag="No Regularization"),
)
Expand All @@ -78,7 +78,7 @@
model_l1 = MeanRisk(
risk_measure=RiskMeasure.VARIANCE,
min_weights=-1,
max_variance=0.3**2 / 255,
max_variance=0.3**2 / 252,
efficient_frontier_size=30,
l1_coef=0.001,
portfolio_params=dict(name="Mean-Variance", tag="L1 Regularization"),
Expand Down Expand Up @@ -153,7 +153,7 @@
# Hyper-parameter Tuning
# ======================
# In this section, we consider a 3 months rolling (60 business days) long-short
# allocation fitted on the preceding year of data (255 business days) that maximizes the
# allocation fitted on the preceding year of data (252 business days) that maximizes the
# return under a volatility constraint of 30% p.a.
#
# We use `GridSearchCV` to select the optimal L1 and L2 regularization coefficients on
Expand All @@ -165,11 +165,11 @@
ref_model = MeanRisk(
risk_measure=RiskMeasure.VARIANCE,
objective_function=ObjectiveFunction.MAXIMIZE_RETURN,
max_variance=0.3**2 / 255,
max_variance=0.3**2 / 252,
min_weights=-1,
)

cv = WalkForward(train_size=255, test_size=60)
cv = WalkForward(train_size=252, test_size=60)

grid_search = GridSearchCV(
estimator=ref_model,
Expand Down Expand Up @@ -296,14 +296,14 @@
# |
#
# The highest mean out-of-sample Sharpe Ratio is 1.55 and is achieved for a L2 coef of
# 0.031.
# 0.023.
# Also note that without regularization, the mean train Sharpe Ratio is around
# six times higher than the mean test Sharpe Ratio. That would be a clear indication of
# overfitting.
#
# Now, we analyze all three models on the test set. By using `cross_val_predict` with
# `WalkForward`, we are able to compute efficiently the `MultiPeriodPortfolio`
# composed of 60 days rolling portfolios fitted on the preceding 255 days:
# composed of 60 days rolling portfolios fitted on the preceding 252 days:

benchmark = EqualWeighted()
pred_bench = cross_val_predict(benchmark, X_test, cv=cv)
Expand All @@ -320,8 +320,8 @@

# %%
# From the plot and the below summary, we can see that the un-regularized model is
# overfitted and perform poorly on the test set. Its annualized volatility is 53%, which
# is significantly above the model upper-bound of 30% and its Sharpe Ratio is 0.29 which
# overfitted and performs poorly on the test set. Its annualized volatility is 54%,
# which is significantly above the model upper-bound of 30% and its Sharpe Ratio is 0.32
# which is the lowest of all models.

population.summary()
Expand Down
15 changes: 8 additions & 7 deletions examples/1_mean_risk/plot_9_uncertainty_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
# Hyper-Parameter Tuning
# ======================
# In this section, we consider a 3 months rolling (60 business days) long-short
# allocation fitted on the preceding year of data (255 business days) that maximizes the
# allocation fitted on the preceding year of data (252 business days) that maximizes the
# portfolio return under a CVaR constraint.
# We will use `GridSearchCV` to select the below model parameters on the training set
# using walk forward analysis with a Mean/CVaR ratio scoring.
Expand All @@ -113,22 +113,23 @@
model_no_uncertainty = MeanRisk(
risk_measure=RiskMeasure.CVAR,
objective_function=ObjectiveFunction.MAXIMIZE_RETURN,
max_cvar=0.04,
max_cvar=0.02,
cvar_beta=0.9,
min_weights=-1,
)

model_uncertainty = clone(model_no_uncertainty)
model_uncertainty.set_params(mu_uncertainty_set_estimator=EmpiricalMuUncertaintySet())

cv = WalkForward(train_size=255, test_size=60)
cv = WalkForward(train_size=252, test_size=60)

grid_search = GridSearchCV(
estimator=model_uncertainty,
cv=cv,
n_jobs=-1,
param_grid={
"mu_uncertainty_set_estimator__confidence_level": [0.80, 0.90],
"max_cvar": [0.04, 0.05, 0.06],
"max_cvar": [0.03, 0.04, 0.05],
"cvar_beta": [0.8, 0.9, 0.95],
},
scoring=make_scorer(RatioMeasure.CVAR_RATIO),
Expand All @@ -138,7 +139,7 @@
print(best_model)

# %%
# The optimal parameters among the above 2x3x3 grid are the `max_cvar=6%`,
# The optimal parameters among the above 2x3x3 grid are the `max_cvar=3%`,
# `cvar_beta=90%` and :class:`~skfolio.uncertainty_set.EmpiricalMuUncertaintySet`
# `confidence_level=80%`. These parameters are the ones that achieved the highest mean
# out-of-sample Mean/CVaR ratio.
Expand Down Expand Up @@ -204,7 +205,7 @@
# Now, we analyze all three models on the test set.
# By using `cross_val_predict` with `WalkForward`, we are able to compute efficiently
# the `MultiPeriodPortfolio` composed of 60 days rolling portfolios fitted on the
# preceding 255 days:
# preceding 252 days:
pred_no_uncertainty = cross_val_predict(model_no_uncertainty, X_test, cv=cv)
pred_no_uncertainty.name = "No Uncertainty set"

Expand All @@ -219,7 +220,7 @@

# %%
# From the plot and the below summary, we can see that the model without uncertainty set
# is overfitted and perform poorly on the test set. Its CVaR at 95% is 18% and its
# is overfitted and performs poorly on the test set. Its CVaR at 95% is 10% and its
# Mean/CVaR ratio is 0.006 which is the lowest of all models.
population.summary()

Expand Down
2 changes: 1 addition & 1 deletion examples/5_clustering/plot_1_hrp_cvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@
# The original HRP is based on the single-linkage (equivalent to the minimum spanning
# tree), which suffers from the chaining effect.
# In the :class:`~skfolio.optimization.HierarchicalRiskParity` estimator, the default
# linkage method is set to the Ward variance minimization algorithm, which is more
# linkage method is set to the Ward variance minimization algorithm, which is more
# stable and has better properties than the single-linkage method.
#
# However, since the HRP optimization doesn’t utilize the full cluster structure but
Expand Down
8 changes: 4 additions & 4 deletions examples/5_clustering/plot_3_hrp_vs_herc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
:class:`~skfolio.optimization.HierarchicalEqualRiskContribution` (HERC) optimization.
For that comparison, we consider a 3 months rolling (60 business days) allocation fitted
on the preceding year of data (255 business days) that minimizes the CVaR.
on the preceding year of data (252 business days) that minimizes the CVaR.
We will employ `GridSearchCV` to select the optimal parameters of each model on the
training set using cross-validation that achieves the highest average out-of-sample
Expand Down Expand Up @@ -71,8 +71,8 @@
# For both HRP and HERC models, we find the parameters that maximize the average
# out-of-sample Mean-CVaR ratio using `GridSearchCV` with `WalkForward` cross-validation
# on the training set. The `WalkForward` is chosen to simulate a three months
# (60 business days) rolling portfolio fitted on the previous year (255 business days):
cv = WalkForward(train_size=255, test_size=60)
# (60 business days) rolling portfolio fitted on the previous year (252 business days):
cv = WalkForward(train_size=252, test_size=60)

grid_search_hrp = GridSearchCV(
estimator=model_hrp,
Expand Down Expand Up @@ -155,7 +155,7 @@

# %%
# We choose `n_folds` and `n_test_folds` to obtain more than 100 test paths and an average
# training size of approximately 255 days:
# training size of approximately 252 days:
cv.summary(X_test)

# %%
Expand Down
2 changes: 1 addition & 1 deletion examples/5_clustering/plot_4_nco.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@
model1.clustering_estimator_.plot_dendrogram()

# %%
# Linkage M
# Linkage Methods
# ===============
# The hierarchical clustering can be greatly affected by the choice of the linkage
# method. In the :class:`~skfolio.cluster.HierarchicalClustering` estimator, the default
Expand Down
10 changes: 4 additions & 6 deletions examples/5_clustering/plot_5_nco_grid_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@
# We find the model parameters that maximize the out-of-sample Sharpe ratio using
# `GridSearchCV` with `WalkForward` cross-validation on the training set.
# The `WalkForward` is chosen to simulate a three months (60 business days) rolling
# portfolio fitted on the previous year (255 business days):
cv = WalkForward(train_size=255, test_size=60)
# portfolio fitted on the previous year (252 business days):
cv = WalkForward(train_size=252, test_size=60)

grid_search_hrp = GridSearchCV(
estimator=model_nco,
Expand Down Expand Up @@ -137,7 +137,7 @@

# %%
# We choose `n_folds` and `n_test_folds` to obtain more than 30 test paths and an average
# training size of approximately 255 days:
# training size of approximately 252 days:
cv.summary(X_test)

# %%
Expand All @@ -157,9 +157,7 @@
# Distribution
# ============
# We plot the out-of-sample distribution of Sharpe Ratio for the NCO model:
pred_nco.plot_distribution(
measure_list=[RatioMeasure.ANNUALIZED_SHARPE_RATIO]
)
pred_nco.plot_distribution(measure_list=[RatioMeasure.ANNUALIZED_SHARPE_RATIO])

# %%
# Let's print the average and standard-deviation of out-of-sample Sharpe Ratios:
Expand Down
4 changes: 2 additions & 2 deletions examples/7_pre_selection/plot_1_drop_correlated.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,15 @@
X_test,
cv=cv,
n_jobs=-1,
portfolio_params=dict(annualized_factor=255, tag="model1"),
portfolio_params=dict(annualized_factor=252, tag="model1"),
)

pred_2 = cross_val_predict(
model2,
X_test,
cv=cv,
n_jobs=-1,
portfolio_params=dict(annualized_factor=255, tag="model2"),
portfolio_params=dict(annualized_factor=252, tag="model2"),
)

# %%
Expand Down
4 changes: 2 additions & 2 deletions examples/7_pre_selection/plot_2_select_best_performers.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@
# pre-selected assets `k` that maximizes the out-of-sample Sharpe Ratio using
# `GridSearchCV` with `WalkForward` cross-validation on the training set. The
# `WalkForward` is chosen to simulate a three months (60 business days) rolling
# portfolio fitted on the previous year (255 business days):
cv = WalkForward(train_size=255, test_size=60)
# portfolio fitted on the previous year (252 business days):
cv = WalkForward(train_size=252, test_size=60)

scorer = make_scorer(RatioMeasure.ANNUALIZED_SHARPE_RATIO)
# %%
Expand Down
6 changes: 3 additions & 3 deletions examples/8_data_preparation/plot_1_investment_horizon.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@
population.extend(model.fit_predict(X))

for tag, investment_horizon in [
("3M", 255 / 4),
("1Y", 255),
("10Y", 10 * 255),
("3M", 252 / 4),
("1Y", 252),
("10Y", 10 * 252),
]:
model = MeanRisk(
risk_measure=RiskMeasure.VARIANCE,
Expand Down
8 changes: 4 additions & 4 deletions src/skfolio/cluster/_hierarchical.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@

import numpy as np
import numpy.typing as npt
import plotly.figure_factory as ff
import plotly.graph_objects as go
import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as scd
import sklearn.base as skb
import sklearn.utils.validation as skv

from skfolio.utils.fixes import create_dendrogram
from skfolio.utils.stats import assert_is_distance, compute_optimal_n_clusters
from skfolio.utils.tools import AutoEnum, default_asset_names

Expand Down Expand Up @@ -245,7 +245,7 @@ def plot_dendrogram(self, heatmap: bool = True) -> go.Figure:
asset_names = default_asset_names(n_assets=n_assets)

if not heatmap:
fig = ff.create_dendrogram(
fig = create_dendrogram(
np.ones(1),
distfun=lambda x: None,
linkagefun=lambda x: linkage_matrix,
Expand All @@ -264,7 +264,7 @@ def plot_dendrogram(self, heatmap: bool = True) -> go.Figure:
return fig

# Initialize figure by creating upper dendrogram
fig = ff.create_dendrogram(
fig = create_dendrogram(
np.ones(1),
orientation="bottom",
distfun=lambda x: None,
Expand All @@ -277,7 +277,7 @@ def plot_dendrogram(self, heatmap: bool = True) -> go.Figure:
fig["data"][i]["yaxis"] = "y2"

# Create Side Dendrogram
side_dendrogram = ff.create_dendrogram(
side_dendrogram = create_dendrogram(
np.ones(1),
orientation="right",
distfun=lambda x: None,
Expand Down

0 comments on commit 8334c7b

Please sign in to comment.