chore(docs): improved the docs [skip ci]

skfolio · Jan 2, 2024 · d05b661 · d05b661
1 parent e8aed66
commit d05b661
Show file tree

Hide file tree

Showing 31 changed files with 292 additions and 256 deletions.
diff --git a/docs/user_guide/optimization.rst b/docs/user_guide/optimization.rst
@@ -662,10 +662,10 @@ inner-weights and outer-weights.
     The original paper uses KMeans as the clustering algorithm, minimum Variance for
     the inner-estimator and equal-weighted for the outer-estimator. Here we generalize
     it to all `sklearn` and `skfolio` clustering algorithms (Hierarchical Tree
-    Clustering, KMeans, etc.), all optimization estimators (Mean-Variance, HRP, etc.)
+    Clustering, KMeans, etc.), all portfolio optimizations (Mean-Variance, HRP, etc.)
     and risk measures (variance, CVaR, etc.).
     To avoid data leakage at the outer-estimator, we use out-of-sample estimates to
-    fit the outer optimization.
+    fit the outer estimator.
 
 It supports all :ref:`distance estimator <distance>`
 and :ref:`clustering estimator <cluster>` (both `skfolio` and `sklearn`)

diff --git a/examples/1_mean_risk/plot_10_tracking_error.py b/examples/1_mean_risk/plot_10_tracking_error.py
@@ -6,8 +6,8 @@
 This tutorial shows how to incorporate a tracking error constraint into the
 :class:`~skfolio.optimization.MeanRisk` optimization.
 
-The tracking error is defined as the RMSE (root-mean-square error) of the portfolio's
-returns compared to a target's returns.
+The tracking error is defined as the RMSE (root-mean-square error) of the portfolio
+returns compared to the target returns.
 
 In this example we will create a long-short portfolio of 20 stocks that tracks the
 SPX Index with a tracking error constraint of 0.30% while minimizing the CVaR
@@ -41,11 +41,11 @@
 # %%
 # Model
 # =====
-# We create two long-short models: a Minimum-CVaR without tracking error and a
-# Minimum-CVaR with a 0.30% tracking error constraint versus the SPX Index.
+# We create two long-short models: a Minimum CVaR without tracking error and a
+# Minimum CVaR with a 0.30% tracking error constraint versus the SPX Index.
 # A 0.30% tracking error constraint is a constraint on the RMSE of the difference
 # between the daily portfolio returns and the SPX Index returns.
-# We first create the Minimum-CVaR model without tracking error:
+# We first create the Minimum CVaR model without tracking error:
 model_no_tracking = MeanRisk(
     objective_function=ObjectiveFunction.MINIMIZE_RISK,
     risk_measure=RiskMeasure.CVAR,
@@ -56,7 +56,7 @@
 model_no_tracking.weights_
 
 # %%
-# Then we create the Minimum-CVaR model with a 0.30% tracking error constraint
+# Then we create the Minimum CVaR model with a 0.30% tracking error constraint
 # versus the SPX Index:
 model_tracking = clone(model_no_tracking)
 model_tracking.set_params(
@@ -107,7 +107,7 @@
 # %%
 # Prediction
 # ==========
-# Finally we predict both models on the test set:
+# Finally, we predict both models on the test set:
 ptf_no_tracking_test = model_no_tracking.predict(X_test)
 ptf_tracking_test = model_tracking.predict(X_test)
 spx_test = model_spx.predict(y_test)

diff --git a/examples/1_mean_risk/plot_13_factor_model.py b/examples/1_mean_risk/plot_13_factor_model.py
@@ -78,7 +78,7 @@
 # We can change the :class:`~skfolio.prior.BaseLoadingMatrix` that estimates the loading
 # matrix (betas) of the factors.
 #
-# The default is the :class:`LoadingMatrixRegression` which fit the factors using a
+# The default is the :class:`LoadingMatrixRegression`, which fits the factors using a
 # `LassoCV` on each asset separately.
 #
 # For example, let's change the `LassoCV` into a `RidgeCV` without intercept and use

diff --git a/examples/1_mean_risk/plot_14_black_litterman_factor_model.py b/examples/1_mean_risk/plot_14_black_litterman_factor_model.py
@@ -96,6 +96,7 @@
 # %%
 # Because our factor views were accurate, the Black & Litterman Factor Model
 # outperformed the simple Factor Model on the test set.
-# Let's plot the portfolios' composition:
+#
+# Let's plot the portfolio compositions:
 fig = population.plot_composition()
 show(fig)
diff --git a/examples/1_mean_risk/plot_5_weight_constraints.py b/examples/1_mean_risk/plot_5_weight_constraints.py
@@ -81,27 +81,28 @@
 model.weights_
 
 # %%
-# Lower and upper bounds on weights
+# Lower and Upper Bounds on Weights
 # =================================
 # The weights lower and upper bounds are controlled by the parameters `min_weights` and
 # `max_weights` respectively.
 # You can provide `None`, a float, an array-like or a dictionary.
 # `None` is equivalent to `-np.Inf` (no lower bounds).
 # If a float is provided, it is applied to each asset.
 # If a dictionary is provided, its (key/value) pair must be the (asset name/asset
-# weight bound) and the input `X` of the `fit` methods must be a DataFrame with the
+# weight bound) and the input `X` of the `fit` method must be a DataFrame with the
 # assets names in columns.
-# The default is `min_weights=0.0` (no short selling) and `max_weights=1.0` (each asset
-# is below 100%). When using a dictionary, you don't have to provide constraints for
-# all assets. The ones not provided will be assigned the default value (0.0 and 1.0
-# respectively).
+# The default values are `min_weights=0.0` (no short selling) and `max_weights=1.0`
+# (each asset is below 100%). When using a dictionary, you don't have to provide
+# constraints for all assets. If not provided, the default values (0.0 for min_weights
+# and 1.0 for max_weights) will be assigned to the assets not specified in the
+# dictionary.
 #
 # .. note ::
 #
-#   When adding a :ref:`pre-selection transformer <pre_selection>` into a `Pipeline`,
-#   you cannot use a **list** for the weight constraints because we don't know which
-#   assets will be selected by the pre-selection process. This is where the
-#   **dictionary** becomes useful.
+#   When incorporating a pre-selection transformer into a Pipeline, using a list for
+#   weight constraints is not feasible, as we don't know in advance which assets will
+#   be selected by the pre-selection process. This is where the dictionary proves
+#   useful.
 #
 # Example:
 #   * min_weights = 0                     –> long only portfolio (no short selling).
@@ -113,17 +114,18 @@
 #   * max_weights = None                  –> no upper bound (same as +np.Inf).
 #   * max_weights = 2                     –> each weight must be below 200%.
 #   * max_weights = [1, 2, -0.5]          -> "AAPL", "GE" and "JPM"  must be below 100%, 200% and -50% respectively.
-#   * max_weights = {"AAPL": 1, "GE": 2}  -> "AAPL", "GE" and "JPM"  must be below 100%, 200% and 100% (default) respectively.
+#   * max_weights = {"AAPL": 1, "GE": 2}  -> "AAPL", "GE" and "JPM"  must be below 100%, 200% and 100% (default).
 
 # %%
-# Allowing short positions with a budget of -100%:
+# Let's create a model that allows short positions with a budget of -100%:
 model = MeanRisk(budget=-1, min_weights=-1)
 model.fit(X)
 print(sum(model.weights_))
 model.weights_
 
 # %%
-# "AAPL", "GE" and "JPM" above 0%, 50% and 10% respectively:
+# Let's add weight constraints on "AAPL", "GE" and "JPM" to be above 0%, 50% and 10%
+# respectively:
 model = MeanRisk(min_weights=[0, 0.5, 0.1])
 model.fit(X)
 print(sum(model.weights_))
@@ -138,50 +140,51 @@
 # %%
 # |
 #
-# Same as above but using partial dictionary:
+# Let's create the same model as above but using a partial dictionary:
 model = MeanRisk(min_weights={"GE": 0.5, "JPM": 0.1})
 model.fit(X)
 print(sum(model.weights_))
 model.weights_
 
 # %%
-# Leverage 3 with each weight below 150%:
+# Let's create a model with a leverage of 3 and every weight below 150%:
 model = MeanRisk(budget=3, max_weights=1.5)
 model.fit(X)
 print(sum(model.weights_))
 model.weights_
 
 # %%
-# Short and long position constraints
+# Short and Long Position Constraints
 # ===================================
 # Constraints on the upper bound for short and long positions can be set using
 # `max_short` and `max_long`. The short position is defined as the sum of negative
 # weights (in absolute term) and the long position as the sum of positive weights.
 
 # %%
-# Fully invested long-short portfolio with a total short position less than 50%:
+# Let's create a fully invested long-short portfolio model with a total short position
+# less than 50%:
 model = MeanRisk(min_weights=-1, max_short=0.5)
 model.fit(X)
 print(sum(model.weights_))
 model.weights_
 
 # %%
-# Group and linear constraints
+# Group and Linear Constraints
 # ============================
 # We can assign groups to each asset using the `groups` parameter and set
 # constraints on these groups using the `linear_constraint` parameter.
-# The parameter `groups` can be a 2D array-like or a dictionary. If a dictionary is
+# The `groups` parameter can be a 2D array-like or a dictionary. If a dictionary is
 # provided, its (key/value)  pair must be the (asset name/asset groups).
-# You can reference these groups and/or the asset names in `linear_constraint` which
+# You can reference these groups and/or the asset names in `linear_constraint`, which
 # is a list of strings following the below patterns:
 #
 #   * "2.5 * ref1 + 0.10 * ref2 + 0.0013 <= 2.5 * ref3"
 #   * "ref1 >= 2.9 * ref2"
 #   * "ref1 <= ref2"
 #   * "ref1 >= ref1"
 #
-# Examples:
-# In this example we consider two groups: industry sector and capitalization.
+# Let's create a model with group constraints on "industry sector" and
+# "capitalization":
 groups = {
     "AAPL": ["Technology", "Mega Cap"],
     "GE": ["Industrial", "Big Cap"],
@@ -202,10 +205,10 @@
 model.weights_
 
 # %%
-# Left and right inequalities
+# Left and Right Inequalities
 # ===========================
-# Finally, you can also provide the matrix :math:`A` and the vector :math:`b` of the
-# linear constraint :math:`A \cdot w \leq b`.
+# Finally, you can also directly provide the matrix :math:`A` and the vector
+# :math:`b` of the linear constraint :math:`A \cdot w \leq b`:
 left_inequality = np.array(
     [[1.0, 1.5, -2.0], [-1.0, 0.75, 0.75], [-1.0, 1.0, 1.0], [-1.0, -0.0, 2.0]]
 )

diff --git a/examples/1_mean_risk/plot_6_transaction_costs.py b/examples/1_mean_risk/plot_6_transaction_costs.py
@@ -6,44 +6,45 @@
 This tutorial shows how to incorporate transaction costs (TC) into the
 :class:`~skfolio.optimization.MeanRisk` optimization.
 
-TC are fixed costs charged when buying or selling an asset.
+TC are fixed costs incurred when buying or selling an asset.
 
-By using the parameter `transaction_costs`, you can add linear TC to the optimization
+By using the `transaction_costs` parameter, you can add linear TC to the optimization
 problem:
 
 .. math:: total\_cost = \sum_{i=1}^{N} c_{i} \times |w_{i} - w\_prev_{i}|
 
 with :math:`c_{i}` the TC of asset i, :math:`w_{i}` its weight and :math:`w\_prev_{i}`
 its previous weight (defined in `previous_weights`).
-The float :math:`total\_cost` is used in the portfolio expected return:
+The float :math:`total\_cost` impacts the portfolio expected return in the
+optimization:
 
 .. math:: expected\_return = \mu^{T} \cdot w - total\_cost
 
 with :math:`\mu` the vector of assets expected returns and :math:`w` the vector of
 assets weights.
 
-The parameter `transaction_costs` can be a float, a dictionary or an array-like of
+The `transaction_costs` parameter can be a float, a dictionary or an array-like of
 shape `(n_assets, )`. If a float is provided, it is applied to each asset.
 If a dictionary is provided, its (key/value) pair must be the (asset name/asset TC) and
-the input `X` of the `fit` methods must be a DataFrame with the assets names in columns.
+the input `X` of the `fit` method must be a DataFrame with the assets names in columns.
 The default is 0.0 (no transaction costs).
 
 .. warning::
 
     According to the above formula, the periodicity of the transaction costs
     needs to be homogenous to the periodicity of :math:`\mu`. For example, if
     the input `X` is composed of **daily** returns, the `transaction_costs` need
-    to be expressed in **daily** costs.
+    to be expressed as **daily** costs.
 
-This means that you need to transform your fix TC into a daily cost. For that you need
-the notion of expected investment duration. This is because the optimization problem has
-no notion of investment duration.
+This means that you need to convert these fixed transaction costs into daily costs. To
+achieve this, you need the notion of expected investment duration. This is crucial since
+the optimization problem has no notion of investment duration.
 
 For example, let's assume that asset A has an expected daily return of 0.01%
 with a TC of 1% and asset B has an expected daily return of 0.005% with no TC.
-Let's assume that both have same volatility and a correlation of 1.
+Let's assume both assets have the same volatility and a correlation of 1.0.
 If the investment duration is only one month, we should allocate all the weights to
-asset B whereas if the investment duration is one year, we should allocate all the
+asset B. However, if the investment duration is one year, we should allocate all the
 weights to asset A.
 
 Example:
@@ -63,7 +64,7 @@
 # ====
 # We load the S&P 500 :ref:`dataset <datasets>` composed of the daily prices of 20
 # assets from the S&P 500 Index composition starting from 1990-01-02 up to 2022-12-28.
-# We select only 3 assets to make the example more readable which are Apple (AAPL),
+# We select only 3 assets to make the example more readable, which are Apple (AAPL),
 # General Electric (GE) and JPMorgan (JPM):
 
 import numpy as np
@@ -90,7 +91,7 @@
 model.weights_
 
 # %%
-# Transaction cost
+# Transaction Cost
 # ================
 # Let's assume we have the below TC:
 #   * Apple: 1%
@@ -102,7 +103,7 @@
 # Same as transaction_costs = np.array([0.01, 0.005, 0.002]) / 21
 
 # %%
-# First we assume that there is no previous position:
+# First, we assume that there is no previous position:
 model_tc = MeanRisk(
     objective_function=ObjectiveFunction.MAXIMIZE_UTILITY,
     transaction_costs=transaction_costs,
@@ -115,7 +116,7 @@
 model_tc.weights_ - model.weights_
 
 # %%
-# Now we assume that the previous position was equal-weighted:
+# Now, let's assume that the previous position was equal-weighted:
 model_tc2 = MeanRisk(
     objective_function=ObjectiveFunction.MAXIMIZE_UTILITY,
     transaction_costs=transaction_costs,
@@ -152,9 +153,9 @@
 pred1.name = "pred1"
 
 # %%
-# Then we train the model without TC and test with TC. The model trained without TC is
-# the same as above so we can retrieve the results and simply update the prediction with
-# the TC:
+# Then, we train the model without TC and test it with TC. The model trained without TC
+# is the same as above so we can retrieve the results and simply update the prediction
+# with the TC:
 pred2 = MultiPeriodPortfolio(name="pred2")
 previous_weights = None
 for portfolio in pred1:
@@ -168,10 +169,11 @@
     pred2.append(new_portfolio)
 
 # %%
-# Finally we train and test the model with TC. Note that we cannot use the
+# Finally, we train and test the model with TC. Note that we cannot use the
 # `cross_val_predict` function anymore because it uses parallelization and cannot handle
 # the `previous_weights` dependency between folds:
 pred3 = MultiPeriodPortfolio(name="pred3")
+
 model.set_params(transaction_costs=transaction_costs)
 previous_weights = None
 for train, test in cv.split(X):
@@ -191,5 +193,7 @@
 show(fig)
 
 # %%
+# |
+#
 # If we exclude the unrealistic prediction without TC, we notice that the model
-# **fitted with TC** outperform the model **fitted without TC**.
+# **fitted with TC** outperforms the model **fitted without TC**.