refactor base_optimizer structure

robertmartin8 · Jan 25, 2019 · 3c4dad8 · 3c4dad8
1 parent d33666e
commit 3c4dad8
Show file tree

Hide file tree

Showing 9 changed files with 201 additions and 83 deletions.
diff --git a/pypfopt/base_optimizer.py b/pypfopt/base_optimizer.py
@@ -1,22 +1,67 @@
 # TODO module docstring
 
 import numpy as np
+import pandas as pd
+from . import objective_functions
 
 
 class BaseOptimizer:
-    def __init__(self, n_assets, weight_bounds=(0, 1)):
+    def __init__(self, n_assets, tickers=None):
+        """
+        :param n_assets: number of assets
+        :type n_assets: int
+        :param tickers: name of assets
+        :type tickers: list
+        """
+        self.n_assets = n_assets
+        if tickers is None:
+            self.tickers = list(range(n_assets))
+        else:
+            self.tickers = tickers
+        # Outputs
+        self.weights = None
+
+    def set_weights(self, weights):
+        if self.weights is None:
+            self.weights = [0] * self.n_assets
+        for i, k in enumerate(self.tickers):
+            if k in weights:
+                self.weights[i] = weights[k]
+
+    def clean_weights(self, cutoff=1e-4, rounding=5):
+        """
+        Helper method to clean the raw weights, setting any weights whose absolute
+        values are below the cutoff to zero, and rounding the rest.
+
+        :param cutoff: the lower bound, defaults to 1e-4
+        :type cutoff: float, optional
+        :param rounding: number of decimal places to round the weights, defaults to 5.
+                         Set to None if rounding is not desired.
+        :type rounding: int, optional
+        :return: asset weights
+        :rtype: dict
+        """
+        if not isinstance(rounding, int) or rounding < 1:
+            raise ValueError("rounding must be a positive integer")
+        clean_weights = self.weights.copy()
+        clean_weights[np.abs(clean_weights) < cutoff] = 0
+        if rounding is not None:
+            clean_weights = np.round(clean_weights, rounding)
+        return dict(zip(self.tickers, clean_weights))
+
+
+class BaseScipyOptimizer(BaseOptimizer):
+    def __init__(self, n_assets, tickers=None, weight_bounds=(0, 1)):
         """
         :param weight_bounds: minimum and maximum weight of an asset, defaults to (0, 1).
                               Must be changed to (-1, 1) for portfolios with shorting.
         :type weight_bounds: tuple, optional
         """
-        self.n_assets = n_assets
+        super().__init__(n_assets, tickers)
         self.bounds = self._make_valid_bounds(weight_bounds)
         # Optimisation parameters
         self.initial_guess = np.array([1 / self.n_assets] * self.n_assets)
         self.constraints = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}]
-        # Outputs
-        self.weights = None
 
     def _make_valid_bounds(self, test_bounds):
         """
@@ -39,23 +84,54 @@ def _make_valid_bounds(self, test_bounds):
                 raise ValueError("Lower bound is too high")
         return (test_bounds,) * self.n_assets
 
-    def clean_weights(self, cutoff=1e-4, rounding=5):
-        """
-        Helper method to clean the raw weights, setting any weights whose absolute
-        values are below the cutoff to zero, and rounding the rest.
 
-        :param cutoff: the lower bound, defaults to 1e-4
-        :type cutoff: float, optional
-        :param rounding: number of decimal places to round the weights, defaults to 5.
-                         Set to None if rounding is not desired.
-        :type rounding: int, optional
-        :return: asset weights
-        :rtype: dict
-        """
-        if not isinstance(rounding, int) or rounding < 1:
-            raise ValueError("rounding must be a positive integer")
-        clean_weights = self.weights.copy()
-        clean_weights[np.abs(clean_weights) < cutoff] = 0
-        if rounding is not None:
-            clean_weights = np.round(clean_weights, rounding)
-        return dict(zip(self.tickers, clean_weights))
+def portfolio_performance(
+    expected_returns, cov_matrix, weights, verbose=False, risk_free_rate=0.02
+):
+    """
+    After optimising, calculate (and optionally print) the performance of the optimal
+    portfolio. Currently calculates expected return, volatility, and the Sharpe ratio.
+
+    :param expected_returns: expected returns for each asset. Set to None if
+                             optimising for volatility only.
+    :type expected_returns: pd.Series, list, np.ndarray
+    :param cov_matrix: covariance of returns for each asset
+    :type cov_matrix: pd.DataFrame or np.array
+    :param weights: weights of assets
+    :type weights: list, np.array or dict
+    :param verbose: whether performance should be printed, defaults to False
+    :type verbose: bool, optional
+    :param risk_free_rate: risk-free rate of borrowing/lending, defaults to 0.02
+    :type risk_free_rate: float, optional
+    :raises ValueError: if weights have not been calculated yet
+    :return: expected return, volatility, Sharpe ratio.
+    :rtype: (float, float, float)
+    """
+    if isinstance(weights, dict):
+        if isinstance(expected_returns, pd.Series):
+            tickers = list(expected_returns.index)
+        elif isinstance(cov_matrix, pd.DataFrame):
+            tickers = list(cov_matrix.columns)
+        else:
+            tickers = list(range(len(expected_returns)))
+        newweights = np.zeros(len(tickers))
+        for i, k in enumerate(tickers):
+            if k in weights:
+                newweights[i] = weights[k]
+        if newweights.sum() == 0:
+            raise ValueError("Weights add to zero, or ticker names don't match")
+    elif weights is not None:
+        newweights = np.asarray(weights)
+    else:
+        raise ValueError("Weights is None")
+    sigma = np.sqrt(objective_functions.volatility(newweights, cov_matrix))
+    mu = newweights.dot(expected_returns)
+
+    sharpe = -objective_functions.negative_sharpe(
+        newweights, expected_returns, cov_matrix, risk_free_rate
+    )
+    if verbose:
+        print("Expected annual return: {:.1f}%".format(100 * mu))
+        print("Annual volatility: {:.1f}%".format(100 * sigma))
+        print("Sharpe Ratio: {:.2f}".format(sharpe))
+    return mu, sigma, sharpe
diff --git a/pypfopt/discrete_allocation.py b/pypfopt/discrete_allocation.py
@@ -40,7 +40,7 @@ def portfolio(weights, latest_prices, min_allocation=0.01, total_portfolio_value
     :type total_portfolio_value: int/float, optional
     :raises TypeError: if ``weights`` is not a dict
     :raises TypeError: if ``latest_prices`` isn't a series
-    :raises ValueError: if ``0 < min_allocation < 0.3``
+    :raises ValueError: if not ``0 < min_allocation < 0.3``
     :return: the number of shares of each ticker that should be purchased, along with the amount
              of funds leftover.
     :rtype: (dict, float)

diff --git a/pypfopt/efficient_frontier.py b/pypfopt/efficient_frontier.py
@@ -7,14 +7,13 @@
 import numpy as np
 import pandas as pd
 import scipy.optimize as sco
-from . import objective_functions
-from .base_optimizer import BaseOptimizer
+from . import objective_functions, base_optimizer
 
 
-class EfficientFrontier(BaseOptimizer):
+class EfficientFrontier(base_optimizer.BaseScipyOptimizer):
 
     """
-    An EfficientFrontier object (inheriting from BaseOptimizer) contains multiple
+    An EfficientFrontier object (inheriting from BaseScipyOptimizer) contains multiple
     optimisation methods that can be called (corresponding to different objective
     functions) with various parameters.
 
@@ -65,28 +64,25 @@ def __init__(self, expected_returns, cov_matrix, weight_bounds=(0, 1), gamma=0):
         self.cov_matrix = cov_matrix
         if expected_returns is not None:
             if not isinstance(expected_returns, (pd.Series, list, np.ndarray)):
-                raise TypeError(
-                    "expected_returns is not a series, list or array")
+                raise TypeError("expected_returns is not a series, list or array")
             if not isinstance(cov_matrix, (pd.DataFrame, np.ndarray)):
                 raise TypeError("cov_matrix is not a dataframe or array")
             self.expected_returns = expected_returns
-            self.tickers = list(expected_returns.index)
+        if isinstance(expected_returns, pd.Series):
+            tickers = list(expected_returns.index)
+        elif isinstance(cov_matrix, pd.DataFrame):
+            tickers = list(cov_matrix.columns)
         else:
-            self.tickers = list(cov_matrix.columns)
-        self.n_assets = len(self.tickers)
+            tickers = list(range(len(expected_returns)))
 
-        super().__init__(self.n_assets, weight_bounds)
+        super().__init__(len(tickers), tickers, weight_bounds)
 
         if not isinstance(gamma, (int, float)):
             raise ValueError("gamma should be numeric")
         if gamma < 0:
-            warnings.warn(
-                "in most cases, gamma should be positive", UserWarning)
+            warnings.warn("in most cases, gamma should be positive", UserWarning)
         self.gamma = gamma
 
-        # Outputs
-        self.weights = None
-
     def max_sharpe(self, risk_free_rate=0.02):
         """
         Maximise the Sharpe Ratio. The result is also referred to as the tangency portfolio,
@@ -102,8 +98,7 @@ def max_sharpe(self, risk_free_rate=0.02):
         if not isinstance(risk_free_rate, (int, float)):
             raise ValueError("risk_free_rate should be numeric")
 
-        args = (self.expected_returns, self.cov_matrix,
-                self.gamma, risk_free_rate)
+        args = (self.expected_returns, self.cov_matrix, self.gamma, risk_free_rate)
         result = sco.minimize(
             objective_functions.negative_sharpe,
             x0=self.initial_guess,
@@ -178,8 +173,7 @@ def efficient_risk(self, target_risk, risk_free_rate=0.02, market_neutral=False)
         if not isinstance(risk_free_rate, (int, float)):
             raise ValueError("risk_free_rate should be numeric")
 
-        args = (self.expected_returns, self.cov_matrix,
-                self.gamma, risk_free_rate)
+        args = (self.expected_returns, self.cov_matrix, self.gamma, risk_free_rate)
         target_constraint = {
             "type": "ineq",
             "fun": lambda w: target_risk
@@ -273,17 +267,10 @@ def portfolio_performance(self, verbose=False, risk_free_rate=0.02):
         :return: expected return, volatility, Sharpe ratio.
         :rtype: (float, float, float)
         """
-        if self.weights is None:
-            raise ValueError("Weights not calculated yet")
-        sigma = np.sqrt(objective_functions.volatility(
-            self.weights, self.cov_matrix))
-        mu = self.weights.dot(self.expected_returns)
-
-        sharpe = -objective_functions.negative_sharpe(
-            self.weights, self.expected_returns, self.cov_matrix, risk_free_rate
+        return base_optimizer.portfolio_performance(
+            self.expected_returns,
+            self.cov_matrix,
+            self.weights,
+            verbose,
+            risk_free_rate,
         )
-        if verbose:
-            print("Expected annual return: {:.1f}%".format(100 * mu))
-            print("Annual volatility: {:.1f}%".format(100 * sigma))
-            print("Sharpe Ratio: {:.2f}".format(sharpe))
-        return mu, sigma, sharpe
diff --git a/pypfopt/expected_returns.py b/pypfopt/expected_returns.py
@@ -18,6 +18,17 @@
 import pandas as pd
 
 
+def daily_price_returns(prices):
+    """
+    Calculate the daily return DataFrame from the prices of the asset.
+
+    :param prices: adjusted closing prices of the asset, each row is a date
+                   and each column is a ticker/id.
+    :type prices: pd.DataFrame
+    """
+    return prices.pct_change().dropna(how="all")
+
+
 def mean_historical_return(prices, frequency=252):
     """
     Calculate annualised mean (daily) historical return from input (daily) asset prices.
@@ -34,7 +45,7 @@ def mean_historical_return(prices, frequency=252):
     if not isinstance(prices, pd.DataFrame):
         warnings.warn("prices are not in a dataframe", RuntimeWarning)
         prices = pd.DataFrame(prices)
-    daily_returns = prices.pct_change().dropna(how="all")
+    daily_returns = daily_price_returns(prices)
     return daily_returns.mean() * frequency
 
 
@@ -57,5 +68,5 @@ def ema_historical_return(prices, frequency=252, span=500):
     if not isinstance(prices, pd.DataFrame):
         warnings.warn("prices are not in a dataframe", RuntimeWarning)
         prices = pd.DataFrame(prices)
-    daily_returns = prices.pct_change().dropna(how="all")
+    daily_returns = daily_price_returns(prices)
     return daily_returns.ewm(span=span).mean().iloc[-1] * frequency
diff --git a/pypfopt/hierarchical_risk_parity.py b/pypfopt/hierarchical_risk_parity.py
@@ -6,6 +6,7 @@
 import pandas as pd
 import scipy.cluster.hierarchy as sch
 import scipy.spatial.distance as ssd
+from .base_optimizer import BaseOptimizer
 
 # This code has been reproduced(with modification) from the paper:
 # López de Prado, M. (2016). Building Diversified Portfolios that Outperform Out of Sample.
@@ -66,6 +67,52 @@ def _raw_hrp_allocation(cov, ordered_tickers):
     return w
 
 
+class HRPOpt(BaseOptimizer):
+    """
+    An HRPOpt object (inheriting from BaseOptimizer) constructs a hierarchical
+    risk parity portfolio.
+
+    Instance variables:
+
+    - Inputs
+        - ``returns``
+
+    - Output: ``weights``
+
+    Public methods:
+
+    - ``hrp_portfolio()``
+    """
+
+    def __init__(self, returns):
+        """
+        :param returns: asset historical returns
+        :type returns: pd.DataFrame
+        :raises TypeError: if ``returns`` is not a dataframe
+        """
+        if not isinstance(returns, pd.DataFrame):
+            raise TypeError("returns are not a dataframe")
+
+        self.returns = returns
+        tickers = list(returns.columns)
+        super().__init__(len(tickers), tickers)
+
+    def hrp_portfolio(self):
+        corr, cov = self.returns.corr(), self.returns.cov()
+
+        # Compute distance matrix, with ClusterWarning fix as
+        # per https://stackoverflow.com/questions/18952587/
+        dist = ssd.squareform(((1 - corr) / 2) ** 0.5)
+
+        link = sch.linkage(dist, "single")
+        sort_ix = _get_quasi_diag(link)
+        ordered_tickers = corr.index[sort_ix].tolist()
+        hrp = _raw_hrp_allocation(cov, ordered_tickers)
+        weights = dict(hrp.sort_index())
+        self.set_weights(weights)
+        return weights
+
+
 def hrp_portfolio(returns):
     """
     Construct a hierarchical risk parity portfolio
@@ -76,16 +123,4 @@ def hrp_portfolio(returns):
     :rtype: dict
     :raises TypeError: if ``returns`` is not a dataframe
     """
-    if not isinstance(returns, pd.DataFrame):
-        raise TypeError("returns are not a dataframe")
-    corr, cov = returns.corr(), returns.cov()
-
-    # Compute distance matrix, with ClusterWarning fix as
-    # per https://stackoverflow.com/questions/18952587/
-    dist = ssd.squareform(((1 - corr) / 2) ** 0.5)
-
-    link = sch.linkage(dist, "single")
-    sort_ix = _get_quasi_diag(link)
-    ordered_tickers = corr.index[sort_ix].tolist()
-    hrp = _raw_hrp_allocation(cov, ordered_tickers)
-    return dict(hrp.sort_index())
+    return HRPOpt(returns).hrp_portfolio()