Skip to content

Commit

Permalink
refactor base_optimizer structure
Browse files Browse the repository at this point in the history
  • Loading branch information
gumblex committed Jan 25, 2019
1 parent d33666e commit 3c4dad8
Show file tree
Hide file tree
Showing 9 changed files with 201 additions and 83 deletions.
122 changes: 99 additions & 23 deletions pypfopt/base_optimizer.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,67 @@
# TODO module docstring

import numpy as np
import pandas as pd
from . import objective_functions


class BaseOptimizer:
    """
    Base class for portfolio optimisers. It stores the asset tickers and the
    resulting weights, and provides helpers to set and clean those weights.

    Instance variables:

    - ``n_assets`` - number of assets
    - ``tickers`` - asset labels, in the same order as ``weights``
    - ``weights`` - asset weights, ``None`` until they have been set

    Public methods:

    - ``set_weights()``
    - ``clean_weights()``
    """

    def __init__(self, n_assets, tickers=None):
        """
        :param n_assets: number of assets
        :type n_assets: int
        :param tickers: name of assets, defaults to ``0, 1, ..., n_assets - 1``
        :type tickers: list, optional
        """
        self.n_assets = n_assets
        if tickers is None:
            self.tickers = list(range(n_assets))
        else:
            self.tickers = tickers
        # Outputs
        self.weights = None

    def set_weights(self, weights):
        """
        Store the weights given as a ``{ticker: weight}`` mapping.

        Tickers missing from ``weights`` keep their current weight (zero if no
        weights were set before); keys of ``weights`` that are not known
        tickers are ignored.

        :param weights: mapping from ticker to weight
        :type weights: dict
        """
        if self.weights is None:
            # Use an array rather than a list so that clean_weights() can
            # apply its boolean mask to weights set through this method.
            self.weights = np.zeros(self.n_assets)
        for i, k in enumerate(self.tickers):
            if k in weights:
                self.weights[i] = weights[k]

    def clean_weights(self, cutoff=1e-4, rounding=5):
        """
        Helper method to clean the raw weights, setting any weights whose absolute
        values are below the cutoff to zero, and rounding the rest.

        :param cutoff: the lower bound, defaults to 1e-4
        :type cutoff: float, optional
        :param rounding: number of decimal places to round the weights, defaults to 5.
                         Set to None if rounding is not desired.
        :type rounding: int, optional
        :raises ValueError: if the weights have not been set yet
        :raises ValueError: if ``rounding`` is neither None nor a positive integer
        :return: asset weights
        :rtype: dict
        """
        if self.weights is None:
            raise ValueError("Weights not calculated yet")
        # None is a documented value ("no rounding"), so only validate real values.
        if rounding is not None and (not isinstance(rounding, int) or rounding < 1):
            raise ValueError("rounding must be a positive integer")
        # np.array copies, so the stored weights are left untouched, and it also
        # accepts weights that were stored as a plain list.
        clean_weights = np.array(self.weights)
        clean_weights[np.abs(clean_weights) < cutoff] = 0
        if rounding is not None:
            clean_weights = np.round(clean_weights, rounding)
        return dict(zip(self.tickers, clean_weights))


class BaseScipyOptimizer(BaseOptimizer):
def __init__(self, n_assets, tickers=None, weight_bounds=(0, 1)):
    """
    :param n_assets: number of assets
    :type n_assets: int
    :param tickers: name of assets
    :type tickers: list, optional
    :param weight_bounds: minimum and maximum weight of an asset, defaults to (0, 1).
                          Must be changed to (-1, 1) for portfolios with shorting.
    :type weight_bounds: tuple, optional
    """
    # The base initialiser stores n_assets and tickers and resets weights to
    # None, so none of that is repeated here.
    super().__init__(n_assets, tickers)
    self.bounds = self._make_valid_bounds(weight_bounds)
    # Optimisation parameters
    # Start the optimiser from the equally-weighted portfolio.
    self.initial_guess = np.array([1 / self.n_assets] * self.n_assets)
    # Equality constraint: the weights must sum to one.
    self.constraints = [{"type": "eq", "fun": lambda x: np.sum(x) - 1}]

def _make_valid_bounds(self, test_bounds):
"""
Expand All @@ -39,23 +84,54 @@ def _make_valid_bounds(self, test_bounds):
raise ValueError("Lower bound is too high")
return (test_bounds,) * self.n_assets

def clean_weights(self, cutoff=1e-4, rounding=5):
    """
    Helper method to clean the raw weights, setting any weights whose absolute
    values are below the cutoff to zero, and rounding the rest.

    :param cutoff: the lower bound, defaults to 1e-4
    :type cutoff: float, optional
    :param rounding: number of decimal places to round the weights, defaults to 5.
                     Set to None if rounding is not desired.
    :type rounding: int, optional
    :raises ValueError: if the weights have not been set yet
    :raises ValueError: if ``rounding`` is neither None nor a positive integer
    :return: asset weights
    :rtype: dict
    """
    if self.weights is None:
        raise ValueError("Weights not calculated yet")
    # None is a documented value ("no rounding"), so only validate real values.
    if rounding is not None and (not isinstance(rounding, int) or rounding < 1):
        raise ValueError("rounding must be a positive integer")
    # np.array copies, so the stored weights are left untouched, and it also
    # accepts weights that were stored as a plain list.
    clean_weights = np.array(self.weights)
    clean_weights[np.abs(clean_weights) < cutoff] = 0
    if rounding is not None:
        clean_weights = np.round(clean_weights, rounding)
    return dict(zip(self.tickers, clean_weights))
def portfolio_performance(
    expected_returns, cov_matrix, weights, verbose=False, risk_free_rate=0.02
):
    """
    Calculate (and optionally print) the performance of a portfolio with the
    given weights: expected return, volatility, and the Sharpe ratio.

    :param expected_returns: expected returns for each asset
    :type expected_returns: pd.Series, list, np.ndarray
    :param cov_matrix: covariance of returns for each asset
    :type cov_matrix: pd.DataFrame or np.array
    :param weights: weights of assets. A dict is matched to the assets by
                    ticker (missing tickers get a weight of zero); anything
                    else is converted with ``np.asarray``.
    :type weights: list, np.array or dict
    :param verbose: whether performance should be printed, defaults to False
    :type verbose: bool, optional
    :param risk_free_rate: risk-free rate of borrowing/lending, defaults to 0.02
    :type risk_free_rate: float, optional
    :raises ValueError: if ``weights`` is None
    :raises ValueError: if dict weights add to zero once matched to the tickers
    :return: expected return, volatility, Sharpe ratio.
    :rtype: (float, float, float)
    """
    if weights is None:
        raise ValueError("Weights is None")

    if not isinstance(weights, dict):
        weight_vector = np.asarray(weights)
    else:
        # Take the asset labels from whichever input carries them, falling
        # back to positional labels when neither input is labelled.
        if isinstance(expected_returns, pd.Series):
            labels = list(expected_returns.index)
        elif isinstance(cov_matrix, pd.DataFrame):
            labels = list(cov_matrix.columns)
        else:
            labels = list(range(len(expected_returns)))

        weight_vector = np.zeros(len(labels))
        for position, label in enumerate(labels):
            if label in weights:
                weight_vector[position] = weights[label]
        if weight_vector.sum() == 0:
            raise ValueError("Weights add to zero, or ticker names don't match")

    raw_volatility = objective_functions.volatility(weight_vector, cov_matrix)
    sigma = np.sqrt(raw_volatility)
    mu = weight_vector.dot(expected_returns)

    # negative_sharpe returns the negated ratio, so flip the sign back.
    sharpe = -objective_functions.negative_sharpe(
        weight_vector, expected_returns, cov_matrix, risk_free_rate
    )

    if verbose:
        print("Expected annual return: {:.1f}%".format(100 * mu))
        print("Annual volatility: {:.1f}%".format(100 * sigma))
        print("Sharpe Ratio: {:.2f}".format(sharpe))

    return mu, sigma, sharpe
2 changes: 1 addition & 1 deletion pypfopt/discrete_allocation.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def portfolio(weights, latest_prices, min_allocation=0.01, total_portfolio_value
:type total_portfolio_value: int/float, optional
:raises TypeError: if ``weights`` is not a dict
:raises TypeError: if ``latest_prices`` isn't a series
:raises ValueError: if ``0 < min_allocation < 0.3``
:raises ValueError: if not ``0 < min_allocation < 0.3``
:return: the number of shares of each ticker that should be purchased, along with the amount
of funds leftover.
:rtype: (dict, float)
Expand Down
51 changes: 19 additions & 32 deletions pypfopt/efficient_frontier.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,13 @@
import numpy as np
import pandas as pd
import scipy.optimize as sco
from . import objective_functions
from .base_optimizer import BaseOptimizer
from . import objective_functions, base_optimizer


class EfficientFrontier(BaseOptimizer):
class EfficientFrontier(base_optimizer.BaseScipyOptimizer):

"""
An EfficientFrontier object (inheriting from BaseOptimizer) contains multiple
An EfficientFrontier object (inheriting from BaseScipyOptimizer) contains multiple
optimisation methods that can be called (corresponding to different objective
functions) with various parameters.
Expand Down Expand Up @@ -65,28 +64,25 @@ def __init__(self, expected_returns, cov_matrix, weight_bounds=(0, 1), gamma=0):
self.cov_matrix = cov_matrix
if expected_returns is not None:
if not isinstance(expected_returns, (pd.Series, list, np.ndarray)):
raise TypeError(
"expected_returns is not a series, list or array")
raise TypeError("expected_returns is not a series, list or array")
if not isinstance(cov_matrix, (pd.DataFrame, np.ndarray)):
raise TypeError("cov_matrix is not a dataframe or array")
self.expected_returns = expected_returns
self.tickers = list(expected_returns.index)
if isinstance(expected_returns, pd.Series):
tickers = list(expected_returns.index)
elif isinstance(cov_matrix, pd.DataFrame):
tickers = list(cov_matrix.columns)
else:
self.tickers = list(cov_matrix.columns)
self.n_assets = len(self.tickers)
tickers = list(range(len(expected_returns)))

super().__init__(self.n_assets, weight_bounds)
super().__init__(len(tickers), tickers, weight_bounds)

if not isinstance(gamma, (int, float)):
raise ValueError("gamma should be numeric")
if gamma < 0:
warnings.warn(
"in most cases, gamma should be positive", UserWarning)
warnings.warn("in most cases, gamma should be positive", UserWarning)
self.gamma = gamma

# Outputs
self.weights = None

def max_sharpe(self, risk_free_rate=0.02):
"""
Maximise the Sharpe Ratio. The result is also referred to as the tangency portfolio,
Expand All @@ -102,8 +98,7 @@ def max_sharpe(self, risk_free_rate=0.02):
if not isinstance(risk_free_rate, (int, float)):
raise ValueError("risk_free_rate should be numeric")

args = (self.expected_returns, self.cov_matrix,
self.gamma, risk_free_rate)
args = (self.expected_returns, self.cov_matrix, self.gamma, risk_free_rate)
result = sco.minimize(
objective_functions.negative_sharpe,
x0=self.initial_guess,
Expand Down Expand Up @@ -178,8 +173,7 @@ def efficient_risk(self, target_risk, risk_free_rate=0.02, market_neutral=False)
if not isinstance(risk_free_rate, (int, float)):
raise ValueError("risk_free_rate should be numeric")

args = (self.expected_returns, self.cov_matrix,
self.gamma, risk_free_rate)
args = (self.expected_returns, self.cov_matrix, self.gamma, risk_free_rate)
target_constraint = {
"type": "ineq",
"fun": lambda w: target_risk
Expand Down Expand Up @@ -273,17 +267,10 @@ def portfolio_performance(self, verbose=False, risk_free_rate=0.02):
:return: expected return, volatility, Sharpe ratio.
:rtype: (float, float, float)
"""
if self.weights is None:
raise ValueError("Weights not calculated yet")
sigma = np.sqrt(objective_functions.volatility(
self.weights, self.cov_matrix))
mu = self.weights.dot(self.expected_returns)

sharpe = -objective_functions.negative_sharpe(
self.weights, self.expected_returns, self.cov_matrix, risk_free_rate
return base_optimizer.portfolio_performance(
self.expected_returns,
self.cov_matrix,
self.weights,
verbose,
risk_free_rate,
)
if verbose:
print("Expected annual return: {:.1f}%".format(100 * mu))
print("Annual volatility: {:.1f}%".format(100 * sigma))
print("Sharpe Ratio: {:.2f}".format(sharpe))
return mu, sigma, sharpe
15 changes: 13 additions & 2 deletions pypfopt/expected_returns.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,17 @@
import pandas as pd


def daily_price_returns(prices):
    """
    Calculate the DataFrame of period-over-period percentage returns from
    asset prices.

    :param prices: adjusted closing prices of the asset, each row is a date
                   and each column is a ticker/id.
    :type prices: pd.DataFrame
    :return: percentage change between consecutive rows, without the rows in
             which every value is missing
    :rtype: pd.DataFrame
    """
    pct_changes = prices.pct_change()
    # The first row has no predecessor and is therefore all-NaN; drop it
    # together with any other fully empty row.
    return pct_changes.dropna(how="all")


def mean_historical_return(prices, frequency=252):
"""
Calculate annualised mean (daily) historical return from input (daily) asset prices.
Expand All @@ -34,7 +45,7 @@ def mean_historical_return(prices, frequency=252):
if not isinstance(prices, pd.DataFrame):
warnings.warn("prices are not in a dataframe", RuntimeWarning)
prices = pd.DataFrame(prices)
daily_returns = prices.pct_change().dropna(how="all")
daily_returns = daily_price_returns(prices)
return daily_returns.mean() * frequency


Expand All @@ -57,5 +68,5 @@ def ema_historical_return(prices, frequency=252, span=500):
if not isinstance(prices, pd.DataFrame):
warnings.warn("prices are not in a dataframe", RuntimeWarning)
prices = pd.DataFrame(prices)
daily_returns = prices.pct_change().dropna(how="all")
daily_returns = daily_price_returns(prices)
return daily_returns.ewm(span=span).mean().iloc[-1] * frequency
61 changes: 48 additions & 13 deletions pypfopt/hierarchical_risk_parity.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pandas as pd
import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as ssd
from .base_optimizer import BaseOptimizer

# This code has been reproduced(with modification) from the paper:
# López de Prado, M. (2016). Building Diversified Portfolios that Outperform Out of Sample.
Expand Down Expand Up @@ -66,6 +67,52 @@ def _raw_hrp_allocation(cov, ordered_tickers):
return w


class HRPOpt(BaseOptimizer):
    """
    A HRPOpt object (inheriting from BaseOptimizer) constructs a hierarchical
    risk parity portfolio.

    Instance variables:

    - Inputs

        - ``returns``

    - Output: ``weights``

    Public methods:

    - ``hrp_portfolio()``
    """

    def __init__(self, returns):
        """
        :param returns: asset historical returns
        :type returns: pd.DataFrame
        :raises TypeError: if ``returns`` is not a dataframe
        """
        if not isinstance(returns, pd.DataFrame):
            raise TypeError("returns are not a dataframe")

        self.returns = returns
        # One asset per column; the column labels double as the tickers.
        asset_names = list(returns.columns)
        super().__init__(len(asset_names), asset_names)

    def hrp_portfolio(self):
        """
        Construct a hierarchical risk parity portfolio from ``self.returns``
        and store the result through ``set_weights``.

        :return: weights for the HRP portfolio
        :rtype: dict
        """
        correlation = self.returns.corr()
        covariance = self.returns.cov()

        # Compute distance matrix, with ClusterWarning fix as
        # per https://stackoverflow.com/questions/18952587/
        distances = ssd.squareform(((1 - correlation) / 2) ** 0.5)

        linkage_matrix = sch.linkage(distances, "single")
        sorted_positions = _get_quasi_diag(linkage_matrix)
        ordered_tickers = correlation.index[sorted_positions].tolist()
        allocation = _raw_hrp_allocation(covariance, ordered_tickers)

        weights = dict(allocation.sort_index())
        self.set_weights(weights)
        return weights


def hrp_portfolio(returns):
"""
Construct a hierarchical risk parity portfolio
Expand All @@ -76,16 +123,4 @@ def hrp_portfolio(returns):
:rtype: dict
:raises TypeError: if ``returns`` is not a dataframe
"""
if not isinstance(returns, pd.DataFrame):
raise TypeError("returns are not a dataframe")
corr, cov = returns.corr(), returns.cov()

# Compute distance matrix, with ClusterWarning fix as
# per https://stackoverflow.com/questions/18952587/
dist = ssd.squareform(((1 - corr) / 2) ** 0.5)

link = sch.linkage(dist, "single")
sort_ix = _get_quasi_diag(link)
ordered_tickers = corr.index[sort_ix].tolist()
hrp = _raw_hrp_allocation(cov, ordered_tickers)
return dict(hrp.sort_index())
return HRPOpt(returns).hrp_portfolio()

0 comments on commit 3c4dad8

Please sign in to comment.