ENH Add beta_fragility_heuristic and gpd_risk_estimates functions #42
@@ -17,8 +17,10 @@

import pandas as pd
import numpy as np
from scipy import stats
from math import pow
from scipy import stats, optimize
from six import iteritems
from sys import float_info

from .utils import nanmean, nanstd, nanmin, up, down, roll
from .periods import ANNUALIZATION_FACTORS, APPROX_BDAYS_PER_YEAR
@@ -993,6 +995,325 @@ def cagr(returns, period=DAILY, annualization=None):
    return ending_value ** (1. / no_years) - 1

def beta_fragility_heuristic(returns, factor_returns):
    """
    Estimate fragility to drops in beta.

    A negative return value indicates that potential losses
    could follow volatility in beta.
    The magnitude of the negative value indicates the size of
    the potential loss.

    seealso::
Review comment: I would put this under
        `A New Heuristic Measure of Fragility and
        Tail Risks: Application to Stress Testing`
        https://www.imf.org/external/pubs/ft/wp/2012/wp12216.pdf
        An IMF Working Paper describing the heuristic

    Parameters
    ----------
    returns : pd.Series or np.ndarray
        Daily returns of the strategy, noncumulative.
        - See full explanation in :func:`~empyrical.stats.cum_returns`.
    factor_returns : pd.Series or np.ndarray
        Daily noncumulative returns of the factor to which beta is
        computed. Usually a benchmark such as the market.
        - This is in the same style as returns.

    Returns
    -------
    float, np.nan
        The beta fragility of the strategy.

    """
    if len(returns) < 3 or len(factor_returns) < 3:
        return np.nan

    return beta_fragility_heuristic_aligned(
        *_aligned_series(returns, factor_returns))


def beta_fragility_heuristic_aligned(returns, factor_returns):
    """
    Estimate fragility to drops in beta.

    seealso::
Review comment: same.
        `A New Heuristic Measure of Fragility and
        Tail Risks: Application to Stress Testing`
        https://www.imf.org/external/pubs/ft/wp/2012/wp12216.pdf
        An IMF Working Paper describing the heuristic

    If they are pd.Series, returns and factor_returns are expected to have
    already been aligned on their labels. If np.ndarray, these arguments
    should have the same shape.

    Parameters
    ----------
    returns : pd.Series or np.ndarray
        Daily returns of the strategy, noncumulative.
        - See full explanation in :func:`~empyrical.stats.cum_returns`.
    factor_returns : pd.Series or np.ndarray
        Daily noncumulative returns of the factor to which beta is
        computed. Usually a benchmark such as the market.
        - This is in the same style as returns.

    Returns
    -------
    float, np.nan
        The beta fragility of the strategy.

    """
    if len(returns) < 3 or len(factor_returns) < 3:
        return np.nan

    # combine returns and factor returns into pairs
    returns_series = pd.Series(returns)
    factor_returns_series = pd.Series(factor_returns)
    pairs = pd.concat([returns_series, factor_returns_series], axis=1)
    pairs.columns = ['returns', 'factor_returns']

    # exclude any rows where returns are nan
    pairs = pairs.dropna()
    # sort by beta
    pairs = pairs.sort_values(by='factor_returns')

    # find the three vectors, using median of 3
    start_index = 0
    mid_index = int(np.around(len(pairs) / 2, 0))
    end_index = len(pairs) - 1

    (start_returns, start_factor_returns) = pairs.iloc[start_index]
    (mid_returns, mid_factor_returns) = pairs.iloc[mid_index]
    (end_returns, end_factor_returns) = pairs.iloc[end_index]

    factor_returns_range = (end_factor_returns - start_factor_returns)
    start_returns_weight = 0.5
    end_returns_weight = 0.5

    # find weights for the start and end returns
    # using a convex combination
    if not factor_returns_range == 0:
        start_returns_weight = \
            (mid_factor_returns - start_factor_returns) / \
            factor_returns_range
        end_returns_weight = \
            (end_factor_returns - mid_factor_returns) / \
            factor_returns_range

    # calculate fragility heuristic
    heuristic = (start_returns_weight*start_returns) + \
        (end_returns_weight*end_returns) - mid_returns

    return heuristic
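As a rough illustration (not part of this diff) of what the heuristic reports, the sketch below builds a toy strategy whose payoff is concave in the benchmark, losing relative to its typical day whenever the benchmark makes a large move in either direction, and calls the new function; it assumes this branch is importable as empyrical.

import numpy as np
import pandas as pd
import empyrical  # assumes this PR's branch is installed

# benchmark daily returns from -2% to +2%; the toy strategy does best on
# quiet days and loses on large benchmark moves (a concave, short-vol payoff)
factor_returns = pd.Series(np.linspace(-0.02, 0.02, 21))
returns = 0.005 - 10.0 * factor_returns ** 2

fragility = empyrical.beta_fragility_heuristic(returns, factor_returns)
print(fragility)  # negative, flagging potential losses from benchmark swings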


def gpd_risk_estimates(returns, var_p=0.01):
    """
    Estimate VaR and ES using the Generalized Pareto Distribution (GPD).

    seealso::
        `An Application of Extreme Value Theory for
        Measuring Risk <https://link.springer.com/article/10.1007/s10614-006-9025-7>`
        A paper describing how to use the Generalized Pareto
        Distribution to estimate VaR and ES.

    Parameters
    ----------
    returns : pd.Series or np.ndarray
        Daily returns of the strategy, noncumulative.
        - See full explanation in :func:`~empyrical.stats.cum_returns`.
    var_p : float
        The percentile to use for estimating the VaR and ES.

    Returns
    -------
    [threshold, scale_param, shape_param, var_estimate, es_estimate]
        : list[float]
        threshold - the threshold used to cut off exceptional tail losses
        scale_param - a parameter (often denoted by sigma, capturing the
            scale, related to variance)
        shape_param - a parameter (often denoted by xi, capturing the shape
            or type of the distribution)
        var_estimate - an estimate of the VaR for the given percentile
        es_estimate - an estimate of the ES for the given percentile
    """
    if len(returns) < 3:
        result = np.array([0.0, 0.0, 0.0, 0.0, 0.0])
        if isinstance(returns, pd.Series):
            result = pd.Series(result)
        return result
    return gpd_risk_estimates_aligned(returns, var_p)
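A similarly hedged usage sketch, again assuming this branch is importable as empyrical: heavy-tailed daily returns are simulated from a Student's t distribution and the five GPD outputs are unpacked. VaR and ES come back as positive loss magnitudes because the implementation works on flipped (negated) returns.

import numpy as np
import pandas as pd
import empyrical  # assumes this PR's branch is installed

rng = np.random.RandomState(0)
# heavy-tailed daily returns, roughly on the order of a few percent
returns = pd.Series(rng.standard_t(df=3, size=2500) * 0.01)

threshold, scale, shape, var_1pct, es_1pct = \
    empyrical.gpd_risk_estimates(returns, var_p=0.01)
print(var_1pct, es_1pct)  # both positive loss fractions; ES should exceed VaR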


def gpd_risk_estimates_aligned(returns, var_p=0.01):
    """
    Estimate VaR and ES using the Generalized Pareto Distribution (GPD).

    seealso::
        `An Application of Extreme Value Theory for
        Measuring Risk <https://link.springer.com/article/10.1007/s10614-006-9025-7>`
        A paper describing how to use the Generalized Pareto
        Distribution to estimate VaR and ES.

    Parameters
    ----------
    returns : pd.Series or np.ndarray
        Daily returns of the strategy, noncumulative.
        - See full explanation in :func:`~empyrical.stats.cum_returns`.
    var_p : float
        The percentile to use for estimating the VaR and ES.

    Returns
    -------
    [threshold, scale_param, shape_param, var_estimate, es_estimate]
        : list[float]
        threshold - the threshold used to cut off exceptional tail losses
        scale_param - a parameter (often denoted by sigma, capturing the
            scale, related to variance)
        shape_param - a parameter (often denoted by xi, capturing the shape
            or type of the distribution)
        var_estimate - an estimate of the VaR for the given percentile
        es_estimate - an estimate of the ES for the given percentile
    """
    result = np.array([0.0, 0.0, 0.0, 0.0, 0.0])
    if not len(returns) < 3:

        DEFAULT_THRESHOLD = 0.2
        MINIMUM_THRESHOLD = 0.000000001
        returns_array = pd.Series(returns).as_matrix()
        flipped_returns = -1 * returns_array
        losses = flipped_returns[flipped_returns > 0]
        threshold = DEFAULT_THRESHOLD
        finished = False
        scale_param = 0
        shape_param = 0
        while not finished and threshold > MINIMUM_THRESHOLD:
            losses_beyond_threshold = \
                losses[losses >= threshold]
            param_result = \
                gpd_loglikelihood_minimizer_aligned(losses_beyond_threshold)
            if (param_result[0] is not False and
                    param_result[1] is not False):
                scale_param = param_result[0]
                shape_param = param_result[1]
                var_estimate = gpd_var_calculator(threshold, scale_param,
                                                  shape_param, var_p,
                                                  len(losses),
                                                  len(losses_beyond_threshold))
                # non-negative shape parameter is required for fat tails
                # non-negative VaR estimate is required for loss of some kind
                if (shape_param > 0 and var_estimate > 0):
                    finished = True
            if (not finished):
                threshold = threshold / 2
        if (finished):
            es_estimate = gpd_es_calculator(var_estimate, threshold,
                                            scale_param, shape_param)
            result = np.array([threshold, scale_param, shape_param,
                               var_estimate, es_estimate])
    if isinstance(returns, pd.Series):
        result = pd.Series(result)
    return result


def gpd_es_calculator(var_estimate, threshold, scale_param,
                      shape_param):
    result = 0
    if ((1 - shape_param) != 0):
        # this formula is from Gilli and Kellezi pg. 8
        var_ratio = (var_estimate/(1 - shape_param))
        param_ratio = ((scale_param - (shape_param * threshold)) /
                       (1 - shape_param))
        result = var_ratio + param_ratio
    return result
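For readers without the paper at hand, this helper appears to evaluate the Gilli and Kellezi expected-shortfall formula (their p. 8), with u the threshold, sigma the scale and xi the shape:

\[ \widehat{ES}_p = \frac{\widehat{VaR}_p}{1 - \hat{\xi}} + \frac{\hat{\sigma} - \hat{\xi}\,u}{1 - \hat{\xi}} \]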


def gpd_var_calculator(threshold, scale_param, shape_param,
                       probability, total_n, exceedance_n):
    result = 0
    if (exceedance_n > 0 and shape_param > 0):
        # this formula is from Gilli and Kellezi pg. 12
        param_ratio = scale_param / shape_param
        prob_ratio = (total_n/exceedance_n) * probability
        result = threshold + (param_ratio *
                              (pow(prob_ratio, -shape_param) - 1))
    return result
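Likewise, the VaR expression from the same paper (their p. 12) that this helper appears to compute, with n the total number of losses, N_u the number of exceedances over the threshold u, and p the tail probability passed in as var_p:

\[ \widehat{VaR}_p = u + \frac{\hat{\sigma}}{\hat{\xi}} \left( \left( \frac{n}{N_u}\, p \right)^{-\hat{\xi}} - 1 \right) \]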


def gpd_loglikelihood_minimizer_aligned(price_data):
    result = [False, False]
    DEFAULT_SCALE_PARAM = 1
    DEFAULT_SHAPE_PARAM = 1
    if (len(price_data) > 0):
        gpd_loglikelihood_lambda = \
            gpd_loglikelihood_factory(price_data)
        optimization_results = \
            optimize.minimize(gpd_loglikelihood_lambda,
                              [DEFAULT_SCALE_PARAM,
                               DEFAULT_SHAPE_PARAM],
                              method='Nelder-Mead')
        if optimization_results.success:
            resulting_params = optimization_results.x
            if len(resulting_params) == 2:
                result[0] = resulting_params[0]
                result[1] = resulting_params[1]
    return result


def gpd_loglikelihood_factory(price_data):
    return lambda params: gpd_loglikelihood(params, price_data)


def gpd_loglikelihood(params, price_data):
    if (params[1] != 0):
        return -gpd_loglikelihood_scale_and_shape(params[0],
                                                  params[1],
                                                  price_data)
    else:
        return -gpd_loglikelihood_scale_only(params[0], price_data)


def gpd_loglikelihood_scale_and_shape_factory(price_data):
    # minimizing a function of two variables requires a list of params
    # we are expecting the lambda below to be called as follows:
    # parameters = [scale, shape]
    # the final outer negative is added because scipy only minimizes
    return lambda params: \
        -gpd_loglikelihood_scale_and_shape(params[0],
                                           params[1],
                                           price_data)


def gpd_loglikelihood_scale_and_shape(scale, shape, price_data):
    n = len(price_data)
    result = -1 * float_info.max
    if (scale != 0):
        param_factor = shape / scale
        if (shape != 0 and param_factor >= 0 and scale >= 0):
            result = ((-n * np.log(scale)) -
                      (((1 / shape) + 1) *
                       (np.log((shape / scale * price_data) + 1)).sum()))
    return result
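If I am reading the arithmetic correctly, this is the standard GPD log-likelihood for exceedances x_1, ..., x_n; the factory above negates it because scipy.optimize only minimizes:

\[ \ell(\sigma, \xi) = -n \ln \sigma - \left( \frac{1}{\xi} + 1 \right) \sum_{i=1}^{n} \ln\!\left( 1 + \frac{\xi\, x_i}{\sigma} \right) \]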


def gpd_loglikelihood_scale_only_factory(price_data):
    # the negative is added because scipy only minimizes
    return lambda scale: \
        -gpd_loglikelihood_scale_only(scale, price_data)


def gpd_loglikelihood_scale_only(scale, price_data):
    n = len(price_data)
    data_sum = price_data.sum()
    result = -1 * float_info.max
    if (scale >= 0):
        result = ((-n*np.log(scale)) - (data_sum/scale))
    return result
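The scale-only variant covers the xi = 0 branch taken in gpd_loglikelihood, where the GPD reduces to an exponential distribution and the log-likelihood simplifies to:

\[ \ell(\sigma) = -n \ln \sigma - \frac{1}{\sigma} \sum_{i=1}^{n} x_i \]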


def capture(returns, factor_returns, period=DAILY):
    """
    Compute capture ratio.
@@ -1285,6 +1606,8 @@ def conditional_value_at_risk(returns, cutoff=0.05):
    excess_sharpe,
    alpha,
    beta,
    beta_fragility_heuristic,
    gpd_risk_estimates,
    capture,
    up_capture,
    down_capture
Review comment: these line-breaks are a bit odd.