From 7546879fdf3014e6a187707b42d1ba6acdab004b Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Wed, 13 Feb 2019 13:17:39 +0000 Subject: [PATCH] Adding inferring periods to core.py adn optional trading_days_per_year to calculate correctly statistics for non traditional markets (crypto..) resolve comment fix lint errors fix lint issue use name from #161 --- ffn/core.py | 106 ++++++++++++++++++++++++++++++++++++++------- tests/test_core.py | 35 +++++++++++++++ 2 files changed, 126 insertions(+), 15 deletions(-) diff --git a/ffn/core.py b/ffn/core.py index 47b0e6e..a8f6b69 100644 --- a/ffn/core.py +++ b/ffn/core.py @@ -36,6 +36,9 @@ from matplotlib import pyplot as plt # noqa +# module level variable, can be different for non traditional markets (eg. crypto - 360) +TRADING_DAYS_PER_YEAR = 252 + class PerformanceStats(object): """ @@ -57,10 +60,11 @@ class PerformanceStats(object): * lookback_returns (Series): Returns for different lookback periods (1m, 3m, 6m, ytd...) * stats (Series): A series that contains all the stats + * annualization_factor (float): `Annualization factor` used in various calculations; aka `nperiods`, `252` """ - def __init__(self, prices, rf=0.0): + def __init__(self, prices, rf=0.0, annualization_factor=TRADING_DAYS_PER_YEAR): super(PerformanceStats, self).__init__() self.prices = prices self.name = self.prices.name @@ -68,6 +72,7 @@ def __init__(self, prices, rf=0.0): self._end = self.prices.index[-1] self.rf = rf + self.annualization_factor = annualization_factor self._update(self.prices) @@ -223,21 +228,24 @@ def _calculate(self, obj): # Will calculate daily figures only if the input data has at least daily frequency or higher (e.g hourly) # Rather < 2 days than <= 1 days in case of data taken at different hours of the days if r.index.to_series().diff().min() < pd.Timedelta("2 days"): - self.daily_mean = r.mean() * 252 - self.daily_vol = np.std(r, ddof=1) * np.sqrt(252) + self.daily_mean = r.mean() * self.annualization_factor + self.daily_vol = np.std(r, ddof=1) * np.sqrt(self.annualization_factor) - # if type(self.rf) is float: if isinstance(self.rf, float): - self.daily_sharpe = r.calc_sharpe(rf=self.rf, nperiods=252) - self.daily_sortino = calc_sortino_ratio(r, rf=self.rf, nperiods=252) + self.daily_sharpe = r.calc_sharpe( + rf=self.rf, nperiods=self.annualization_factor + ) + self.daily_sortino = calc_sortino_ratio( + r, rf=self.rf, nperiods=self.annualization_factor + ) # rf is a price series else: _rf_daily_price_returns = self.rf.to_returns() self.daily_sharpe = r.calc_sharpe( - rf=_rf_daily_price_returns, nperiods=252 + rf=_rf_daily_price_returns, nperiods=self.annualization_factor ) self.daily_sortino = calc_sortino_ratio( - r, rf=_rf_daily_price_returns, nperiods=252 + r, rf=_rf_daily_price_returns, nperiods=self.annualization_factor ) self.best_day = r.max() @@ -1402,7 +1410,9 @@ def calc_sharpe(returns, rf=0.0, nperiods=None, annualize=True): etc.) """ - # if type(rf) is float and rf != 0 and nperiods is None: + if nperiods is None: + nperiods = infer_freq(returns) + if isinstance(rf, float) and rf != 0 and nperiods is None: raise Exception("Must provide nperiods if rf != 0") @@ -1825,7 +1835,9 @@ def calc_erc_weights( return pd.Series(erc_weights, index=returns.columns, name="erc") -def get_num_days_required(offset, period="d", perc_required=0.90): +def get_num_days_required( + offset, period="d", perc_required=0.90, annualization_factor=252 +): """ Estimates the number of days required to assume that data is OK. @@ -1849,7 +1861,7 @@ def get_num_days_required(offset, period="d", perc_required=0.90): elif period == "m": req = (days / 20) * perc_required elif period == "y": - req = (days / 252) * perc_required + req = (days / annualization_factor) * perc_required else: raise NotImplementedError("period not supported. Supported periods are d, m, y") @@ -2276,9 +2288,68 @@ def deannualize(returns, nperiods): monthly, etc. """ + if nperiods is None: + nperiods = infer_freq(returns) return np.power(1 + returns, 1.0 / nperiods) - 1.0 +def infer_freq(data): + """ + Infer the most likely frequency given the input index. If the frequency is + uncertain or index is not DateTime like, just return None + Args: + * data (DataFrame, Series): Any timeseries dataframe or series + """ + try: + return pd.infer_freq(data.index, warn=False) + except Exception: + return None + + +def infer_nperiods(data, annualization_factor=None): + if annualization_factor is None: + annualization_factor = TRADING_DAYS_PER_YEAR + + freq = infer_freq(data) + + if freq is None: + return None + + def whole_periods_str_to_nperiods(freq): + if freq == "Y" or freq == "A": + return 1 + if freq == "M": + return 12 + if freq == "D": + return annualization_factor + if freq == "H": + return annualization_factor * 24 + if freq == "T": + return annualization_factor * 24 * 60 + if freq == "S": + return annualization_factor * 24 * 60 * 60 + return None + + "" + if len(freq) == 1: + return whole_periods_str_to_nperiods(freq) + else: + try: + if freq.startswith("A"): + return 1 + else: + whole_periods_str = freq[-1] + num_str = freq[:-1] + num = int(num_str) + return num * whole_periods_str_to_nperiods(whole_periods_str) + except KeyboardInterrupt: + raise + except BaseException: + return None + + return None + + def calc_sortino_ratio(returns, rf=0.0, nperiods=None, annualize=True): """ Calculates the `Sortino ratio `_ given a series of returns @@ -2291,10 +2362,12 @@ def calc_sortino_ratio(returns, rf=0.0, nperiods=None, annualize=True): provided if rf is non-zero and rf is not a price series """ - # if type(rf) is float and rf != 0 and nperiods is None: if isinstance(rf, float) and rf != 0 and nperiods is None: raise Exception("nperiods must be set if rf != 0 and rf is not a price series") + if nperiods is None: + nperiods = infer_freq(returns) + er = returns.to_excess_returns(rf, nperiods=nperiods) negative_returns = np.minimum(er[1:], 0.0) @@ -2322,9 +2395,10 @@ def to_excess_returns(returns, rf, nperiods=None): * excess_returns (Series, DataFrame): Returns - rf """ - # if type(rf) is float and nperiods is not None: - if isinstance(rf, float) and nperiods is not None: + if nperiods is None: + nperiods = infer_freq(returns) + if isinstance(rf, float) and nperiods is not None: _rf = deannualize(rf, nperiods) else: _rf = rf @@ -2370,7 +2444,9 @@ def to_ulcer_performance_index(prices, rf=0.0, nperiods=None): * nperiods (int): Used to deannualize rf if rf is provided (non-zero) """ - # if type(rf) is float and rf != 0 and nperiods is None: + if nperiods is None: + nperiods = infer_freq(prices) + if isinstance(rf, float) and rf != 0 and nperiods is None: raise Exception("nperiods must be set if rf != 0 and rf is not a price series") diff --git a/tests/test_core.py b/tests/test_core.py index 44a5771..2c65ffe 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -936,3 +936,38 @@ def test_drawdown_details(): drawdown = ffn.to_drawdown_series(returns) drawdown_details = ffn.drawdown_details(drawdown, index_type=drawdown.index) + + +def test_infer_nperiods(): + daily = pd.DataFrame(np.random.randn(10), + index = pd.date_range(start='2018-01-01', periods = 10, freq = 'D')) + hourly = pd.DataFrame(np.random.randn(10), + index = pd.date_range(start='2018-01-01', periods = 10, freq = 'H')) + yearly = pd.DataFrame(np.random.randn(10), + index = pd.date_range(start='2018-01-01', periods = 10, freq = 'Y')) + monthly = pd.DataFrame(np.random.randn(10), + index = pd.date_range(start='2018-01-01', periods = 10, freq = 'M')) + minutely = pd.DataFrame(np.random.randn(10), + index = pd.date_range(start='2018-01-01', periods = 10, freq = 'T')) + secondly = pd.DataFrame(np.random.randn(10), + index = pd.date_range(start='2018-01-01', periods = 10, freq = 'S')) + + minutely_30 = pd.DataFrame(np.random.randn(10), + index = pd.date_range(start='2018-01-01', periods = 10, freq = '30T')) + + + not_known_vals = np.concatenate((pd.date_range(start='2018-01-01', periods = 5, freq = '1H').values, + pd.date_range(start='2018-01-02', periods = 5, freq = '5H').values)) + + not_known = pd.DataFrame(np.random.randn(10), + index = pd.DatetimeIndex(not_known_vals)) + + assert ffn.core.infer_nperiods(daily) == ffn.core.TRADING_DAYS_PER_YEAR + assert ffn.core.infer_nperiods(hourly) == ffn.core.TRADING_DAYS_PER_YEAR * 24 + assert ffn.core.infer_nperiods(minutely) == ffn.core.TRADING_DAYS_PER_YEAR * 24 * 60 + assert ffn.core.infer_nperiods(secondly) == ffn.core.TRADING_DAYS_PER_YEAR * 24 * 60 * 60 + assert ffn.core.infer_nperiods(monthly) == 12 + assert ffn.core.infer_nperiods(yearly) == 1 + assert ffn.core.infer_nperiods(minutely_30) == ffn.core.TRADING_DAYS_PER_YEAR * 24 * 60 * 30 + assert ffn.core.infer_nperiods(not_known) is None + \ No newline at end of file