In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import skew, kurtosis, norm

In [2]:
dollar_df = pd.read_csv('../data/processed_sp500_futures_dollar.csv')
dollar_df['datetime'] = pd.to_datetime(dollar_df['datetime'], format='mixed')

#### Compute HHI index on positive and negative returns

In [3]:
def compute_hhi(returns):
    r_plus = returns[returns > 0]
    r_minus = returns[returns < 0]
    w_plus = r_plus / r_plus.sum()
    w_minus = r_minus / r_minus.sum()
    n_plus = len(r_plus)
    n_minus = len(r_minus)
    
    hhi_plus = (sum(w_plus**2) - 1/n_plus) / (1 - 1/n_plus)
    hhi_minus = (sum(w_minus**2) - 1/n_minus) / (1 - 1/n_minus)

    return hhi_plus, hhi_minus

In [4]:
hhi_plus, hhi_minus = compute_hhi(dollar_df.label_returns)
print("HHI of positive returns are {:.10f}".format(hhi_plus))
print("HHI of negative returns are {:.10f}".format(hhi_minus))

HHI of positive returns are 0.0000047739
HHI of negative returns are 0.0000058734


#### Compute 95-percentile DD and TuW

In [5]:
# Compute all the drawdowns from high watermarks
# Then choose the 95-percentile value

In [6]:
def compute_drawdown_timeunderwater(prices, datetime):
    high_water_mark = prices.cummax()
    dd = pd.DataFrame({'datetime': datetime, 'price': prices, 'high_water_mark': high_water_mark})
    dd = dd[dd.high_water_mark > dd.price]
    dd = dd.groupby('high_water_mark').agg({'price': 'mean', 'datetime': lambda x: x.iloc[-1] - x.iloc[0]}).reset_index()
    dd['drawdown'] = dd.high_water_mark - dd.price

    return dd.drawdown, dd.datetime

In [7]:
drawdown, timeunderwater = compute_drawdown_timeunderwater(dollar_df.close, dollar_df.datetime)

drawdown_95_percentile = np.percentile(drawdown, 95)
time_under_water_95_percentile = pd.Timedelta(np.percentile(timeunderwater, 95))

print("95-percentile draw down is: {}".format(drawdown_95_percentile))
print("95-percentile time under water is: {}".format(time_under_water_95_percentile))

95-percentile draw down is: 8.865270570793323
95-percentile time under water is: 6 days 00:39:09.500000


#### Compute Annualized Average Return, Average Returns from Hits / Misses, and Annualized SR

In [8]:
def compute_annualized_ave_return(datetimes, prices):
    total_return = (prices.iloc[-1] - prices.iloc[0]) / prices.iloc[0] - 1
    year_count = (datetimes.iloc[-1] - datetimes.iloc[0]).days // 365
    annualized_return = (1+total_return)**(1/year_count) - 1
    return annualized_return

In [9]:
annualized_return = compute_annualized_ave_return(dollar_df.datetime, dollar_df.close)
print("The Annualized Return is: {:.2f}%".format(annualized_return * 100))

The Annualized Return is: 2.70%


In [10]:
def compute_ave_return_hit_miss(returns, prices):
    hit_idx = returns > 0
    hit_avg_return = (returns[hit_idx] / prices[hit_idx]).mean()
    
    miss_idx = returns < 0
    miss_avg_return = (returns[miss_idx] / prices[miss_idx]).mean()

    return hit_avg_return, miss_avg_return

In [11]:
hit_avg_return, miss_avg_return = compute_ave_return_hit_miss(dollar_df.label_returns, dollar_df.close)

print("Average return from hits: {:.2f}%".format(hit_avg_return * 100))
print("Average return from misses: {:.2f}%".format(miss_avg_return * 100))

Average return from hits: 0.32%
Average return from misses: -0.34%


In [12]:
def annual_sharpe(returns, prices, datetimes):
    return_rates = returns / prices
    sharpe_ratio = return_rates.mean() / return_rates.std()
    day_count = (datetimes.iloc[-1] - datetimes.iloc[0]).days
    annualized_sharpe_ratio = np.sqrt(day_count) * sharpe_ratio
    return annualized_sharpe_ratio

In [13]:
annual_sharpe_ratio = annual_sharpe(dollar_df.label_returns, dollar_df.close, dollar_df.datetime)
print("Annualized Sharpe Ratio is {:.2f}".format(annual_sharpe_ratio))

Annualized Sharpe Ratio is 0.44


#### Compute Information Ratio, where the benchmark is the risk-free rate.

##### Assuming the risk-free rate is 80% of the mean of the portfolio

In [14]:
def information_ratio(returns, datetimes, rf_rate):
    excess_return = dollar_df.label_returns - rf_rate
    day_count = (datetimes.iloc[-1] - datetimes.iloc[0]).days
    information_ratio = excess_return.mean() / excess_return.std() * np.sqrt(day_count)
    return information_ratio

In [15]:
rf_rate = dollar_df.label_returns.mean() * 0.8
information_ratio = information_ratio(dollar_df.label_returns, dollar_df.datetime, rf_rate)
print("The Information Ratio is {:.10f}".format(information_ratio))

The Information Ratio is -0.0006002150


#### Compute the Probabilistic Sharpe Ratio

In [16]:
def prob_sharpe_ratio(returns, hyp_sr):
    obs_sr = returns.mean() / returns.std()
    T = len(returns)
    y_3 = skew(returns)
    y_4 = kurtosis(returns)
    z_score = (obs_sr-hyp_sr) * np.sqrt(T-1) / np.sqrt(1 - y_3*obs_sr + 1/4*(y_4-1)*obs_sr**2)
    return norm().cdf(z_score)

In [17]:
prob_sharpe_ratio(dollar_df.label_returns, 0.0005)

0.2726537651302702

#### Compute DSR, where we assume there were 100 trials, and the variance of the trials’ SR was 0.5.

In [18]:
def deflated_sr(N, sr_variance):
    norm_dist = norm()
    return np.sqrt(sr_variance) * ((1-np.euler_gamma)*norm_dist.ppf(1-1/N) + np.euler_gamma*(norm_dist.ppf(1-1/N*1/np.e)))

In [19]:
deflated_sr(100, 0.5)

1.7894064662732079

#### Recompute returns given the new strategy: long during even years and short during odd years

In [20]:
long_short_df = dollar_df.copy()

In [22]:
long_short_df['long_short_pos'] = np.where(long_short_df.datetime.dt.year % 2 == 0, 1, -1)
long_short_df['long_short_returns'] = long_short_df.label_returns * long_short_df.long_short_pos