In [2]:
import pandas as pd
from scipy import stats

In [3]:
df_topBonds = pd.read_csv('source/informationAsymmetry-topBonds.csv', parse_dates=['TrdExctnDtEOM'])
df_topBonds['VolumeFraction'] = df_topBonds['RetailVolume'] / ( df_topBonds['InstitutionalVolume'] + df_topBonds['RetailVolume'] )
df_topBonds['TradesFraction'] = df_topBonds['RetailCusips'] / ( df_topBonds['InstitutionalCusips'] + df_topBonds['RetailCusips'] )
df_topBonds['TradeSizeFraction'] = ( df_topBonds['RetailCusips'] * df_topBonds['RetailVolume'] ) / ( df_topBonds['InstitutionalCusips'] * df_topBonds['InstitutionalVolume'] + df_topBonds['RetailCusips'] * df_topBonds['RetailVolume'] )

df_nonTopBonds = pd.read_csv('source/informationAsymmetry-nonTopBonds.csv', parse_dates=['TrdExctnDtEOM'])
df_nonTopBonds['VolumeFraction'] = df_nonTopBonds['RetailVolume'] / ( df_nonTopBonds['InstitutionalVolume'] + df_nonTopBonds['RetailVolume'] )
df_nonTopBonds['TradesFraction'] = df_nonTopBonds['RetailTrades'] / ( df_nonTopBonds['InstitutionalTrades'] + df_nonTopBonds['RetailTrades'] )
df_nonTopBonds['TradeSizeFraction'] = ( df_nonTopBonds['RetailCusips'] * df_nonTopBonds['RetailVolume'] ) / ( df_nonTopBonds['InstitutionalCusips'] * df_nonTopBonds['InstitutionalVolume'] + df_nonTopBonds['RetailCusips'] * df_nonTopBonds['RetailVolume'] )

In [4]:
print('Top Bonds')
print('Volume: Mean: {:.4f} | Median: {:.4f}'.format(df_topBonds['VolumeFraction'].mean(), df_topBonds['VolumeFraction'].median()))
print('Trades: Mean: {:.4f} | Median: {:.4f}'.format(df_topBonds['TradesFraction'].mean(), df_topBonds['TradesFraction'].median()))
print()
print('Non-Top Bonds')
print('Volume: Mean: {:.4f} | Median: {:.4f}'.format(df_nonTopBonds['VolumeFraction'].mean(), df_nonTopBonds['VolumeFraction'].median()))
print('Trades: Mean: {:.4f} | Median: {:.4f}'.format(df_nonTopBonds['TradesFraction'].mean(), df_nonTopBonds['TradesFraction'].median()))
print()
print('Difference')
print('Volume: Mean: {:.4f} | Median: {:.4f}'.format(df_nonTopBonds['VolumeFraction'].mean()-df_topBonds['VolumeFraction'].mean(), df_nonTopBonds['VolumeFraction'].median()-df_topBonds['VolumeFraction'].median()))
print('Trades: Mean: {:.4f} | Median: {:.4f}'.format(df_nonTopBonds['TradesFraction'].mean()-df_topBonds['TradesFraction'].mean(), df_nonTopBonds['TradesFraction'].median()-df_topBonds['TradesFraction'].median()))

Top Bonds
Volume: Mean: 0.0404 | Median: 0.0424
Trades: Mean: 0.4918 | Median: 0.4954

Non-Top Bonds
Volume: Mean: 0.0589 | Median: 0.0603
Trades: Mean: 0.8295 | Median: 0.8347

Difference
Volume: Mean: 0.0186 | Median: 0.0179
Trades: Mean: 0.3377 | Median: 0.3394


In [5]:
t_means = pd.Series(stats.ttest_1samp(df_topBonds['VolumeFraction'] - df_nonTopBonds['VolumeFraction'], 0.0)).to_frame().T
t_means = t_means.rename(columns={0: 't-stat', 1: 'p-value'})

print('T-Test on Difference of Means')
print()
print('t-stat: {:.4f} | p-value: {:.4f}'.format(t_means['t-stat'].max(), t_means['p-value'].max()))

T-Test on Difference of Means

t-stat: -60.9643 | p-value: 0.0000


In [7]:
t_median = pd.Series(stats.median_test(df_topBonds['VolumeFraction'], df_nonTopBonds['VolumeFraction'])).to_frame().T
t_median = t_median.rename(columns={0: 't-stat', 1: 'p-value'})

print('U-Test on Top/Non-Top Bonds')
print()
print('t-stat: {:.4f} | p-value: {:.4f}'.format(t_median['t-stat'].max(), t_median['p-value'].max()))

U-Test on Top/Non-Top Bonds

t-stat: 99.5885 | p-value: 0.0000
