In [121]:
import pandas as pd

# Let pandas show up to 100 rows before it switches to a truncated view.
pd.options.display.max_rows = 100

### Whole Dataset

In [122]:
# Whole-dataset summary: load the CSV, derive maturity/age columns, build the
# per-month and per-issuer group pivots, then print descriptive statistics.
df = pd.read_csv('source/wholeDataset.csv', parse_dates=['TrdExctnDtEOM', 'OfferingDate', 'Maturity'])
# Absolute day gap between TrdExctnDtEOM and Maturity, divided by 30.
df['MaturityMonths'] = (df['TrdExctnDtEOM'] - df['Maturity']).abs().dt.days / 30
# Absolute day gap between OfferingDate and TrdExctnDtEOM, divided by 360.
df['Age'] = (df['OfferingDate'] - df['TrdExctnDtEOM']).abs().dt.days / 360

# Gs share per month: distinct CusipId per (month, group), one column per group
df_ = (
    df.groupby(['TrdExctnDtEOM', 'TopBondGrouping'])['CusipId']
    .nunique()
    .reset_index()
    .pivot(index='TrdExctnDtEOM', columns='TopBondGrouping', values='CusipId')
    .reset_index()
)
# A month with no bond in one group has NaN in that column after the pivot.
# The row-wise sum skips NaN and the numerator is filled with 0, so such a
# month still gets valid shares instead of NaN in all three share columns
# (which would silently drop it from the means printed below).
group_total = df_[['G1', 'G2', 'G3']].sum(axis=1)
for group in ('G1', 'G2', 'G3'):
    df_[group + '_share'] = df_[group].fillna(0) / group_total

# Gs bond count per issuer per month; NaN where an issuer has no bond in a group
df__ = (
    df.groupby(['TrdExctnDtEOM', 'IssuerId', 'TopBondGrouping'])['CusipId']
    .nunique()
    .reset_index()
    .pivot(index=['TrdExctnDtEOM', 'IssuerId'], columns='TopBondGrouping', values='CusipId')
    .reset_index()
)

# CusipId subsets reused by the rating and maturity-band lines
ig_cusips = df.loc[df['RatingClass'] == 'IG', 'CusipId']
hy_cusips = df.loc[df['RatingClass'] == 'HY', 'CusipId']
band_cusips = {band: df.loc[df['MaturityBand'] == band, 'CusipId'] for band in (1, 2, 3)}

# output
# Float fields use an explicit 'f' type: a bare precision such as '{:.2}'
# means significant digits and switches to scientific notation once the value
# reaches 10 ('.2') or 100 ('.3').
print('Transacation Based Monthly Prices: {:,}'.format(df['Coupon'].count()))
print('Monthly Returns: {:,}\n'.format(df['R'].count()))
print('Distinct Cusips: {:,}'.format(df['CusipId'].nunique()))
print('Distinct Issuers: {:,} | Private: {:,} | Public: {:,}\n'.format(
    df['IssuerId'].nunique(),
    df.loc[df['IssuerOwnership'] == 1, 'IssuerId'].nunique(),
    df.loc[df['IssuerOwnership'] == 0, 'IssuerId'].nunique(),
))
print('Institunional Share => Trades: {:.2%} | Volume: {:.2%}\n'.format(
    df['InstitunionalTradeShare'].mean(),
    df['InstitunionalVolumeShare'].mean(),
))
print('Rating')
print('IG Share: {:.2%}'.format(ig_cusips.nunique() / df['CusipId'].nunique()))
print('Distinct Cusips => IG: {:,} | HY: {:,}'.format(
    ig_cusips.nunique(), hy_cusips.nunique()
))
# .count() here is the number of non-null CusipId rows in each rating class
print('Total transactions => IG: {:,} | HY: {:,}\n'.format(
    ig_cusips.count(), hy_cusips.count()
))
print('Maturity Band')
print('Maturity (months): {:.2f} | Age (years): {:.2f}'.format(
    df['MaturityMonths'].mean(), df['Age'].mean()
))
print('Distinct Cusips => 1: {:,} | 2: {:,} | 3: {:,}'.format(
    band_cusips[1].nunique(), band_cusips[2].nunique(), band_cusips[3].nunique()
))
print('Total Transactions => 1: {:,} | 2: {:,} | 3: {:,}\n'.format(
    band_cusips[1].count(), band_cusips[2].count(), band_cusips[3].count()
))
print('Number of Bonds per Firm per Month')
# min/max/mean/std skip NaN, so each line covers only issuer-months that have
# at least one bond in that group
print('G1 => Min: {} | Max: {}  | Mean: {:.2f} | Std: {:.2f}'.format(
    df__['G1'].min(), df__['G1'].max(), df__['G1'].mean(), df__['G1'].std()
))
print('G2 => Min: {} | Max: {} | Mean: {:.2f} | Std: {:.2f}'.format(
    df__['G2'].min(), df__['G2'].max(), df__['G2'].mean(), df__['G2'].std()
))
print('G3 => Min: {} | Max: {}   | Mean: {:.2f} | Std: {:.2f}\n'.format(
    df__['G3'].min(), df__['G3'].max(), df__['G3'].mean(), df__['G3'].std()
))
print('Top Bond Grouping (% of share) => G1: {:.2%} | G2: {:.2%} | G3: {:.2%}\n'.format(
    df_['G1_share'].mean(), df_['G2_share'].mean(), df_['G3_share'].mean()
))
print('Returns')
print('Volume => Mean: {:,.2f} | Median: {:,.2f}'.format(
    df['T_Volume'].mean(), df['T_Volume'].median()
))
print('Coupon => Mean: {:.2f} | Median: {:.2f}'.format(
    df['Coupon'].mean(), df['Coupon'].median()
))
print('Returns => Mean: {:.2%} | Median: {:.2%} | Std: {:.2%}'.format(
    df['R'].mean(), df['R'].median(), df['R'].std()
))

Transacation Based Monthly Prices: 1,050,711
Monthly Returns: 898,063

Distinct Cusips: 23,574
Distinct Issuers: 3,028 | Private: 465 | Public: 2,563

Institunional Share => Trades: 20.01% | Volume: 34.20%

Rating
IG Share: 85.68%
Distinct Cusips => IG: 20,199 | HY: 5,531
Total transactions => IG: 852,011 | HY: 198,700

Maturity Band
Maturity (months): 109.08 | Age (years): 5.03
Distinct Cusips => 1: 2,833 | 2: 16,420 | 3: 4,293
Total Transactions => 1: 46,108 | 2: 716,123 | 3: 287,903

Number of Bonds per Firm per Month
G1 => Min: 1.0 | Max: 32.0  | Mean: 1.6 | Std: 1.15
G2 => Min: 1.0 | Max: 100.0 | Mean: 6.6 | Std: 6.71
G3 => Min: 1.0 | Max: 5.0   | Mean: 1.4 | Std: 0.66

Top Bond Grouping (% of share) => G1: 13.44% | G2: 78.23% | G3: 8.33%

Returns
Volume => Mean: 3,107,874.37 | Median: 355,000.00
Coupon => Mean: 5.23 | Median: 5.25
Returns => Mean: 0.45% | Median: 0.27% | Std: 16.13%


### Top Bonds

In [123]:
# Top-bonds summary: load the CSV, derive maturity/age columns, build the
# per-month and per-issuer group pivots, then print descriptive statistics.
df = pd.read_csv('source/topBonds.csv', parse_dates=['TrdExctnDtEOM', 'OfferingDate', 'Maturity'])
# Absolute day gap between TrdExctnDtEOM and Maturity, divided by 30.
df['MaturityMonths'] = (df['TrdExctnDtEOM'] - df['Maturity']).abs().dt.days / 30
# Absolute day gap between OfferingDate and TrdExctnDtEOM, divided by 360.
df['Age'] = (df['OfferingDate'] - df['TrdExctnDtEOM']).abs().dt.days / 360

# Gs share per month: distinct CusipId per (month, group), one column per group
df_ = (
    df.groupby(['TrdExctnDtEOM', 'TopBondGrouping'])['CusipId']
    .nunique()
    .reset_index()
    .pivot(index='TrdExctnDtEOM', columns='TopBondGrouping', values='CusipId')
    .reset_index()
)
# G1 is forced to 0 so the three-group share formulas still apply
# (presumably this file contains no G1 rows -- TODO confirm).
df_['G1'] = 0
# A month with no bond in one group has NaN in that column after the pivot.
# The row-wise sum skips NaN and the numerator is filled with 0, so such a
# month still gets valid shares instead of NaN in all three share columns
# (which would silently drop it from the means printed below).
group_total = df_[['G1', 'G2', 'G3']].sum(axis=1)
for group in ('G1', 'G2', 'G3'):
    df_[group + '_share'] = df_[group].fillna(0) / group_total

# Gs bond count per issuer per month; NaN where an issuer has no bond in a group
df__ = (
    df.groupby(['TrdExctnDtEOM', 'IssuerId', 'TopBondGrouping'])['CusipId']
    .nunique()
    .reset_index()
    .pivot(index=['TrdExctnDtEOM', 'IssuerId'], columns='TopBondGrouping', values='CusipId')
    .reset_index()
)
df__['G1'] = 0

# CusipId subsets reused by the rating and maturity-band lines
ig_cusips = df.loc[df['RatingClass'] == 'IG', 'CusipId']
hy_cusips = df.loc[df['RatingClass'] == 'HY', 'CusipId']
band_cusips = {band: df.loc[df['MaturityBand'] == band, 'CusipId'] for band in (1, 2, 3)}

# output
# Float fields use an explicit 'f' type: a bare precision such as '{:.2}'
# means significant digits and switches to scientific notation once the value
# reaches 10 ('.2') or 100 ('.3').
print('Transacation Based Monthly Prices: {:,}'.format(df['Coupon'].count()))
print('Monthly Returns: {:,}\n'.format(df['R'].count()))
print('Distinct Cusips: {:,}'.format(df['CusipId'].nunique()))
print('Distinct Issuers: {:,} | Private: {:,} | Public: {:,}\n'.format(
    df['IssuerId'].nunique(),
    df.loc[df['IssuerOwnership'] == 1, 'IssuerId'].nunique(),
    df.loc[df['IssuerOwnership'] == 0, 'IssuerId'].nunique(),
))
print('Institunional Share => Trades: {:.2%} | Volume: {:.2%}\n'.format(
    df['InstitunionalTradeShare'].mean(),
    df['InstitunionalVolumeShare'].mean(),
))
print('Rating')
print('IG Share: {:.2%}'.format(ig_cusips.nunique() / df['CusipId'].nunique()))
print('Distinct Cusips => IG: {:,} | HY: {:,}'.format(
    ig_cusips.nunique(), hy_cusips.nunique()
))
# .count() here is the number of non-null CusipId rows in each rating class
print('Total transactions => IG: {:,} | HY: {:,}\n'.format(
    ig_cusips.count(), hy_cusips.count()
))
print('Maturity Band')
print('Maturity (months): {:.2f} | Age (years): {:.2f}'.format(
    df['MaturityMonths'].mean(), df['Age'].mean()
))
print('Distinct Cusips => 1: {:,} | 2: {:,} | 3: {:,}'.format(
    band_cusips[1].nunique(), band_cusips[2].nunique(), band_cusips[3].nunique()
))
print('Total Transactions => 1: {:,} | 2: {:,} | 3: {:,}\n'.format(
    band_cusips[1].count(), band_cusips[2].count(), band_cusips[3].count()
))
print('Number of Bonds per Firm per Month')
# min/max/mean/std skip NaN, so the G2/G3 lines cover only issuer-months that
# have at least one bond in that group; G1 is the constant 0 set above
print('G1 => Min: {}   | Max: {}   | Mean: {:.2f} | Std: {:.2f}'.format(
    df__['G1'].min(), df__['G1'].max(), df__['G1'].mean(), df__['G1'].std()
))
print('G2 => Min: {} | Max: {} | Mean: {:.2f} | Std: {:.2f}'.format(
    df__['G2'].min(), df__['G2'].max(), df__['G2'].mean(), df__['G2'].std()
))
print('G3 => Min: {} | Max: {} | Mean: {:.2f} | Std: {:.2f}\n'.format(
    df__['G3'].min(), df__['G3'].max(), df__['G3'].mean(), df__['G3'].std()
))
print('Top Bond Grouping (% of share) => G1: {:.2%} | G2: {:.2%} | G3: {:.2%}\n'.format(
    df_['G1_share'].mean(), df_['G2_share'].mean(), df_['G3_share'].mean()
))
print('Consecutive Month Status => Min: {} | Max: {} | Mean: {:.2f} | Median: {:.2f}\n'.format(
    df['ConsecutiveMonths'].min(),
    df['ConsecutiveMonths'].max(),
    df['ConsecutiveMonths'].mean(),
    df['ConsecutiveMonths'].median(),
))
print('Returns')
print('Volume => Mean: {:,.2f} | Median: {:,.2f}'.format(
    df['T_Volume'].mean(), df['T_Volume'].median()
))
print('Coupon => Mean: {:.2f} | Median: {:.2f}'.format(
    df['Coupon'].mean(), df['Coupon'].median()
))
print('Returns => Mean: {:.2%} | Median: {:.2%} | Std: {:.2%}'.format(
    df['R'].mean(), df['R'].median(), df['R'].std()
))

Transacation Based Monthly Prices: 357,168
Monthly Returns: 179,370

Distinct Cusips: 18,673
Distinct Issuers: 2,935 | Private: 433 | Public: 2,502

Institunional Share => Trades: 41.24% | Volume: 65.24%

Rating
IG Share: 83.36%
Distinct Cusips => IG: 15,566 | HY: 4,797
Total transactions => IG: 251,441 | HY: 105,727

Maturity Band
Maturity (months): 107.79 | Age (years): 3.71
Distinct Cusips => 1: 1,604 | 2: 13,555 | 3: 3,500
Total Transactions => 1: 13,653 | 2: 271,390 | 3: 72,007

Number of Bonds per Firm per Month
G1 => Min: 0   | Max: 0   | Mean: 0.0 | Std: 0.0
G2 => Min: 1.0 | Max: 6.0 | Mean: 2.2 | Std: 0.91
G3 => Min: 1.0 | Max: 5.0 | Mean: 1.4 | Std: 0.66

Top Bond Grouping (% of share) => G1: 0.00% | G2: 76.55% | G3: 23.45%

Consecutive Month Status => Min: 1 | Max: 94 | Mean: 3.0 | Median: 2.0

Returns
Volume => Mean: 6,895,765.78 | Median: 2,000,000.00
Coupon => Mean: 5.46 | Median: 5.45
Returns => Mean: 0.57% | Median: 0.36% | Std: 6.23%


### Non-Top Bonds

In [124]:
# Non-top-bonds summary: load the CSV, derive maturity/age columns, build the
# per-month and per-issuer group pivots, then print descriptive statistics.
df = pd.read_csv('source/nonTopBonds.csv', parse_dates=['TrdExctnDtEOM', 'OfferingDate', 'Maturity'])
# Absolute day gap between TrdExctnDtEOM and Maturity, divided by 30.
df['MaturityMonths'] = (df['TrdExctnDtEOM'] - df['Maturity']).abs().dt.days / 30
# Absolute day gap between OfferingDate and TrdExctnDtEOM, divided by 360.
df['Age'] = (df['OfferingDate'] - df['TrdExctnDtEOM']).abs().dt.days / 360

# Gs share per month: distinct CusipId per (month, group), one column per group
df_ = (
    df.groupby(['TrdExctnDtEOM', 'TopBondGrouping'])['CusipId']
    .nunique()
    .reset_index()
    .pivot(index='TrdExctnDtEOM', columns='TopBondGrouping', values='CusipId')
    .reset_index()
)
# G3 is forced to 0 so the three-group share formulas still apply
# (presumably this file contains no G3 rows -- TODO confirm).
df_['G3'] = 0
# A month with no bond in one group has NaN in that column after the pivot.
# The row-wise sum skips NaN and the numerator is filled with 0, so such a
# month still gets valid shares instead of NaN in all three share columns
# (which would silently drop it from the means printed below).
group_total = df_[['G1', 'G2', 'G3']].sum(axis=1)
for group in ('G1', 'G2', 'G3'):
    df_[group + '_share'] = df_[group].fillna(0) / group_total

# Gs bond count per issuer per month; NaN where an issuer has no bond in a group
df__ = (
    df.groupby(['TrdExctnDtEOM', 'IssuerId', 'TopBondGrouping'])['CusipId']
    .nunique()
    .reset_index()
    .pivot(index=['TrdExctnDtEOM', 'IssuerId'], columns='TopBondGrouping', values='CusipId')
    .reset_index()
)
df__['G3'] = 0

# CusipId subsets reused by the rating and maturity-band lines
ig_cusips = df.loc[df['RatingClass'] == 'IG', 'CusipId']
hy_cusips = df.loc[df['RatingClass'] == 'HY', 'CusipId']
band_cusips = {band: df.loc[df['MaturityBand'] == band, 'CusipId'] for band in (1, 2, 3)}

# output
# Float fields use an explicit 'f' type: a bare precision such as '{:.2}'
# means significant digits and switches to scientific notation once the value
# reaches 10 ('.2') or 100 ('.3').
print('Transacation Based Monthly Prices: {:,}'.format(df['Coupon'].count()))
print('Monthly Returns: {:,}\n'.format(df['R'].count()))
print('Distinct Cusips: {:,}'.format(df['CusipId'].nunique()))
print('Distinct Issuers: {:,} | Private: {:,} | Public: {:,}\n'.format(
    df['IssuerId'].nunique(),
    df.loc[df['IssuerOwnership'] == 1, 'IssuerId'].nunique(),
    df.loc[df['IssuerOwnership'] == 0, 'IssuerId'].nunique(),
))
print('Institunional Share => Trades: {:.2%} | Volume: {:.2%}\n'.format(
    df['InstitunionalTradeShare'].mean(),
    df['InstitunionalVolumeShare'].mean(),
))
print('Rating')
print('IG Share: {:.2%}'.format(ig_cusips.nunique() / df['CusipId'].nunique()))
# .count() here is the number of non-null CusipId rows in each rating class
print('Total transactions => IG: {:,} | HY: {:,}'.format(
    ig_cusips.count(), hy_cusips.count()
))
print('Distinct Cusips => IG: {:,} | HY: {:,}\n'.format(
    ig_cusips.nunique(), hy_cusips.nunique()
))
print('Maturity Band')
print('Maturity: {:.2f} | Age: {:.2f}'.format(
    df['MaturityMonths'].mean(), df['Age'].mean()
))
print('Total Transactions => 1: {:,} | 2: {:,} | 3: {:,}'.format(
    band_cusips[1].count(), band_cusips[2].count(), band_cusips[3].count()
))
print('Distinct Cusips => 1: {:,} | 2: {:,} | 3: {:,}\n'.format(
    band_cusips[1].nunique(), band_cusips[2].nunique(), band_cusips[3].nunique()
))
print('Number of Bonds per Firm per Month')
# min/max/mean/std skip NaN, so the G1/G2 lines cover only issuer-months that
# have at least one bond in that group; G3 is the constant 0 set above
print('G1 => Min: {} | Max: {} | Mean: {:.2f} | Std: {:.2f}'.format(
    df__['G1'].min(), df__['G1'].max(), df__['G1'].mean(), df__['G1'].std()
))
print('G2 => Min: {} | Max: {} | Mean: {:.2f} | Std: {:.2f}'.format(
    df__['G2'].min(), df__['G2'].max(), df__['G2'].mean(), df__['G2'].std()
))
print('G3 => Min: {}   | Max: {}    | Mean: {:.2f} | Std: {:.2f}\n'.format(
    df__['G3'].min(), df__['G3'].max(), df__['G3'].mean(), df__['G3'].std()
))
print('Top Bond Grouping (% of share) => G1: {:.2%} | G2: {:.2%} | G3: {:.2%}\n'.format(
    df_['G1_share'].mean(), df_['G2_share'].mean(), df_['G3_share'].mean()
))
print('Consecutive Month Status => Min: {} | Max: {} | Mean: {:.2f} | Median: {:.2f}\n'.format(
    df['ConsecutiveMonths'].min(),
    df['ConsecutiveMonths'].max(),
    df['ConsecutiveMonths'].mean(),
    df['ConsecutiveMonths'].median(),
))
print('Returns')
print('Volume => Mean: {:,.2f} | Median: {:,.2f}'.format(
    df['T_Volume'].mean(), df['T_Volume'].median()
))
print('Coupon => Mean: {:.2f} | Median: {:.2f}'.format(
    df['Coupon'].mean(), df['Coupon'].median()
))
print('Returns => Mean: {:.2%} | Median: {:.2%} | Std: {:.2%}'.format(
    df['R'].mean(), df['R'].median(), df['R'].std()
))

Transacation Based Monthly Prices: 693,543
Monthly Returns: 447,802

Distinct Cusips: 23,010
Distinct Issuers: 2,954 | Private: 436 | Public: 2,518

Institunional Share => Trades: 9.08% | Volume: 18.21%

Rating
IG Share: 85.52%
Total transactions => IG: 600,570 | HY: 92,973
Distinct Cusips => IG: 19,679 | HY: 5,295

Maturity Band
Maturity: 109.74 | Age: 5.71
Total Transactions => 1: 32,455 | 2: 444,733 | 3: 215,896
Distinct Cusips => 1: 2,765 | 2: 16,017 | 3: 4,205

Number of Bonds per Firm per Month
G1 => Min: 1.0 | Max: 32.0 | Mean: 1.6 | Std: 1.15
G2 => Min: 1.0 | Max: 97.0 | Mean: 5.0 | Std: 6.5
G3 => Min: 0   | Max: 0    | Mean: 0.0 | Std: 0.0

Top Bond Grouping (% of share) => G1: 20.41% | G2: 79.59% | G3: 0.00%

Consecutive Month Status => Min: 1 | Max: 145 | Mean: 5.4 | Median: 2.0

Returns
Volume => Mean: 1,157,146.59 | Median: 150,000.00
Coupon => Mean: 5.12 | Median: 5.15
Returns => Mean: 0.42% | Median: 0.24% | Std: 22.13%
