## Commission By Net Liquidity Group

In [47]:
import pandas as pd
import numpy as np

In [48]:
# Render every float with two decimals followed by '%'.
# NOTE: this display option is global, so floats that are not percentages
# (e.g. COMM and EQUITY) are also shown with a trailing '%'.
pd.set_option('display.float_format', '{:.2f}%'.format)

In [49]:
# Read the gzip-compressed CSV extract into the working frame.
df = pd.read_csv(filepath_or_buffer='acctIdEqComm.csv.gz', compression='gzip')

In [50]:
# Print column dtypes, non-null counts and memory usage of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 558114 entries, 0 to 558113
Data columns (total 3 columns):
ACCT_ID    558114 non-null object
COMM       143445 non-null float64
EQUITY     453978 non-null float64
dtypes: float64(2), object(1)
memory usage: 17.0+ MB


In [51]:
# Show the last five rows as a quick sanity check of the loaded data.
df.tail()

Unnamed: 0,ACCT_ID,COMM,EQUITY
558109,ACCTID558109,nan%,858.00%
558110,ACCTID558110,nan%,84807.47%
558111,ACCTID558111,25.15%,37868.63%
558112,ACCTID558112,21.76%,46015.88%
558113,ACCTID558113,nan%,6368.76%


In [52]:
# Bucket edges for account equity. pd.cut is called with right=False further
# down, so each bucket is the half-open interval [lower edge, upper edge).
equityBins = [
    -np.inf,
    0,
    500,
    2000,
    5000,
    10000,
    25000,
    100000,
    250000,
    500000,
    1000000,
    5000000,
    10000000,
    np.inf,
]

# One label per bucket, listed in the same order as the edges above.
equityLabels = [
    'Equity < 0',
    'Equity 0 - 500',
    'Equity 500 - 2000',
    'Equity 2000 - 5000',
    'Equity 5000 - 10000',
    'Equity 10K - 25K',
    'Equity 25K - 100K',
    'Equity 100K - 250K',
    'Equity 250K - 500K',
    'Equity 500K - 1M',
    'Equity 1M - 5M',
    'Equity 5M - 10M',
    'Equity > 10M',
]

# Missing values become 0 in place; an account with no recorded EQUITY
# therefore falls into the 'Equity 0 - 500' bucket.
df.fillna(value=0, inplace=True)

In [53]:
# Map each bucket label to its position in equityLabels (0 for the first
# label). The position is used later as a sort key for the grouped result.
# enumerate() replaces the hand-maintained counter and leaves no scratch
# variables behind in the notebook namespace.
equityOrder = {label: position for position, label in enumerate(equityLabels)}

In [54]:
# Assign every account to an equity bucket; right=False makes each interval
# closed on the left and open on the right.
df['equityGroups'] = pd.cut(df['EQUITY'], bins=equityBins, labels=equityLabels, right=False)

In [55]:
# Attach the bucket's position (from equityOrder) to each row for later sorting.
df['order'] = df['equityGroups'].map(equityOrder)

In [56]:
# Per-bucket account count, commission total and bucket rank, sorted by rank
# and rounded to whole numbers.
#
# Function lists are used instead of the nested {'cnt': 'size'} renaming dict:
# that form was deprecated in pandas 0.20 and raises SpecificationError from
# pandas 1.0 on. The lists keep the two-level column index, and the 'size'
# label is renamed to 'cnt' afterwards ('size' only occurs in the second
# level, so a plain rename is sufficient).
# DataFrame.round() rounds the numeric columns and leaves any non-numeric
# column untouched.
resDf = (df.groupby('equityGroups')
           .agg({'ACCT_ID': ['size'],
                 'COMM': ['sum'],
                 'order': ['first']})
           .rename(columns={'size': 'cnt'})
           .sort_values(('order', 'first'))
           .round())

In [57]:
# Inspect the two-level column index produced by the aggregation.
resDf.columns

MultiIndex(levels=[['COMM', 'ACCT_ID', 'order'], ['cnt', 'first', 'sum']],
           labels=[[0, 1, 2], [2, 0, 1]])

In [58]:
# Single 'Total' row keyed with the same (column, statistic) tuples as resDf,
# plus the cnt% / sum% columns that resDf receives in the following cells.
# Bucket ranks run from 0 to len(equityLabels) - 1, so len(equityLabels) places
# the total after the last bucket without hard-coding the number of buckets.
totalDf = pd.DataFrame({('order', 'first'): len(equityLabels),
                        ('ACCT_ID', 'cnt'): resDf[('ACCT_ID', 'cnt')].sum(),
                        ('ACCT_ID', 'cnt%'): 100.00,
                        ('COMM', 'sum'): resDf[('COMM', 'sum')].sum(),
                        ('COMM', 'sum%'): 100.00}, index=['Total'])
totalDf.index.name = 'equityGroups'
totalDf

Unnamed: 0_level_0,ACCT_ID,ACCT_ID,COMM,COMM,order
Unnamed: 0_level_1,cnt,cnt%,sum,sum%,first
equityGroups,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Total,558114,100.00%,5311890.00%,100.00%,13


In [59]:
# Each bucket's share of the total account count, in percent.
resDf[('ACCT_ID', 'cnt%')] = resDf[('ACCT_ID', 'cnt')].div(resDf[('ACCT_ID', 'cnt')].sum()).mul(100)

In [60]:
# Each bucket's share of the total commission, in percent.
resDf[('COMM', 'sum%')] = resDf[('COMM', 'sum')].div(resDf[('COMM', 'sum')].sum()).mul(100)

In [61]:
# The commission totals were rounded during aggregation; store them as 64-bit
# integers so they are no longer rendered through the float display format.
resDf[('COMM', 'sum')] = resDf[('COMM', 'sum')].astype(np.int64)

In [62]:
# Replace the group index with a plain array of its label values (the index
# name is dropped as a side effect) - presumably so that the 'Total' row,
# whose label is not one of the bucket labels, can be appended.
# Index.get_values() was deprecated in pandas 0.25 and removed in 1.0;
# np.asarray() returns the same label array on old and new versions.
resDf.index = np.asarray(resDf.index)

In [63]:
#resDf

In [64]:
# Stack the per-bucket rows on top of the single 'Total' row.
allDf = pd.concat((resDf, totalDf), axis=0)

In [44]:
# Display the combined table: one row per equity bucket followed by the total.
allDf

Unnamed: 0_level_0,ACCT_ID,ACCT_ID,COMM,COMM,order
Unnamed: 0_level_1,cnt,cnt%,sum,sum%,first
Equity < 0,11614,2.08%,7607,0.14%,0
Equity 0 - 500,222629,39.89%,692874,13.04%,1
Equity 500 - 2000,32371,5.80%,98740,1.86%,2
Equity 2000 - 5000,41213,7.38%,130125,2.45%,3
Equity 5000 - 10000,47188,8.45%,146888,2.77%,4
Equity 10K - 25K,62928,11.28%,320994,6.04%,5
Equity 25K - 100K,79799,14.30%,667918,12.57%,6
Equity 100K - 250K,32571,5.84%,584122,11.00%,7
Equity 250K - 500K,13899,2.49%,450668,8.48%,8
Equity 500K - 1M,7286,1.31%,395422,7.44%,9
