In [1]:
import pandas as pd
import re
import numpy as np

In [2]:
# Load the main logit results, keyed by variable name.
# File lines 0, 1, 3 and 4 are skipped, so the first remaining line (line 2)
# is used as the header row.
# The 'b.' / 'o.' markers inside a name are collapsed to a plain '.'
# (e.g. '1b.x' -> '1.x'); presumably these are Stata's base-level / omitted
# flags -- TODO confirm against the TSV. Missing names are left untouched.
main = (
    pd.read_table('../stata/logit.tsv', skiprows=[0, 1, 3, 4])
    .assign(
        VARIABLES=lambda frame: frame['VARIABLES'].map(
            lambda name: name if pd.isnull(name) else name.replace('b.', '.').replace('o.', '.')
        )
    )
    .set_index('VARIABLES')
)

In [3]:
# Copy the household-size x income-category estimates into `main`.
income = pd.read_table('../stata/income_hhsize.tsv', skiprows=[0, 2, 3, 4, 5, 6])

# (suffix of the source column in `income`, destination column in `main`).
# Presumably the '.1'/'.2'/'.3' suffixes come from pandas de-duplicating
# repeated header names -- TODO confirm against the TSV.
incomeColumnMap = (('', 'coef'), ('.1', 'pval.1'), ('.2', 'ci_low'), ('.3', 'ci_high'))

# Only the first four rows of the file are used.
for _, record in income.head(4).iterrows():
    # Normalise the row name the same way the `main` index was normalised,
    # additionally dropping any 'bn' marker.
    baseName = record['VARIABLES'].replace('bn', '').replace('b.', '.').replace('o.', '.')

    for level in (1, 2, 3):
        sourcePrefix = f'{level}.incomeCategory'
        targetRow = f'{baseName}#{level}.incomeCategory'

        for suffix, targetCol in incomeColumnMap:
            # NOTE: .loc assignment creates the row if `targetRow` is not
            # already present in `main`.
            main.loc[targetRow, targetCol] = record[f'{sourcePrefix}{suffix}']

In [4]:
# Copy the household-size x continuous-covariate estimates into `main`.
altt = pd.read_table('../stata/alt_hhsize.tsv', skiprows=[0, 2])

# Continuous covariates that have one column group each in `altt`.
altCovariates = ('pubTransCount', 'bikeCount', 'walkCount', 'vehPerHHMember', 'CNTTDTR')

# (suffix of the source column in `altt`, destination column in `main`).
altColumnMap = (('', 'coef'), ('.1', 'pval.1'), ('.2', 'ci_low'), ('.3', 'ci_high'))

# Only the first four rows of the file are used.
for _, record in altt.head(4).iterrows():
    # Normalise the row name: drop 'bn', collapse 'b.' / 'o.' to '.'.
    baseName = record['VARIABLES'].replace('bn', '').replace('b.', '.').replace('o.', '.')

    for covariate in altCovariates:
        # Rows in `main` for these interactions are named '<var>#c.<covariate>'.
        targetRow = f'{baseName}#c.{covariate}'

        for suffix, targetCol in altColumnMap:
            # NOTE: .loc assignment creates the row if `targetRow` is not
            # already present in `main`.
            main.loc[targetRow, targetCol] = record[f'{covariate}{suffix}']

In [5]:
# Copy the MSA-size x density-category estimates into `main`.
dens = pd.read_table('../stata/density_msaSize.tsv', skiprows=[0, 2, 3, 4, 5, 6, 7, 8])

# (suffix of the source column in `dens`, destination column in `main`).
densColumnMap = (('', 'coef'), ('.1', 'pval.1'), ('.2', 'ci_low'), ('.3', 'ci_high'))

# Only the first six rows of the file are used.
for _, record in dens.head(6).iterrows():
    # Normalise the row name: drop 'bn', collapse 'b.' / 'o.' to '.'.
    baseName = record['VARIABLES'].replace('bn', '').replace('b.', '.').replace('o.', '.')

    # Density categories 1 through 7.
    for level in range(1, 8):
        sourcePrefix = f'{level}.density'
        targetRow = f'{baseName}#{level}.density'

        for suffix, targetCol in densColumnMap:
            # NOTE: .loc assignment creates the row if `targetRow` is not
            # already present in `main`.
            main.loc[targetRow, targetCol] = record[f'{sourcePrefix}{suffix}']

In [6]:
# Show `main` with the display row limit temporarily raised to 100; the
# option is restored when the `with` block exits.
with pd.option_context('display.max_rows', 100):
    display(main)

Unnamed: 0_level_0,LABELS,coefEform,pval,ci_lowEform,ci_highEform,coef,pval.1,ci_low,ci_high,coef.1,pval.2,ci_low.1,ci_high.1
VARIABLES,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2.hhSizeTopcode4,hhSizeTopcode4 = 2,1.380***,6.590000e-04,1.14600,1.66100,0.00732,1.910000e-01,-0.003660,0.018300,,,,
3.hhSizeTopcode4,hhSizeTopcode4 = 3,0.913,4.920000e-01,0.70400,1.18400,-0.0314***,2.140000e-07,-0.043200,-0.019500,,,,
4.hhSizeTopcode4,hhSizeTopcode4 = 4,0.687**,7.670000e-03,0.52100,0.90500,-0.0498***,0.000000e+00,-0.061800,-0.037800,,,,
1.isWorker,isWorker = 1,1.412***,0.000000e+00,1.34400,1.48300,0.0226***,0.000000e+00,0.019500,0.025800,,,,
1.homeowner,homeowner = 1,0.752***,0.000000e+00,0.71300,0.79300,-0.0200***,0.000000e+00,-0.023800,-0.016200,,,,
1.sex,"sex = 1, Male",1.135***,0.000000e+00,1.10000,1.17100,0.00864***,0.000000e+00,0.006500,0.010800,,,,
1.children0to12,children0to12 = 1,1.000,9.980000e-01,0.92800,1.07800,6.59e-06,9.980000e-01,-0.005120,0.005140,,,,
1.children13to17,children13to17 = 1,0.980,6.420000e-01,0.90100,1.06600,-0.00136,6.410000e-01,-0.007090,0.004360,,,,
1.unrelated,unrelated = 1,2.045***,0.000000e+00,1.80100,2.32200,0.0565***,0.000000e+00,0.045000,0.068000,,,,
1.outOfTown,outOfTown = 1,2.110***,0.000000e+00,1.94100,2.29300,0.0615***,0.000000e+00,0.053400,0.069700,,,,


In [7]:
# Keep the variable names as a regular first column so they survive the
# re-indexing below. NOTE: `insert` raises if 'Variable' already exists, so
# this cell cannot be re-run without rebuilding `main` first.
main.insert(0, 'Variable', main.index)

# Marginal effects for MSA size are in a different column because they're estimated on a different sample
msaColumnPairs = [
    ('coef', 'coef.1'),
    ('pval.1', 'pval.2'),
    ('ci_low', 'ci_low.1'),
    ('ci_high', 'ci_high.1'),
]
for sizeLevel in range(1, 6):
    # Index label of the first row whose Variable is '<sizeLevel>.msaSize'.
    rowLabel = main.index[main['Variable'] == f'{sizeLevel}.msaSize'][0]
    for targetCol, sourceCol in msaColumnPairs:
        main.loc[rowLabel, targetCol] = main.loc[rowLabel, sourceCol]

# Re-index rows by even integers (0, 2, 4, ...) under the name 'pos'; the
# old index comes back as a column. The odd positions stay free so that
# rows can later be placed at `idx - 1`, directly above an existing row.
main = main.reset_index()
main = main.assign(pos=main.index * 2).set_index('pos')

# Insert a bold LaTeX header row directly above the first variable of each
# group. Rows sit at even positions, so `position - 1` is a free odd slot;
# the final ordering is established when `main` is sorted by index.
headerColumns = ['Variable', 'coefEform', 'pval', 'ci_lowEform',
                 'ci_highEform', 'coef', 'pval.1', 'ci_low', 'ci_high']

for firstVar, label in [
    ('2.hhSizeTopcode4', 'Household size'),
    ('0.incomeCategory#c.vehPerHHMember', 'Vehicles per household member'),
    ('0.incomeCategory#c.pubTransCount', 'Days used public transit in last 30 days'),
    ('0.incomeCategory#c.walkCount', 'Walk trips in last 7 days'),
    ('0.incomeCategory#c.bikeCount', 'Bike trips in last 7 days'),
    ('1.children0to12', 'Presence of children'),
    ('0.incomeCategory#c.CNTTDTR', 'Count of trips on travel day'),
    ('1.hhSizeTopcode4#1.incomeCategory', 'Income, household size 1'),
    ('2.hhSizeTopcode4#1.incomeCategory', 'Income, household size 2'),
    ('3.hhSizeTopcode4#1.incomeCategory', 'Income, household size 3'),
    ('4.hhSizeTopcode4#1.incomeCategory', 'Income, household size 4+'),
    ('0.msaSize#1.density', 'Home block group density, not in MSA'),
    ('1.msaSize#1.density', 'Home block group density, MSA with less than 250,000 people'),
    ('2.msaSize#1.density', 'Home block group density, MSA with 250,000 to 499,999 people'),
    ('3.msaSize#1.density', 'Home block group density, MSA with 500,000 to 999,999 people'),
    ('4.msaSize#1.density', 'Home block group density, MSA with 1,000,000 to 2,999,999 people'),
    ('5.msaSize#1.density', 'Home block group density, MSA with 3 million people or more'),
    ('1.race', 'Race'),
    ('1.agecat', 'Age'),
    ('1.msaSize', 'MSA size'),
]:
    matches = main.index[main.Variable == firstVar]
    if len(matches) == 0:
        # Report the missing variable and skip its header. Previously the
        # message was printed and the `[0]` lookup then failed with an
        # IndexError, aborting the whole cell.
        print(f'{firstVar} not found!')
        continue

    idx = matches[0]
    # The header text goes in 'Variable'; the statistic columns get empty
    # strings, which are not null, so the header is not treated as missing.
    # Columns not listed in `headerColumns` are left as NaN for the new row.
    main.loc[idx - 1] = pd.Series(
        [r'\textbf{' + label + '}'] + [''] * (len(headerColumns) - 1),
        index=headerColumns,
    )

# Variables that are not part of a group
loneVars = ['1.outOfTown', '1.isWorker', '1.unrelated', '1.isHispanic', '1.homeowner', '1.dailySmartphoneUseHH', '1.msaRail', '1.sex']

# Human-readable (LaTeX) label for every variable name.
# Any label containing a backslash MUST be a raw string: in a normal string
# '\t' is a TAB character, which previously turned '\textsuperscript' and
# '\textdagger' into '<TAB>extsuperscript' / '<TAB>extdagger' in the output.
variableLabels = {
    '2.hhSizeTopcode4': 'Household size 2',
    '3.hhSizeTopcode4': 'Household size 3',
    '4.hhSizeTopcode4': 'Household size 4+',
    '1.outOfTown': 'Out of town on travel day',
    '0.incomeCategory#c.vehPerHHMember': r'\$24,999 or less',
    '1.incomeCategory#c.vehPerHHMember': r'\$25,000-\$50,000',
    '2.incomeCategory#c.vehPerHHMember': r'\$50,000-\$100,000',
    '3.incomeCategory#c.vehPerHHMember': r'More than \$100,000',
    '0.incomeCategory#c.pubTransCount': r'\$24,999 or less',
    '1.incomeCategory#c.pubTransCount': r'\$25,000-\$50,000',
    '2.incomeCategory#c.pubTransCount': r'\$50,000-\$100,000',
    '3.incomeCategory#c.pubTransCount': r'More than \$100,000',
    '0.incomeCategory#c.walkCount': r'\$24,999 or less',
    '1.incomeCategory#c.walkCount': r'\$25,000-\$50,000',
    '2.incomeCategory#c.walkCount': r'\$50,000-\$100,000',
    '3.incomeCategory#c.walkCount': r'More than \$100,000',
    '0.incomeCategory#c.bikeCount': r'\$24,999 or less',
    '1.incomeCategory#c.bikeCount': r'\$25,000-\$50,000',
    '2.incomeCategory#c.bikeCount': r'\$50,000-\$100,000',
    '3.incomeCategory#c.bikeCount': r'More than \$100,000',
    '0.incomeCategory#c.CNTTDTR': r'\$24,999 or less',
    '1.incomeCategory#c.CNTTDTR': r'\$25,000-\$50,000',
    '2.incomeCategory#c.CNTTDTR': r'\$50,000-\$100,000',
    '3.incomeCategory#c.CNTTDTR': r'More than \$100,000',
    '1.hhSizeTopcode4#1.incomeCategory': r'\$25,000-\$50,000',
    '1.hhSizeTopcode4#2.incomeCategory': r'\$50,000-\$100,000',
    '1.hhSizeTopcode4#3.incomeCategory': r'More than \$100,000',
    '2.hhSizeTopcode4#1.incomeCategory': r'\$25,000-\$50,000',
    '2.hhSizeTopcode4#2.incomeCategory': r'\$50,000-\$100,000',
    '2.hhSizeTopcode4#3.incomeCategory': r'More than \$100,000',
    '3.hhSizeTopcode4#1.incomeCategory': r'\$25,000-\$50,000',
    '3.hhSizeTopcode4#2.incomeCategory': r'\$50,000-\$100,000',
    '3.hhSizeTopcode4#3.incomeCategory': r'More than \$100,000',
    '4.hhSizeTopcode4#1.incomeCategory': r'\$25,000-\$50,000',
    '4.hhSizeTopcode4#2.incomeCategory': r'\$50,000-\$100,000',
    '4.hhSizeTopcode4#3.incomeCategory': r'More than \$100,000',
    '1.race': 'American Indian or Alaska Native',
    '2.race': 'Asian',
    '3.race': 'Black or African American',
    '4.race': 'Multiple races',
    '5.race': 'Native Hawaiian or other Pacific Islander',
    '6.race': 'Other',
    '1.agecat': '20-29',
    '2.agecat': '30-39',
    '3.agecat': '40-49',
    '4.agecat': '50-59',
    '5.agecat': '60-69',
    '6.agecat': '70-79',
    '7.agecat': '80+',
    '1.msaSize': r'Less than 250,000\textsuperscript{\textdagger}',
    '2.msaSize': r'250,000-499,999\textsuperscript{\textdagger}',
    '3.msaSize': r'500,000-999,999\textsuperscript{\textdagger}',
    '4.msaSize': r'1,000,000-2,999,999\textsuperscript{\textdagger}',
    '5.msaSize': r'More than 3,000,000\textsuperscript{\textdagger}',
    '1.density': '100-499 persons/sq. mi.',
    '2.density': '500-999 persons/sq. mi.',
    '3.density': '1,000-1,999 persons/sq. mi.',
    '4.density': '2,000-3,999 persons/sq. mi.',
    '5.density': '4,000-9,999 persons/sq. mi.',
    '6.density': '10,000-24,999 persons/sq. mi.',
    '7.density': '25,000 persons/sq. mi. or more',
    '1.isWorker': 'Worker',
    '1.children0to12': 'Children 0-12',
    '1.children13to17': 'Children 13-17',
    '1.unrelated': 'No related adults in household',
    '1.isHispanic': 'Hispanic',
    '1.homeowner': 'Homeowner',
    '1.msaRail': 'Heavy rail in MSA',
    '0.msaSize#1.density': '100--499 persons/sq. mi.',
    '0.msaSize#2.density': '500--999 persons/sq. mi.',
    '0.msaSize#3.density': '1,000--1,999 persons/sq. mi.',
    '0.msaSize#4.density': '2,000--3,999 persons/sq. mi.',
    '0.msaSize#5.density': '4,000--9,999 persons/sq. mi.',
    '0.msaSize#6.density': '10,000--24,999 persons/sq. mi.',
    '0.msaSize#7.density': '> 25,000 persons/sq. mi.',
    '1.msaSize#1.density': '100--499 persons/sq. mi.',
    '1.msaSize#2.density': '500--999 persons/sq. mi.',
    '1.msaSize#3.density': '1,000--1,999 persons/sq. mi.',
    '1.msaSize#4.density': '2,000--3,999 persons/sq. mi.',
    '1.msaSize#5.density': '4,000--9,999 persons/sq. mi.',
    '1.msaSize#6.density': '10,000--24,999 persons/sq. mi.',
    '1.msaSize#7.density': '> 25,000 persons/sq. mi.',
    '2.msaSize#1.density': '100--499 persons/sq. mi.',
    '2.msaSize#2.density': '500--999 persons/sq. mi.',
    '2.msaSize#3.density': '1,000--1,999 persons/sq. mi.',
    '2.msaSize#4.density': '2,000--3,999 persons/sq. mi.',
    '2.msaSize#5.density': '4,000--9,999 persons/sq. mi.',
    '2.msaSize#6.density': '10,000--24,999 persons/sq. mi.',
    '2.msaSize#7.density': '> 25,000 persons/sq. mi.',
    '3.msaSize#1.density': '100--499 persons/sq. mi.',
    '3.msaSize#2.density': '500--999 persons/sq. mi.',
    '3.msaSize#3.density': '1,000--1,999 persons/sq. mi.',
    '3.msaSize#4.density': '2,000--3,999 persons/sq. mi.',
    '3.msaSize#5.density': '4,000--9,999 persons/sq. mi.',
    '3.msaSize#6.density': '10,000--24,999 persons/sq. mi.',
    '3.msaSize#7.density': '> 25,000 persons/sq. mi.',
    '4.msaSize#1.density': '100--499 persons/sq. mi.',
    '4.msaSize#2.density': '500--999 persons/sq. mi.',
    '4.msaSize#3.density': '1,000--1,999 persons/sq. mi.',
    '4.msaSize#4.density': '2,000--3,999 persons/sq. mi.',
    '4.msaSize#5.density': '4,000--9,999 persons/sq. mi.',
    '4.msaSize#6.density': '10,000--24,999 persons/sq. mi.',
    '4.msaSize#7.density': '> 25,000 persons/sq. mi.',
    '5.msaSize#1.density': '100--499 persons/sq. mi.',
    '5.msaSize#2.density': '500--999 persons/sq. mi.',
    '5.msaSize#3.density': '1,000--1,999 persons/sq. mi.',
    '5.msaSize#4.density': '2,000--3,999 persons/sq. mi.',
    '5.msaSize#5.density': '4,000--9,999 persons/sq. mi.',
    '5.msaSize#6.density': '10,000--24,999 persons/sq. mi.',
    '5.msaSize#7.density': '> 25,000 persons/sq. mi.',
    '1.dailySmartphoneUseHH': 'Daily smartphone use by a household member',
    '1.sex': 'Male',
}

# Replace all vars with labels. Grouped variables are prefixed with '\ti '
# (presumably an indent macro defined in the LaTeX document -- TODO confirm);
# lone variables keep their label unprefixed. Values of `Variable` that are
# not keys of the mapping (e.g. the bold header rows) are left unchanged.
main['Variable'] = main.Variable.replace({
    name: text if name in loneVars else r'\ti ' + text
    for name, text in variableLabels.items()
})

# Sort by position so rows placed at odd positions fall directly above the
# even-positioned row they were inserted for.
main = main.sort_index()

In [8]:
# Write the publication table to LaTeX.
# Columns that are bookkeeping only and must not appear in the table.
hiddenColumns = ['LABELS', 'VARIABLES', 'coef.1', 'pval.2', 'ci_low.1', 'ci_high.1']

with pd.option_context("max_colwidth", 1000, 'display.float_format', lambda x: '%.3f' % x):
    shownColumns = [name for name in main.columns if name not in hiddenColumns]

    # Keep only rows with a non-null p-value. Header rows carry an empty
    # string there, which is not null, so they are retained.
    hasPval = main['pval'].notna()

    resultTable = main.loc[hasPval, shownColumns].rename(columns={
        'ci_lowEform': 'ci_lowE',
        'ci_highEform': 'ci_highE',
        'coef': 'Marg. Eff.',
        'coefEform': 'Odds Rat.'
    })

    # Round numeric columns to three decimals, then show remaining gaps as '-'.
    resultTable = resultTable.round(3).fillna('-')

    # escape=False leaves the LaTeX markup in the labels untouched.
    resultTable.to_latex('../stata/result_table.tex', index=False, escape=False, longtable=True)

In [9]:
# Show the fully labelled `main` table with the display row limit
# temporarily raised to 500; the option is restored when the block exits.
with pd.option_context('display.max_rows', 500):
    display(main)

Unnamed: 0_level_0,VARIABLES,Variable,LABELS,coefEform,pval,ci_lowEform,ci_highEform,coef,pval.1,ci_low,ci_high,coef.1,pval.2,ci_low.1,ci_high.1
pos,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
-1,,\textbf{Household size},,,,,,,,,,,,,
0,2.hhSizeTopcode4,\ti Household size 2,hhSizeTopcode4 = 2,1.380***,0.000659,1.146,1.661,0.00732,0.191,-0.00366,0.0183,,,,
2,3.hhSizeTopcode4,\ti Household size 3,hhSizeTopcode4 = 3,0.913,0.492,0.704,1.184,-0.0314***,2.14e-07,-0.0432,-0.0195,,,,
4,4.hhSizeTopcode4,\ti Household size 4+,hhSizeTopcode4 = 4,0.687**,0.00767,0.521,0.905,-0.0498***,0.0,-0.0618,-0.0378,,,,
6,1.isWorker,Worker,isWorker = 1,1.412***,0.0,1.344,1.483,0.0226***,0.0,0.0195,0.0258,,,,
8,1.homeowner,Homeowner,homeowner = 1,0.752***,0.0,0.713,0.793,-0.0200***,0.0,-0.0238,-0.0162,,,,
10,1.sex,Male,"sex = 1, Male",1.135***,0.0,1.1,1.171,0.00864***,0.0,0.0065,0.0108,,,,
11,,\textbf{Presence of children},,,,,,,,,,,,,
12,1.children0to12,\ti Children 0-12,children0to12 = 1,1.000,0.998,0.928,1.078,6.59e-06,0.998,-0.00512,0.00514,,,,
14,1.children13to17,\ti Children 13-17,children13to17 = 1,0.980,0.642,0.901,1.066,-0.00136,0.641,-0.00709,0.00436,,,,
