In [133]:
import pandas as pd
import numpy as np
import math

### Investigating Pearson Correlation Coefficients Using Paired T-Test with Hacker Statistics

We execute a two-sample permutation test to check statistical significance of the Pearson Correlation Coefficient between each population group and keyword. For easier reading and analysis, each keyword is given its own DataFrame, as described in the comments below.

In [134]:
""" dataframe key, for reference:

how to find number and routing number = kw_df_1
fiduciary bank = kw_df_2
foreign bank account = kw_df_3
balance your checkbook = kw_df_4
purchase money order = kw_df_5
savings account = kw_df_6
personal bankruptcy = kw_df_7
savings plan = kw_df_8
direct deposit = kw_df_9
529 plan = kw_df_10
credit card = kw_df_11
bankruptcy = kw_df_12
check cashing = kw_df_13
ATM = kw_df_14
fafsa = kw_df_15
savings association = kw_df_16
deposit money order = kw_df_17
deposit check = kw_df_18
best bank accounts = kw_df_19
small business bank = kw_df_20
"""

# extract data from cleaned csv: df
df = pd.read_csv('/Users/jacobschroeder/anaconda3/projects/test.csv')

# store population groups as a list: pop_list
pop_list = []
for i in range(101):
    pop_list.append("POP_" + str(i))

# pearson_r: Pearson Correlation Coefficient function
def pearson_r(x, y):
    """Compute Pearson correlation coefficient between two arrays."""
    # Compute correlation matrix: corr_mat
    corr_mat = np.corrcoef(x,y)

    # Return entry [0,1]
    return corr_mat[0,1]

# draw_bs_pairs: function to draw bootstrap pairs
def draw_bs_pairs(x, y, func, size=1):
    
    # Set up array of indices to sample from: inds
    inds = np.arange(len(x))

    # Initialize replicates
    bs_replicates = np.empty(size)

    # Generate replicates
    for i in range(size):
        bs_inds = np.random.choice(inds, len(inds))
        bs_replicates[i] = func(x[bs_inds], y[bs_inds])
        
    return bs_replicates

# function to remove unnecessary columns for this excercise: clean_dataframe
def clean_dataframe(df):
    del df['YEAR']
    del df['TOTAL_POP']
    del df['how to find number and routing number']
    del df['fiduciary bank']
    del df['foreign bank account']
    del df['balance your checkbook']
    del df['purchase money order']
    del df['savings account']
    del df['personal bankruptcy']
    del df['savings plan']
    del df['direct deposit']
    del df['529 plan']
    del df['credit card']
    del df['bankruptcy']
    del df['check cashing']
    del df['ATM']
    del df['fafsa']
    del df['savings association']
    del df['deposit money order']
    del df['deposit check']
    del df['best bank accounts']
    del df['small business bank']
    return df

In this two-sample t-test, we test randomly sampled Pearson Correlation Coefficients against the Pearson Correlation Coefficient extracted from the original dataset. 

The null hypothesis is that the correlation is no higher than what was initially sampled.

Testing assumes a 95% confidence interval, or 5% margin of error that the p-value will be evaluated against.

In [135]:
""" Our null hypothesis is that the selected keyword does not have
    a higher correlation to the age group than initially computed
"""

def test_pearson(kw,  i, size, pop_list):
    """ DOCSTRING: a function for testing pearson correlation coefficient over and over 
        kw: the keyword you wish to analyze, numpy array (independent variable)
        i: the starting row of the data (slice)
        size: the number of rows counted in the data analysis
    """
    # t: master counter
    t = 0
    return_list = np.empty(len(pop_list))
    return_pval = np.empty(len(pop_list))
    # for every age group in the population list
    for p in pop_list:
    
        # j, k: counters for operation
        j = 0
        k = 0

        # initialize empty numpy arrays to store data
        xdata = np.empty(size)
        ydata = np.empty(size)

        # xdata: an increase in a certain population group (independent) 
        for item in df[p][4:19]:
            xdata[j] = item
            j += 1
        j = 0

        # ydata: will create an increase in keyword demand (dependent)
        for item in df[kw][4:19]:
            ydata[k] = item
            k += 1
        k = 0

        # multiply each data set by 100 to ensure no "divide by zero" errors
        xdata = xdata
        ydata = ydata

        # run replicates test for selected age
        replicates = draw_bs_pairs(xdata, ydata, pearson_r, 10000)

        # get actual data for comparison
        actual_pearson = pearson_r(xdata, ydata)
        # print(actual_pearson)

        # p-value
        return_pval[t] = sum(abs(replicates) >= abs(actual_pearson)) / 10000
        
        return_list[t] = np.mean(replicates)
        print('Age Group {}: '.format(str(t)) + str(return_list[t]))
        print('P-Value: {}'.format(str(return_pval[t])))

        # increase master counter
        t += 1
    
    return { 'results': [return_list, return_pval] }

In [136]:
# setup the first dataframe
kw_df_1 = pd.DataFrame().reindex_like(df)
kw_df_1 = clean_dataframe(kw_df_1)
kw = 'how to find number and routing number'
kw_reps_1 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_1.columns)):
    kw_df_1.iloc[1, i] = kw_reps_1['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_1.columns)):
    kw_df_1.iloc[2, i] = kw_reps_1['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')
print(kw_df_1.head())

Age Group 0: 0.928954591875
P-Value: 0.6703
Age Group 1: 0.936613233545
P-Value: 0.6718
Age Group 2: 0.944576121966
P-Value: 0.6763
Age Group 3: 0.955671600314
P-Value: 0.6835
Age Group 4: 0.930544422378
P-Value: 0.5841
Age Group 5: 0.91316317079
P-Value: 0.6182
Age Group 6: 0.903345199979
P-Value: 0.6387
Age Group 7: 0.896141290278
P-Value: 0.6227
Age Group 8: 0.911152028878
P-Value: 0.5957
Age Group 9: 0.927457613274
P-Value: 0.602
Age Group 10: 0.938975987327
P-Value: 0.6018
Age Group 11: 0.92358947292
P-Value: 0.6003
Age Group 12: 0.841671439933
P-Value: 0.5409
Age Group 13: 0.658465900603
P-Value: 0.5123
Age Group 14: 0.434361699825
P-Value: 0.5156
Age Group 15: 0.24738304185
P-Value: 0.5629
Age Group 16: 0.104711763502
P-Value: 0.6773
Age Group 17: -0.0120871530922
P-Value: 0.9141
Age Group 18: -0.13742839984
P-Value: 0.6379
Age Group 19: -0.025556060152
P-Value: 0.9148
Age Group 20: 0.165662766454
P-Value: 0.6323
Age Group 21: 0.392143779101
P-Value: 0.5763
Age Group 22: 0.59118

In [137]:
# setup dataframe 2
kw_df_2 = pd.DataFrame().reindex_like(df)
kw_df_2 = clean_dataframe(kw_df_2)
kw = 'fiduciary bank'
kw_reps_2 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_2.columns)):
    kw_df_2.iloc[1, i] = kw_reps_2['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_2.columns)):
    kw_df_2.iloc[2, i] = kw_reps_2['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_2.head())

Age Group 0: -0.0615686632375
P-Value: 0.8034
Age Group 1: -0.0615973602013
P-Value: 0.7878
Age Group 2: -0.0663811254338
P-Value: 0.8035
Age Group 3: -0.00982999367782
P-Value: 0.8972
Age Group 4: -0.0847911238488
P-Value: 0.7776
Age Group 5: -0.143631396519
P-Value: 0.6906
Age Group 6: -0.101045891376
P-Value: 0.7628
Age Group 7: -0.00797983302396
P-Value: 0.8839
Age Group 8: 0.0446974229274
P-Value: 0.9373
Age Group 9: 0.0931277438308
P-Value: 0.7928
Age Group 10: 0.0701801009785
P-Value: 0.7819
Age Group 11: 0.135633377647
P-Value: 0.5894
Age Group 12: 0.27730057469
P-Value: 0.5208
Age Group 13: 0.395781162283
P-Value: 0.5503
Age Group 14: 0.400640907644
P-Value: 0.5455
Age Group 15: 0.166437296384
P-Value: 0.6477
Age Group 16: -0.138794215638
P-Value: 0.6318
Age Group 17: -0.160953644871
P-Value: 0.649
Age Group 18: -0.152524552822
P-Value: 0.6653
Age Group 19: -0.195512243805
P-Value: 0.6322
Age Group 20: -0.31546541504
P-Value: 0.5222
Age Group 21: -0.361106253187
P-Value: 0.528

In [138]:
# setup dataframe 3
kw_df_3 = pd.DataFrame().reindex_like(df)
kw_df_3 = clean_dataframe(kw_df_3)
kw = 'foreign bank account'
kw_reps_3 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_3.columns)):
    kw_df_3.iloc[1, i] = kw_reps_3['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_3.columns)):
    kw_df_3.iloc[2, i] = kw_reps_3['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_3.head())

Age Group 0: 0.831019602382
P-Value: 0.5323
Age Group 1: 0.827855276325
P-Value: 0.5285
Age Group 2: 0.817708647071
P-Value: 0.5374
Age Group 3: 0.810869964357
P-Value: 0.5318
Age Group 4: 0.832840146388
P-Value: 0.5391
Age Group 5: 0.79443211375
P-Value: 0.5327
Age Group 6: 0.788961833727
P-Value: 0.539
Age Group 7: 0.819735273146
P-Value: 0.5271
Age Group 8: 0.834287084401
P-Value: 0.5369
Age Group 9: 0.820232497314
P-Value: 0.5368
Age Group 10: 0.823276480502
P-Value: 0.5376
Age Group 11: 0.643623863929
P-Value: 0.5337
Age Group 12: 0.481780913472
P-Value: 0.5321
Age Group 13: 0.337384158014
P-Value: 0.5404
Age Group 14: 0.0828602445242
P-Value: 0.7795
Age Group 15: -0.0490938156617
P-Value: 0.9069
Age Group 16: -0.110877618885
P-Value: 0.7585
Age Group 17: -0.0846310942327
P-Value: 0.8673
Age Group 18: 0.0574889123169
P-Value: 0.8043
Age Group 19: 0.326179271631
P-Value: 0.5257
Age Group 20: 0.484306889662
P-Value: 0.5393
Age Group 21: 0.604213603009
P-Value: 0.5366
Age Group 22: 0

In [139]:
# setup dataframe 4
kw_df_4 = pd.DataFrame().reindex_like(df)
kw_df_4 = clean_dataframe(kw_df_4)
kw = 'balance your checkbook'
kw_reps_4 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_4.columns)):
    kw_df_4.iloc[1, i] = kw_reps_4['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_4.columns)):
    kw_df_4.iloc[2, i] = kw_reps_4['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_4.head())

Age Group 0: 0.511052530458
P-Value: 0.5463
Age Group 1: 0.51881252583
P-Value: 0.5302
Age Group 2: 0.542746694366
P-Value: 0.5424
Age Group 3: 0.566933817365
P-Value: 0.5494
Age Group 4: 0.511983285165
P-Value: 0.5311
Age Group 5: 0.51249846658
P-Value: 0.5454
Age Group 6: 0.510759972848
P-Value: 0.5467
Age Group 7: 0.448716616401
P-Value: 0.5359
Age Group 8: 0.488548281679
P-Value: 0.5437
Age Group 9: 0.526039186689
P-Value: 0.545
Age Group 10: 0.628161964138
P-Value: 0.5683
Age Group 11: 0.684273743218
P-Value: 0.5766
Age Group 12: 0.737507713178
P-Value: 0.5697
Age Group 13: 0.721620894515
P-Value: 0.562
Age Group 14: 0.612628204956
P-Value: 0.5314
Age Group 15: 0.41302490754
P-Value: 0.5052
Age Group 16: 0.248577397292
P-Value: 0.5566
Age Group 17: 0.0563425682928
P-Value: 0.76
Age Group 18: -0.307188309473
P-Value: 0.589
Age Group 19: -0.344656845262
P-Value: 0.5945
Age Group 20: -0.260373130012
P-Value: 0.551
Age Group 21: -0.0300380379859
P-Value: 0.8288
Age Group 22: 0.1570046

In [140]:
# setup dataframe 5
kw_df_5 = pd.DataFrame().reindex_like(df)
kw_df_5 = clean_dataframe(kw_df_5)
kw = 'purchase money order'
kw_reps_5 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_5.columns)):
    kw_df_5.iloc[1, i] = kw_reps_5['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_5.columns)):
    kw_df_5.iloc[2, i] = kw_reps_5['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_5.head())

Age Group 0: 0.706848834158
P-Value: 0.5597
Age Group 1: 0.721268042034
P-Value: 0.5619
Age Group 2: 0.725990702113
P-Value: 0.5495
Age Group 3: 0.737943641157
P-Value: 0.5462
Age Group 4: 0.705637107602
P-Value: 0.565
Age Group 5: 0.670106687076
P-Value: 0.5643
Age Group 6: 0.629939576463
P-Value: 0.5595
Age Group 7: 0.627195084207
P-Value: 0.5583
Age Group 8: 0.683055355665
P-Value: 0.5634
Age Group 9: 0.763648901381
P-Value: 0.5485
Age Group 10: 0.882631049852
P-Value: 0.5303
Age Group 11: 0.899245935742
P-Value: 0.515
Age Group 12: 0.887114087879
P-Value: 0.5312
Age Group 13: 0.797763773265
P-Value: 0.5403
Age Group 14: 0.64818874967
P-Value: 0.5331
Age Group 15: 0.409598742638
P-Value: 0.516
Age Group 16: 0.0872835458446
P-Value: 0.7103
Age Group 17: -0.295508525808
P-Value: 0.5742
Age Group 18: -0.504040562654
P-Value: 0.5436
Age Group 19: -0.367564823325
P-Value: 0.5318
Age Group 20: -0.103961281272
P-Value: 0.5945
Age Group 21: 0.165756940209
P-Value: 0.5381
Age Group 22: 0.364

In [141]:
# setup dataframe 6
kw_df_6 = pd.DataFrame().reindex_like(df)
kw_df_6 = clean_dataframe(kw_df_6)
kw = 'savings account'
kw_reps_6 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_6.columns)):
    kw_df_6.iloc[1, i] = kw_reps_6['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_6.columns)):
    kw_df_6.iloc[2, i] = kw_reps_6['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_6.head())

Age Group 0: 0.725567385414
P-Value: 0.5918
Age Group 1: 0.740746783023
P-Value: 0.6061
Age Group 2: 0.757845814903
P-Value: 0.604
Age Group 3: 0.773303297419
P-Value: 0.5972
Age Group 4: 0.770633462748
P-Value: 0.6149
Age Group 5: 0.773406489167
P-Value: 0.6443
Age Group 6: 0.768045221576
P-Value: 0.6457
Age Group 7: 0.743754115866
P-Value: 0.6088
Age Group 8: 0.711678543728
P-Value: 0.5863
Age Group 9: 0.674074202578
P-Value: 0.5549
Age Group 10: 0.667287727625
P-Value: 0.545
Age Group 11: 0.663339050805
P-Value: 0.5519
Age Group 12: 0.65663568745
P-Value: 0.5505
Age Group 13: 0.606149069464
P-Value: 0.5229
Age Group 14: 0.516522896656
P-Value: 0.5089
Age Group 15: 0.480729382065
P-Value: 0.5011
Age Group 16: 0.496219256917
P-Value: 0.5097
Age Group 17: 0.374523683175
P-Value: 0.4914
Age Group 18: 0.101814887291
P-Value: 0.6762
Age Group 19: -0.00329744332378
P-Value: 0.9737
Age Group 20: -0.0110635975881
P-Value: 0.9272
Age Group 21: 0.0994347037798
P-Value: 0.7422
Age Group 22: 0.2

In [142]:
# setup dataframe 7
kw_df_7 = pd.DataFrame().reindex_like(df)
kw_df_7 = clean_dataframe(kw_df_7)
kw = 'personal bankruptcy'
kw_reps_7 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_7.columns)):
    kw_df_7.iloc[1, i] = kw_reps_7['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_7.columns)):
    kw_df_7.iloc[2, i] = kw_reps_7['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_7.head())

Age Group 0: -0.878433790405
P-Value: 0.554
Age Group 1: -0.881457789647
P-Value: 0.549
Age Group 2: -0.883503184475
P-Value: 0.5531
Age Group 3: -0.862347030182
P-Value: 0.5629
Age Group 4: -0.867516886437
P-Value: 0.5516
Age Group 5: -0.903160654069
P-Value: 0.5491
Age Group 6: -0.88371013976
P-Value: 0.5372
Age Group 7: -0.84730194591
P-Value: 0.5328
Age Group 8: -0.795890915803
P-Value: 0.5728
Age Group 9: -0.78179982532
P-Value: 0.6033
Age Group 10: -0.760039067993
P-Value: 0.6207
Age Group 11: -0.68492591556
P-Value: 0.57
Age Group 12: -0.515564751007
P-Value: 0.548
Age Group 13: -0.250801162368
P-Value: 0.5799
Age Group 14: 0.0112263592563
P-Value: 0.8787
Age Group 15: 0.0780174656509
P-Value: 0.7526
Age Group 16: -0.00809715779667
P-Value: 0.8793
Age Group 17: -0.0687938482433
P-Value: 0.737
Age Group 18: -0.0755110368276
P-Value: 0.7524
Age Group 19: -0.218429919788
P-Value: 0.6125
Age Group 20: -0.461734929137
P-Value: 0.5136
Age Group 21: -0.670020003012
P-Value: 0.5222
Age 

In [148]:
# setup dataframe 8
kw_df_8 = pd.DataFrame().reindex_like(df)
kw_df_8 = clean_dataframe(kw_df_8)
kw = 'savings plan'
kw_reps_8 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_8.columns)):
    kw_df_8.iloc[1, i] = kw_reps_8['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_8.columns)):
    kw_df_8.iloc[2, i] = kw_reps_8['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_8.head())

Age Group 0: -0.85965849754
P-Value: 0.5395
Age Group 1: -0.847841050358
P-Value: 0.5331
Age Group 2: -0.838043415202
P-Value: 0.5368
Age Group 3: -0.822954912723
P-Value: 0.5467
Age Group 4: -0.833767218167
P-Value: 0.551
Age Group 5: -0.837313853275
P-Value: 0.5425
Age Group 6: -0.842341027401
P-Value: 0.5312
Age Group 7: -0.868410019414
P-Value: 0.5315
Age Group 8: -0.845989504743
P-Value: 0.5415
Age Group 9: -0.79265091988
P-Value: 0.5527
Age Group 10: -0.713634145607
P-Value: 0.5464
Age Group 11: -0.559091146129
P-Value: 0.5365
Age Group 12: -0.331900181117
P-Value: 0.5326
Age Group 13: -0.0620985060144
P-Value: 0.8318
Age Group 14: 0.221495596114
P-Value: 0.5138
Age Group 15: 0.282935218101
P-Value: 0.5283
Age Group 16: 0.251195691535
P-Value: 0.5662
Age Group 17: 0.0859223424294
P-Value: 0.8613
Age Group 18: -0.217958734264
P-Value: 0.589
Age Group 19: -0.482484641747
P-Value: 0.5393
Age Group 20: -0.706885225741
P-Value: 0.5536
Age Group 21: -0.821576831763
P-Value: 0.5727
Age 

In [144]:
# setup dataframe 9
kw_df_9 = pd.DataFrame().reindex_like(df)
kw_df_9 = clean_dataframe(kw_df_9)
kw = 'direct deposit'
kw_reps_9 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_9.columns)):
    kw_df_9.iloc[1, i] = kw_reps_9['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_9.columns)):
    kw_df_9.iloc[2, i] = kw_reps_9['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_9.head())

Age Group 0: 0.975040409205
P-Value: 0.6014
Age Group 1: 0.976396935549
P-Value: 0.6277
Age Group 2: 0.976681237424
P-Value: 0.65
Age Group 3: 0.978393462128
P-Value: 0.6822
Age Group 4: 0.970166154897
P-Value: 0.6901
Age Group 5: 0.959799873384
P-Value: 0.7463
Age Group 6: 0.958838108258
P-Value: 0.6766
Age Group 7: 0.968837127088
P-Value: 0.6347
Age Group 8: 0.961865411646
P-Value: 0.6638
Age Group 9: 0.949673591922
P-Value: 0.6587
Age Group 10: 0.921649178623
P-Value: 0.6416
Age Group 11: 0.83830672306
P-Value: 0.5867
Age Group 12: 0.687715134128
P-Value: 0.5419
Age Group 13: 0.451527246404
P-Value: 0.5258
Age Group 14: 0.193333232975
P-Value: 0.5946
Age Group 15: 0.0452212721383
P-Value: 0.7981
Age Group 16: 0.00905930570585
P-Value: 0.8804
Age Group 17: 0.0625721336367
P-Value: 0.7496
Age Group 18: 0.0573259850307
P-Value: 0.7863
Age Group 19: 0.216712735913
P-Value: 0.5929
Age Group 20: 0.405208758154
P-Value: 0.5684
Age Group 21: 0.579035756148
P-Value: 0.5584
Age Group 22: 0.73

In [145]:
# setup dataframe 10
kw_df_10 = pd.DataFrame().reindex_like(df)
kw_df_10 = clean_dataframe(kw_df_10)
kw = '529 plan'
kw_reps_10 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_10.columns)):
    kw_df_10.iloc[1, i] = kw_reps_10['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_10.columns)):
    kw_df_10.iloc[2, i] = kw_reps_10['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_10.head())

Age Group 0: -0.456722162464
P-Value: 0.5236
Age Group 1: -0.436967392814
P-Value: 0.5221
Age Group 2: -0.416399822912
P-Value: 0.5259
Age Group 3: -0.392330750173
P-Value: 0.5237
Age Group 4: -0.415564413744
P-Value: 0.5287
Age Group 5: -0.398284741914
P-Value: 0.5247
Age Group 6: -0.411957271101
P-Value: 0.522
Age Group 7: -0.46393339946
P-Value: 0.5253
Age Group 8: -0.462162939283
P-Value: 0.5217
Age Group 9: -0.433037280777
P-Value: 0.5165
Age Group 10: -0.350743035792
P-Value: 0.5465
Age Group 11: -0.163127822711
P-Value: 0.7043
Age Group 12: 0.0284038776575
P-Value: 0.8765
Age Group 13: 0.27386487376
P-Value: 0.5246
Age Group 14: 0.484584253139
P-Value: 0.5031
Age Group 15: 0.480734709656
P-Value: 0.5134
Age Group 16: 0.513964191446
P-Value: 0.5322
Age Group 17: 0.326393394417
P-Value: 0.5519
Age Group 18: -0.170399837487
P-Value: 0.5698
Age Group 19: -0.494496348979
P-Value: 0.5441
Age Group 20: -0.71700576414
P-Value: 0.5487
Age Group 21: -0.739992091638
P-Value: 0.574
Age Grou

In [146]:
# setup dataframe 11
kw_df_11 = pd.DataFrame().reindex_like(df)
kw_df_11 = clean_dataframe(kw_df_11)
kw = 'credit card'
kw_reps_11 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_11.columns)):
    kw_df_11.iloc[1, i] = kw_reps_11['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_11.columns)):
    kw_df_11.iloc[2, i] = kw_reps_11['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_11.head())

Age Group 0: 0.975589118802
P-Value: 0.5756
Age Group 1: 0.978775595213
P-Value: 0.5848
Age Group 2: 0.982952532604
P-Value: 0.5861
Age Group 3: 0.989325668828
P-Value: 0.5591
Age Group 4: 0.968159585707
P-Value: 0.554
Age Group 5: 0.955565070314
P-Value: 0.5989
Age Group 6: 0.948541086616
P-Value: 0.6002
Age Group 7: 0.951507030142
P-Value: 0.5621
Age Group 8: 0.962765979726
P-Value: 0.5241
Age Group 9: 0.952961283922
P-Value: 0.5502
Age Group 10: 0.947653236365
P-Value: 0.5712
Age Group 11: 0.887344221103
P-Value: 0.5598
Age Group 12: 0.773715175371
P-Value: 0.539
Age Group 13: 0.579231857348
P-Value: 0.5235
Age Group 14: 0.308817710528
P-Value: 0.5295
Age Group 15: 0.102791630579
P-Value: 0.7036
Age Group 16: -0.0090573217066
P-Value: 0.9502
Age Group 17: -0.0529933144537
P-Value: 0.9529
Age Group 18: -0.0640213873653
P-Value: 0.8502
Age Group 19: 0.1067280358
P-Value: 0.7115
Age Group 20: 0.298172132367
P-Value: 0.5708
Age Group 21: 0.511456071233
P-Value: 0.5611
Age Group 22: 0.69

In [149]:
# setup dataframe 12
kw_df_12 = pd.DataFrame().reindex_like(df)
kw_df_12 = clean_dataframe(kw_df_12)
kw = 'bankruptcy'
kw_reps_12 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_12.columns)):
    kw_df_12.iloc[1, i] = kw_reps_12['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_12.columns)):
    kw_df_12.iloc[2, i] = kw_reps_12['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_12.head())

Age Group 0: -0.732233735958
P-Value: 0.5656
Age Group 1: -0.74536790419
P-Value: 0.563
Age Group 2: -0.757115775438
P-Value: 0.5687
Age Group 3: -0.751228386533
P-Value: 0.5627
Age Group 4: -0.723889591711
P-Value: 0.575
Age Group 5: -0.741007220589
P-Value: 0.5575
Age Group 6: -0.701361510388
P-Value: 0.5541
Age Group 7: -0.654825217639
P-Value: 0.5571
Age Group 8: -0.632919351455
P-Value: 0.548
Age Group 9: -0.689797707414
P-Value: 0.5365
Age Group 10: -0.750696987409
P-Value: 0.5394
Age Group 11: -0.79899053751
P-Value: 0.5566
Age Group 12: -0.753563805822
P-Value: 0.5417
Age Group 13: -0.571678284765
P-Value: 0.5371
Age Group 14: -0.337192518515
P-Value: 0.5202
Age Group 15: -0.174031553514
P-Value: 0.5877
Age Group 16: -0.06139793609
P-Value: 0.7999
Age Group 17: 0.112941341136
P-Value: 0.739
Age Group 18: 0.30357959357
P-Value: 0.549
Age Group 19: 0.240091310434
P-Value: 0.584
Age Group 20: -0.0423318678952
P-Value: 0.8579
Age Group 21: -0.328284753958
P-Value: 0.5174
Age Group 

NameError: name 'kw_reps_12' is not defined

In [None]:
# setup dataframe 13
kw_df_13 = pd.DataFrame().reindex_like(df)
kw_df_13 = clean_dataframe(kw_df_13)
kw = 'check cashing'
kw_reps_13 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_13.columns)):
    kw_df_13.iloc[1, i] = kw_reps_13['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_13.columns)):
    kw_df_13.iloc[2, i] = kw_reps_13['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_13.head())

In [None]:
# setup dataframe 14
kw_df_14 = pd.DataFrame().reindex_like(df)
kw_df_14 = clean_dataframe(kw_df_14)
kw = 'ATM'
kw_reps_14 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_14.columns)):
    kw_df_14.iloc[1, i] = kw_reps_14['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_14.columns)):
    kw_df_14.iloc[2, i] = kw_reps_14['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_14.head())

In [None]:
# setup dataframe 15
kw_df_15 = pd.DataFrame().reindex_like(df)
kw_df_15 = clean_dataframe(kw_df_15)
kw = 'fafsa'
kw_reps_15 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_15.columns)):
    kw_df_15.iloc[1, i] = kw_reps_15['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_15.columns)):
    kw_df_15.iloc[2, i] = kw_reps_15['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_15.head())

In [None]:
# setup dataframe 16
kw_df_16 = pd.DataFrame().reindex_like(df)
kw_df_16 = clean_dataframe(kw_df_16)
kw = 'savings association'
kw_reps_16 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_16.columns)):
    kw_df_16.iloc[1, i] = kw_reps_16['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_16.columns)):
    kw_df_16.iloc[2, i] = kw_reps_16['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_16.head())

In [None]:
# setup dataframe 17
kw_df_17 = pd.DataFrame().reindex_like(df)
kw_df_17 = clean_dataframe(kw_df_17)
kw = 'deposit money order'
kw_reps_17 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_17.columns)):
    kw_df_17.iloc[1, i] = kw_reps_17['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_17.columns)):
    kw_df_17.iloc[2, i] = kw_reps_17['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_17.head())

In [None]:
# setup dataframe 18
kw_df_18 = pd.DataFrame().reindex_like(df)
kw_df_18 = clean_dataframe(kw_df_18)
kw = 'deposit check'
kw_reps_18 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_18.columns)):
    kw_df_18.iloc[1, i] = kw_reps_18['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_18.columns)):
    kw_df_18.iloc[2, i] = kw_reps_18['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_18.head())

In [None]:
# setup dataframe 19
kw_df_19 = pd.DataFrame().reindex_like(df)
kw_df_19 = clean_dataframe(kw_df_19)
kw = 'best bank accounts'
kw_reps_19 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_19.columns)):
    kw_df_19.iloc[1, i] = kw_reps_19['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_19.columns)):
    kw_df_19.iloc[2, i] = kw_reps_19['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_19.head())

In [None]:
# setup dataframe 20
kw_df_20 = pd.DataFrame().reindex_like(df)
kw_df_20 = clean_dataframe(kw_df_20)
kw = 'small business bank'
kw_reps_20 = test_pearson(kw, 4, 15, pop_list)
print('Permutation Test Complete')

# place corresponding permuatation values in first row of dataframe
for i in range(len(kw_df_20.columns)):
    kw_df_20.iloc[1, i] = kw_reps_20['results'][0][i]
    print('success 1: ' + str(i))
print('Checkpoint 2')

# p-value loading in the second row
for i in range(len(kw_df_20.columns)):
    kw_df_20.iloc[2, i] = kw_reps_20['results'][1][i]
    print('success 2: ' + str(i))
print('Checkpoint 3')

print('Complete')

print(kw_df_20.head())

In [None]:
kw_df_1.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_1.csv')
kw_df_2.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_2.csv')
kw_df_3.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_3.csv')
kw_df_4.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_4.csv')
kw_df_5.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_5.csv')
kw_df_6.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_6.csv')
kw_df_7.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_7.csv')
kw_df_8.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_8.csv')
kw_df_9.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_9.csv')
kw_df_10.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_10.csv')
kw_df_11.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_11.csv')
kw_df_12.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_12.csv')
kw_df_13.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_13.csv')
kw_df_14.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_14.csv')
kw_df_15.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_15.csv')
kw_df_16.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_16.csv')
kw_df_17.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_17.csv')
kw_df_18.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_18.csv')
kw_df_19.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_19.csv')
kw_df_20.to_csv('/Users/jacobschroeder/anaconda3/projects/kw_df_20.csv')