In [1]:
import os

import numpy as np
import pandas as pd
from census import Census

In [2]:
# Build the Census API client. The key is read from the CENSUS_API_KEY
# environment variable rather than being embedded in the notebook, so the
# credential is never committed or shared along with the outputs.
# A missing variable raises KeyError here, before any request is made.
c = Census(os.environ['CENSUS_API_KEY'])

In [3]:
# Request the place name plus two B03002 estimates from the ACS 5-year
# endpoint for every place in state 06, 2017 vintage.
acs_fields = (
    'NAME',
    'B03002_001E',  # total
    'B03002_003E',  # white
)
place_geo = {'for': 'place:*', 'in': 'state:06'}
data = c.acs5.get(acs_fields, geo=place_geo, year=2017)

In [4]:
# Tabulate the API records, give the columns readable names, fix the column
# order, and rank the rows by white count (largest first).
column_names = {
    'NAME': 'place',
    'place': 'placefp',
    'state': 'statefp',
    'B03002_001E': 'totalpop',
    'B03002_003E': 'white',
}
df = (
    pd.DataFrame(data)
    .rename(columns=column_names)
    .loc[:, ['statefp', 'placefp', 'place', 'totalpop', 'white']]
    .sort_values('white', ascending=False)
)

In [5]:
# Preview the first ten rows of the frame.
df.head(n=10)

Unnamed: 0,statefp,placefp,place,totalpop,white
382,6,44000,"Los Angeles city, California",3949776.0,1123131.0
81,6,66000,"San Diego city, California",1390966.0,599633.0
1211,6,67000,"San Francisco city, California",864263.0,353000.0
1208,6,68000,"San Jose city, California",1023031.0,271027.0
1248,6,64000,"Sacramento city, California",489650.0,162206.0
1006,6,27000,"Fresno city, California",519037.0,141971.0
1047,6,43000,"Long Beach city, California",470489.0,129957.0
134,6,36000,"Huntington Beach city, California",200415.0,126453.0
459,6,3526,"Bakersfield city, California",372680.0,125649.0
356,6,30000,"Glendale city, California",199750.0,122658.0


In [6]:
# Per-place share: white count divided by total population, stored as a new
# column on df.
df['whitepct'] = df['white'].div(df['totalpop'])

In [7]:
# Ten rows with the highest white share.
df.sort_values(by='whitepct', ascending=False).head(n=10)

Unnamed: 0,statefp,placefp,place,totalpop,white,whitepct
1358,6,55506,"Panorama Heights CDP, California",38.0,38.0,1.0
681,6,43126,"Lookout CDP, California",63.0,63.0,1.0
89,6,21530,"Edgewood CDP, California",25.0,25.0,1.0
1474,6,4716,"Bear Valley CDP (Alpine County), California",62.0,62.0,1.0
1385,6,44672,"Mabie CDP, California",43.0,43.0,1.0
174,6,81869,"Valley Ranch CDP, California",10.0,10.0,1.0
1393,6,36098,"Hyampom CDP, California",63.0,63.0,1.0
1405,6,71000,"Sereno del Mar CDP, California",63.0,63.0,1.0
123,6,37918,"Keeler CDP, California",10.0,10.0,1.0
1409,6,37274,"Jenner CDP, California",64.0,64.0,1.0


In [8]:
# Total of the 'white' column across all rows.
df['white'].sum()

13605468.0

In [9]:
# Number of rows in the frame.
df.shape[0]

1522

In [10]:
# Pooled white share over every row of df (sum of counts, not a mean of the
# per-place shares). Note: this scalar shares its name with the 'whitepct'
# column on df.
whitepct = df.white.sum() / df.totalpop.sum()

In [11]:
# Display the pooled white share (sum of 'white' / sum of 'totalpop' over df).
whitepct

0.36830829784805175

In [12]:
# z-score of each place's white share against the pooled share `whitepct`,
# using the binomial standard error sqrt(p * (1 - p) / n) with n = totalpop.
# Uses numpy directly: the `pd.np` alias was deprecated in pandas 1.0 and
# removed in pandas 2.0.
# NOTE(review): rows with totalpop == 0 presumably end up NaN here - confirm
# whether they should be filtered out first.
standard_error = np.sqrt(whitepct * (1 - whitepct) / df['totalpop'])
df['whitez'] = (df['whitepct'] - whitepct) / standard_error

In [13]:
# Rows whose place name begins with 'Ojai'.
df.loc[df['place'].str.startswith('Ojai')]

Unnamed: 0,statefp,placefp,place,totalpop,white,whitepct,whitez
752,6,53476,"Ojai city, California",7594.0,6247.0,0.822623,82.079342


In [14]:
# Summary statistics for the total-population column.
df['totalpop'].describe()

count    1.522000e+03
mean     2.427098e+04
std      1.205208e+05
min      0.000000e+00
25%      6.215000e+02
50%      3.191500e+03
75%      1.659075e+04
max      3.949776e+06
Name: totalpop, dtype: float64

In [15]:
# Five rows with the lowest z-scores.
df.sort_values(by='whitez', ascending=True).head(n=5)

Unnamed: 0,statefp,placefp,place,totalpop,white,whitepct,whitez
382,6,44000,"Los Angeles city, California",3949776.0,1123131.0,0.284353,-345.919903
493,6,69000,"Santa Ana city, California",334493.0,31499.0,0.094169,-328.704663
1188,6,20802,"East Los Angeles CDP, California",123905.0,2274.0,0.018353,-255.387088
397,6,22230,"El Monte city, California",115958.0,4655.0,0.040144,-231.677339
247,6,15044,"Compton city, California",97847.0,1125.0,0.011498,-231.394673


In [18]:
# Thirty rows with the highest z-scores.
df.sort_values(by='whitez', ascending=False).head(n=30)

Unnamed: 0,statefp,placefp,place,totalpop,white,whitepct,whitez
1144,6,51182,"Newport Beach city, California",86793.0,70471.0,0.811943,270.962927
310,6,59920,"Redding city, California",91236.0,71081.0,0.779089,257.238063
1083,6,11194,"Carlsbad city, California",113147.0,83328.0,0.736458,256.73643
134,6,36000,"Huntington Beach city, California",200415.0,126453.0,0.630956,243.769852
474,6,62938,"Roseville city, California",130705.0,90388.0,0.691542,242.272672
995,6,78582,"Thousand Oaks city, California",128909.0,88170.0,0.683971,234.966725
356,6,30000,"Glendale city, California",199750.0,122658.0,0.614058,227.707482
1079,6,22678,"Encinitas city, California",62595.0,49376.0,0.788817,218.11547
568,6,13014,"Chico city, California",90660.0,65031.0,0.717306,217.857547
642,6,13588,"Citrus Heights city, California",86618.0,60509.0,0.698573,201.515268


In [22]:
# Places with total population strictly between 5,000 and 10,000, ordered
# from lowest to highest z-score.
in_size_band = df['totalpop'].gt(5000) & df['totalpop'].lt(10000)
df.loc[in_size_band].sort_values(by='whitez')

Unnamed: 0,statefp,placefp,place,totalpop,white,whitepct,whitez
187,06,84116,"West Athens CDP, California",8746.0,86.0,0.009833,-69.503320
1253,06,20438,"Earlimart CDP, California",8824.0,107.0,0.012126,-69.366011
1185,06,54008,"Orange Cove city, California",9567.0,302.0,0.031567,-68.285129
788,06,46660,"Mecca CDP, California",7893.0,9.0,0.001140,-67.628163
1089,06,30392,"Gonzales city, California",8462.0,358.0,0.042307,-62.172423
868,06,54372,"Orosi CDP, California",7760.0,255.0,0.032861,-61.262829
581,06,36084,"Huron city, California",6926.0,116.0,0.016748,-60.657168
646,06,73276,"South San Gabriel CDP, California",8964.0,580.0,0.064703,-59.593866
1159,06,09878,"Calipatria city, California",7426.0,408.0,0.054942,-55.984949
597,06,17708,"Cutler CDP, California",5850.0,94.0,0.016068,-55.854500
