In [1]:
import os

import numpy as np
import pandas as pd
from census import Census

In [2]:
# Build the Census API client.
# The key is read from the CENSUS_API_KEY environment variable instead of being
# committed in the notebook; a missing variable fails here with a KeyError
# rather than later during the first request.
# NOTE(review): the key that used to be embedded in this cell has been exposed
# in version control and should be rotated.
c = Census(os.environ['CENSUS_API_KEY'])

In [3]:
# Fetch 2017 ACS 5-year figures for every place in state 06 (California).
# Requested fields:
#   NAME         -> place name
#   B03002_001E  -> total
#   B03002_004E  -> black
data = c.acs5.get(
    ('NAME', 'B03002_001E', 'B03002_004E'),
    geo={'for': 'place:*', 'in': 'state:06'},
    year=2017,
)

In [4]:
# Turn the API records into a tidy frame:
#   1. give the raw fields readable column names,
#   2. keep a fixed column order,
#   3. rank places by Black population, largest first.
# Later cells (the running totals) rely on this descending order.
df = (
    pd.DataFrame(data)
    .rename(
        columns={
            'NAME': 'place',
            'place': 'placefp',
            'state': 'statefp',
            'B03002_001E': 'totalpop',
            'B03002_004E': 'black',
        }
    )
    [['statefp', 'placefp', 'place', 'totalpop', 'black']]
    .sort_values(by='black', ascending=False)
)

In [5]:
# First ten rows; df is ordered by 'black' descending, so these are the
# ten places with the largest Black population.
df.iloc[:10]

Unnamed: 0,statefp,placefp,place,totalpop,black
382,6,44000,"Los Angeles city, California",3949776.0,339659.0
1180,6,53000,"Oakland city, California",417442.0,98681.0
81,6,66000,"San Diego city, California",1390966.0,84367.0
1248,6,64000,"Sacramento city, California",489650.0,63976.0
1047,6,43000,"Long Beach city, California",470489.0,58260.0
36,6,36546,"Inglewood city, California",111006.0,45444.0
1211,6,67000,"San Francisco city, California",864263.0,43961.0
1006,6,27000,"Fresno city, California",519037.0,38502.0
789,6,49270,"Moreno Valley city, California",203691.0,34160.0
440,6,40130,"Lancaster city, California",160113.0,34026.0


In [6]:
# Black share of each place's total population (a fraction, not a percentage).
# Rows whose totalpop is 0 do not raise; the division yields NaN/inf for them.
df['blackpct'] = df['black'].div(df['totalpop'])

In [7]:
# Ten places with the highest Black share of population.
# sort_values returns a new frame, so df itself keeps its original order.
(
    df
    .sort_values(by='blackpct', ascending=False)
    .head(n=10)
)

Unnamed: 0,statefp,placefp,place,totalpop,black,blackpct
186,6,82667,"View Park-Windsor Hills CDP, California",11308.0,8936.0,0.790237
456,6,39108,"Ladera Heights CDP, California",7081.0,4607.0,0.650614
187,6,84116,"West Athens CDP, California",8746.0,4675.0,0.53453
1449,6,84780,"West Rancho Dominguez CDP, California",23537.0,10768.0,0.457492
457,6,84592,"Westmont CDP, California",33532.0,15095.0,0.450167
36,6,36546,"Inglewood city, California",111006.0,45444.0,0.409383
1481,6,31540,"Guinda CDP, California",259.0,87.0,0.335907
770,6,13157,"China Lake Acres CDP, California",2446.0,786.0,0.321341
983,6,42230,"Lodoga CDP, California",162.0,51.0,0.314815
680,6,17995,"Daphnedale Park CDP, California",103.0,32.0,0.31068


In [8]:
# Export the frame to CSV; index=False leaves the row index out of the file.
# Only the columns present on df when this cell runs are written.
# NOTE(review): the execution counter In [8] appears on two cells, so the
# recorded run order is ambiguous — confirm which columns the saved file has.
df.to_csv('blacks-by-city-ca.csv', index=False)

In [8]:
# Total Black population summed over every place in the frame.
df['black'].sum()

2114240.0

In [9]:
# Number of places (rows) in the frame.
df.shape[0]

1522

In [10]:
# Cumulative Black population, accumulated in the frame's current row order.
# df was sorted by 'black' descending when it was built, so this is the running
# total from the largest place downward. Re-sorting df in place before running
# this cell would change the result.
df['runningcount'] = df['black'].cumsum()

In [11]:
# Running total expressed as a fraction of the overall Black population
# across all places in the frame (reaches 1.0 on the last rows).
df['runningpct'] = df['runningcount'].div(df['black'].sum())

In [12]:
# How many places have a running share at or below 50%.
len(df.loc[df['runningpct'].le(0.5)])

18

In [13]:
# 18 places sit at or below the 50% mark, so the first 19 rows include the
# place at which the running share crosses one half.
df.iloc[:19]

Unnamed: 0,statefp,placefp,place,totalpop,black,blackpct,runningcount,runningpct
382,6,44000,"Los Angeles city, California",3949776.0,339659.0,0.085994,339659.0,0.160653
1180,6,53000,"Oakland city, California",417442.0,98681.0,0.236395,438340.0,0.207327
81,6,66000,"San Diego city, California",1390966.0,84367.0,0.060654,522707.0,0.247232
1248,6,64000,"Sacramento city, California",489650.0,63976.0,0.130657,586683.0,0.277491
1047,6,43000,"Long Beach city, California",470489.0,58260.0,0.123829,644943.0,0.305047
36,6,36546,"Inglewood city, California",111006.0,45444.0,0.409383,690387.0,0.326541
1211,6,67000,"San Francisco city, California",864263.0,43961.0,0.050865,734348.0,0.347334
1006,6,27000,"Fresno city, California",519037.0,38502.0,0.07418,772850.0,0.365545
789,6,49270,"Moreno Valley city, California",203691.0,34160.0,0.167705,807010.0,0.381702
440,6,40130,"Lancaster city, California",160113.0,34026.0,0.212512,841036.0,0.397796


In [14]:
# Overall Black share: total Black population divided by total population,
# both summed across every place in the frame.
# Note this scalar shares its name with the per-place df['blackpct'] column.
blackpct = df.black.sum() / df.totalpop.sum()

In [15]:
# Display the overall Black share computed in the previous cell (a fraction).
blackpct

0.057233763340023655

In [16]:
# One-proportion z-score for each place: how far its Black share is from the
# overall share (`blackpct`), in units of the standard error expected for a
# place of that population:
#     z = (p_place - p_all) / sqrt(p_all * (1 - p_all) / n_place)
# Uses numpy directly: the `pd.np` alias was deprecated in pandas 1.0 and
# removed in pandas 2.0, so the original expression fails on current pandas.
# Because n_place is the whole population, large places get very large |z|.
# Rows with totalpop == 0 produce NaN rather than raising.
standard_error = np.sqrt(blackpct * (1 - blackpct) / df['totalpop'])
df['blackz'] = (df['blackpct'] - blackpct) / standard_error

In [17]:
# Look up the row(s) whose place name begins with 'Ojai'.
df.loc[df['place'].str.startswith('Ojai')]

Unnamed: 0,statefp,placefp,place,totalpop,black,blackpct,runningcount,runningpct,blackz
752,6,53476,"Ojai city, California",7594.0,15.0,0.001975,2113516.0,0.999658,-20.730338


In [18]:
# Summary statistics of place population, used to judge what counts as a
# comparably sized place.
df['totalpop'].describe()

count    1.522000e+03
mean     2.427098e+04
std      1.205208e+05
min      0.000000e+00
25%      6.215000e+02
50%      3.191500e+03
75%      1.659075e+04
max      3.949776e+06
Name: totalpop, dtype: float64

In [19]:
# Five places with the most negative z-scores, i.e. furthest below the
# overall Black share relative to their size.
(
    df
    .sort_values(by='blackz')
    .head(n=5)
)

Unnamed: 0,statefp,placefp,place,totalpop,black,blackpct,runningcount,runningpct,blackz
1208,6,68000,"San Jose city, California",1023031.0,29147.0,0.028491,933609.0,0.441581,-125.154756
493,6,69000,"Santa Ana city, California",334493.0,2716.0,0.00812,1785691.0,0.844602,-122.284318
129,6,2000,"Anaheim city, California",349007.0,7843.0,0.022472,1451399.0,0.686487,-88.406989
1033,6,29000,"Garden Grove city, California",174812.0,1607.0,0.009193,1908867.0,0.902862,-86.470859
134,6,36000,"Huntington Beach city, California",200415.0,2510.0,0.012524,1798760.0,0.850783,-86.166648


In [20]:
# Places with a population strictly between 7,000 and 8,000, ordered from the
# most negative z-score upward; show at most 70 of them.
(
    df.loc[df['totalpop'].gt(7000) & df['totalpop'].lt(8000)]
    .sort_values(by='blackz')
    .head(n=70)
)

Unnamed: 0,statefp,placefp,place,totalpop,black,blackpct,runningcount,runningpct,blackz
788,6,46660,"Mecca CDP, California",7893.0,0.0,0.0,2114240.0,1.0,-21.889971
118,6,53294,"Oceano CDP, California",7788.0,0.0,0.0,2114240.0,1.0,-21.743884
868,6,54372,"Orosi CDP, California",7760.0,0.0,0.0,2114240.0,1.0,-21.704761
955,6,57302,"Piñon Hills CDP, California",7583.0,0.0,0.0,2114240.0,1.0,-21.455798
745,6,34316,"Homeland CDP, California",7471.0,0.0,0.0,2114240.0,1.0,-21.296759
432,6,22790,"Escalon city, California",7449.0,0.0,0.0,2114240.0,1.0,-21.265379
1095,6,78162,"Templeton CDP, California",7989.0,18.0,0.002253,2113177.0,0.999497,-21.155731
485,6,54274,"Orland city, California",7532.0,9.0,0.001195,2113829.0,0.999806,-20.937089
313,6,58030,"Pollock Pines CDP, California",7269.0,5.0,0.000688,2114121.0,0.999944,-20.754409
752,6,53476,"Ojai city, California",7594.0,15.0,0.001975,2113516.0,0.999658,-20.730338
