# Processing U.S. Census data

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import pyarrow
import geopandas as gpd
import geojson
import matplotlib
import matplotlib.pyplot as plt
import geojson
import json
import jenkspy
import numpy as np
from earthpy import clip as cl
from altair import datum
import altair as alt
import altair_latimes as lat

alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")
pd.options.display.max_columns = 200
pd.options.display.max_rows = 1000

### Import DataDesk tool

In [6]:
!censusdatadownloader age tracts

  df[field] = df[field].astype(pd.np.float64)


In [7]:
!censusdatadownloader population zctas

In [8]:
src = pd.read_csv(
    "../census/processed/acs5_2018_age_tracts.csv",
    low_memory="False",
    dtype={"geoid": str},
)

  interactivity=interactivity, compiler=compiler, result=result)


In [9]:
df = src[
    [
        "geoid",
        "name",
        "total_under_5",
        "total_5_to_9",
        "total_10_to_14",
        "total_15_to_17",
        "total_18_to_19",
        "total_20",
        "total_21",
        "total_22_to_24",
        "total_25_to_29",
        "total_30_to_34",
        "total_35_to_39",
        "total_40_to_44",
        "total_45_to_49",
        "total_50_to_54",
        "total_55_to_59",
        "total_60_to_61",
        "total_62_to_64",
        "total_65_to_66",
        "total_67_to_69",
        "total_70_to_74",
        "total_75_to_79",
        "total_80_to_84",
        "total_85_and_over",
    ]
]

In [10]:
df.head()

Unnamed: 0,geoid,name,total_under_5,total_5_to_9,total_10_to_14,total_15_to_17,total_18_to_19,total_20,total_21,total_22_to_24,total_25_to_29,total_30_to_34,total_35_to_39,total_40_to_44,total_45_to_49,total_50_to_54,total_55_to_59,total_60_to_61,total_62_to_64,total_65_to_66,total_67_to_69,total_70_to_74,total_75_to_79,total_80_to_84,total_85_and_over
0,1071951100,"Census Tract 9511, Jackson County, Alabama",346.0,604.0,437.0,216.0,173.0,95.0,100.0,173.0,431.0,366.0,431.0,509.0,418.0,500.0,429.0,108.0,303.0,112.0,325.0,290.0,230.0,177.0,124.0
1,1097002900,"Census Tract 29, Mobile County, Alabama",333.0,407.0,176.0,245.0,62.0,26.0,31.0,84.0,417.0,505.0,370.0,298.0,236.0,225.0,225.0,85.0,116.0,44.0,87.0,97.0,121.0,78.0,56.0
2,1121010302,"Census Tract 103.02, Talladega County, Alabama",185.0,94.0,396.0,191.0,123.0,0.0,0.0,131.0,320.0,176.0,135.0,385.0,421.0,326.0,523.0,159.0,207.0,123.0,267.0,342.0,160.0,118.0,22.0
3,1097003404,"Census Tract 34.04, Mobile County, Alabama",161.0,226.0,226.0,83.0,93.0,45.0,28.0,144.0,191.0,160.0,259.0,154.0,156.0,135.0,139.0,121.0,88.0,41.0,44.0,62.0,32.0,16.0,19.0
4,1101002900,"Census Tract 29, Montgomery County, Alabama",683.0,764.0,571.0,270.0,146.0,143.0,59.0,237.0,760.0,822.0,620.0,500.0,321.0,347.0,479.0,84.0,275.0,142.0,161.0,166.0,96.0,62.0,53.0


In [11]:
df.to_csv("../coronavirus/input/tracts_age_cohorts.csv", index=False)

### California counties

In [12]:
counties = pd.read_csv("input/processed/acs5_2018_population_counties.csv")

In [13]:
ca_counties = pd.DataFrame(counties[counties["name"].str.contains(", California")])

In [14]:
ca_counties["name"] = ca_counties["name"].str.replace(", California", "")

In [15]:
ca_counties.sort_values(by="universe", ascending=False).head(10)

Unnamed: 0,geoid,name,universe,universe_annotation,universe_moe,universe_moe_annotation,state,county
198,6037,Los Angeles County,10098052.0,,-555555555.0,*****,6,37
216,6073,San Diego County,3302833.0,,-555555555.0,*****,6,73
209,6059,Orange County,3164182.0,,-555555555.0,*****,6,59
212,6065,Riverside County,2383286.0,,-555555555.0,*****,6,65
215,6071,San Bernardino County,2135413.0,,-555555555.0,*****,6,71
222,6085,Santa Clara County,1922200.0,,-555555555.0,*****,6,85
181,6001,Alameda County,1643700.0,,-555555555.0,*****,6,1
213,6067,Sacramento County,1510023.0,,-555555555.0,*****,6,67
186,6013,Contra Costa County,1133247.0,,-555555555.0,*****,6,13
189,6019,Fresno County,978130.0,,-555555555.0,*****,6,19


---

### California cities

In [16]:
places = (
    pd.read_csv(
        "input/processed/acs5_2018_population_places.csv",
        dtype={"geoid": str, "state": str, "place": str},
    )
    .drop(["universe_annotation", "universe_moe", "universe_moe_annotation"], axis=1)
    .rename(columns={"universe": "population", "name": "city"})
)

In [17]:
ca_cities = pd.DataFrame(places[places["geoid"].str.startswith("06")])

In [18]:
ca_cities["city"] = ca_cities["city"].str.replace(" city, California", "")
ca_cities["city"] = ca_cities["city"].str.replace(" CDP, California", "")
ca_cities["city"] = ca_cities["city"].str.replace(" town, California", "")

In [19]:
ca_cities.loc[ca_cities.city.str.contains("Ventura"), "city"] = "Ventura"
ca_cities.loc[ca_cities.city.str.contains("Paso Robles"), "city"] = "Paso Robles"
ca_cities.loc[ca_cities.city.str.contains("Paso Robles"), "city"] = "Paso Robles"
ca_cities.loc[
    ca_cities.city.str.contains("La Crescenta-Montrose"), "city"
] = "La Crescenta"
ca_cities.loc[
    ca_cities.city.str.contains("La Crescenta-Montrose"), "city"
] = "La Crescenta"

In [20]:
ca_cities[ca_cities["city"].str.contains("Alpine")]

Unnamed: 0,geoid,city,population,state,place
18354,604716,"Bear Valley CDP (Alpine County), California",58.0,6,4716
18904,601228,Alpine Village,155.0,6,1228
18912,601192,Alpine,15233.0,6,1192


In [21]:
ca_cities[ca_cities["geoid"] == ""]

Unnamed: 0,geoid,city,population,state,place


In [22]:
ca_cities = ca_cities.drop([18742, 19030])

In [23]:
ca_cities.sort_values(by="population", ascending=False).head()

Unnamed: 0,geoid,city,population,state,place
20022,644000,Los Angeles,3959657.0,6,44000
18899,666000,San Diego,1401932.0,6,66000
19803,668000,San Jose,1026658.0,6,68000
19805,667000,San Francisco,870044.0,6,67000
19104,627000,Fresno,522277.0,6,27000


In [24]:
len(ca_cities)

1519

In [25]:
ca_cities.to_csv(
    "/Users/mhustiles/data/github/coronavirus-tracker/_notebooks/data/raw/census/ca_cities.csv",
    index=False,
)

In [38]:
la_communities = gpd.read_file(
    "/Users/mhustiles/data/data/GIS/LA/lac_cities_noislands.shp"
)

In [27]:
la_county_cities = la_communities[la_communities["CITY_TYPE"] == "City"]

In [28]:
la_communities = gpd.read_file(
    "/Users/mhustiles/data/data/GIS/LA/LACITY_COMMUNITIES/LACITY_COMMUNITIES.shp"
)

In [29]:
la_communities["name"] = la_communities["COMTY_NAME"].str.title()

In [30]:
la_communities.columns = la_communities.columns.str.lower()

In [31]:
la_communities[["name", "geometry"]].to_file(
    "/Users/mhustiles/data/github/coronavirus-tracker/_notebooks/data/raw/la_communities.geojson"
)

In [41]:
la_communities.head()

Unnamed: 0,CITY,CITY_ID,CITY_TYPE,CITY_NAME,CITY_LABEL,COLOR_CODE,ABBR,CITY_NO,DESCRIPTN,URL,PHONE,OF_AREA_SM,FEAT_TYPE,COMMENT,COLOR_EGIS,POPULATION,Shape_Leng,Shape_Area,geometry
0,250,31,Unincorporated,Unincorporated,CO,1,UNIN,0,UNINC,www.lacounty.gov,2139744321,0.0,Land,,"Yellow - RGB 255,255,115",1095592,9186.569374,2493216.0,"POLYGON ((-118.29705 33.85818, -118.29705 33.8..."
1,250,31,Unincorporated,Unincorporated,CO,1,UNIN,0,UNINC,www.lacounty.gov,2139744321,0.0,Land,,"Yellow - RGB 255,255,115",1095592,760.011258,28000.47,"POLYGON ((-118.35846 34.07571, -118.35847 34.0..."
2,254,141,City,Los Angeles,Los Angeles,9,LAX,49,LA,http://www.lacity.org,2134852121,468.852,Water,,"Gray - RGB 178,178,178",3792622,6166.499933,1956899.0,"POLYGON ((-118.45297 33.96672, -118.45195 33.9..."
3,250,31,Unincorporated,Unincorporated,CO,1,UNIN,0,UNINC,www.lacounty.gov,2139744321,0.0,Land,,"Yellow - RGB 255,255,115",1095592,6015.469922,1424112.0,"POLYGON ((-117.89096 34.09930, -117.89083 34.0..."
4,250,31,Unincorporated,Unincorporated,CO,1,UNIN,0,UNINC,www.lacounty.gov,2139744321,0.0,Land,,"Yellow - RGB 255,255,115",1095592,5303.929074,1353492.0,"POLYGON ((-118.40904 33.98133, -118.40913 33.9..."


In [42]:
la_communities[la_communities["CITY_NAME"].fillna("").str.contains("South")]

Unnamed: 0,CITY,CITY_ID,CITY_TYPE,CITY_NAME,CITY_LABEL,COLOR_CODE,ABBR,CITY_NO,DESCRIPTN,URL,PHONE,OF_AREA_SM,FEAT_TYPE,COMMENT,COLOR_EGIS,POPULATION,Shape_Leng,Shape_Area,geometry
58,107,187,City,South El Monte,South El Monte,3,SEM,78,SEL,www.ci.south-el-monte.ca.us,6265796540,2.808,Land,,"Purple - RGB 185,185,250",20116,62002.388587,79451040.0,"POLYGON ((-118.06098 34.06353, -118.06099 34.0..."
120,200,74,City,South Gate,South Gate,2,SGT,79,SGT,www.cityofsouthgate.org,3235639500,7.321,Land,,"Pink - RGB 255,190,232",94396,102569.189768,204962200.0,"POLYGON ((-118.22438 33.96568, -118.22443 33.9..."
144,64,189,City,South Pasadena,South Pasadena,4,SPS,80,SPAS,www.ci.south-pasadena.ca.us,6264037200,3.468,Land,,"Green - RGB 203,255,112",25619,43466.685739,95202620.0,"POLYGON ((-118.14670 34.12404, -118.14661 34.1..."


In [None]:
!mapshaper

In [None]:
ca_counties = gpd.read_file('/Users/mhustiles/data/data/GIS/ca-counties.geojson')

In [None]:
ca_counties.plot()