# Import libraries

In [1]:
import json
import os
import re
from pprint import pprint

import pandas as pd
import requests

pd.set_option('display.max_columns', 100)

# fips and label lists

## get fips list of all counties in CA

In [2]:
# Census reference file listing the counties of California, one comma-separated
# record per line.
fips_ep_url = "https://www2.census.gov/geo/docs/reference/codes/files/st06_ca_cou.txt"

fips_resp = requests.get(fips_ep_url)
fips_resp.raise_for_status()  # fail loudly instead of parsing an error page as data
fips_txt = fips_resp.text

# splitlines() copes with both "\r\n" and "\n" endings; blank lines (such as
# the one produced by a trailing newline) are skipped so they do not turn
# into all-null rows in the frame.
fips_lines = [line for line in fips_txt.splitlines() if line.strip()]

fips_ls = [line.split(",") for line in fips_lines]

# Keep only positional columns 2 and 3, which are used as the county FIPS code
# and the county name; columns 0, 1 and 4 are discarded.
fips_df = pd.DataFrame(fips_ls).drop(columns=[0, 1, 4])
fips_df.columns = ["fips", "county"]
fips_df.head()

Unnamed: 0,fips,county
0,1,Alameda County
1,3,Alpine County
2,5,Amador County
3,7,Butte County
4,9,Calaveras County


## Get the list of variable IDs and their labels

In [3]:
# Variable catalogue of the 2017 ACS 5-year dataset. The parsed payload is a
# list of rows whose first row is the header (name, label, concept).
label_ep_url = "https://api.census.gov/data/2017/acs/acs5/variables"
label_txt = requests.get(label_ep_url).text
label_ls = json.loads(label_txt)

# Lookup from the first field of each row (variable id) to the second (label).
# The header row is part of the iteration, so it is mapped as well.
label_dict = {row[0]: row[1] for row in label_ls}

# Same payload as a frame: header row -> column names, the rest -> records.
label_df = pd.DataFrame(data=label_ls[1:], columns=label_ls[0])
label_df.head()

Unnamed: 0,name,label,concept
0,for,Census API FIPS 'for' clause,Census API Geography Specification
1,in,Census API FIPS 'in' clause,Census API Geography Specification
2,ucgid,Uniform Census Geography Identifier clause,Census API Geography Specification
3,B06004HPR_002E,Estimate!!Total!!Born in Puerto Rico,"PLACE OF BIRTH (WHITE ALONE, NOT HISPANIC OR L..."
4,B02019_008E,Estimate!!Total Groups Tallied!!Micronesian!!O...,NATIVE HAWAIIAN AND OTHER PACIFIC ISLANDER ALO...


# census  

## Get data (e.g. by tract or county) from the US Census API

In [4]:
def census_extracter(variable, geo, api_key=None):
    """Fetch 2017 ACS 5-year data for California and return it as a DataFrame.

    Parameters
    ----------
    variable : str
        Value for the API's ``get=`` clause, e.g. ``"group(B05001)"``.
    geo : str
        Value for the API's ``for=`` clause, e.g. ``"county:*"``. The request
        is always restricted with ``in=state:06``.
    api_key : str, optional
        Census API key. When omitted, the ``CENSUS_API_KEY`` environment
        variable is used; if that is unset too, the request is sent without
        a ``key`` parameter.

    Returns
    -------
    pandas.DataFrame
        One row per record returned by the API. Columns containing any null
        value and columns whose raw name ends in ``"M"`` are dropped. The
        remaining column ids are replaced by their label from the
        module-level ``label_dict`` when one exists (otherwise the id is
        kept), then lower-cased with ``"estimate!!"`` removed and spaces and
        ``"!!"`` turned into underscores.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    NameError
        If ``label_dict`` has not been defined before the call.
    """
    # Extract from the Census API. The key comes from the caller or the
    # environment so that no credential is stored in the notebook itself.
    if api_key is None:
        api_key = os.environ.get("CENSUS_API_KEY")

    cs_ep_url = (
        "https://api.census.gov/data/2017/acs/acs5?"
        + "get=" + variable
        + "&for=" + geo
        + "&in=state:06"
    )
    if api_key:
        cs_ep_url += "&key=" + api_key

    cs_resp = requests.get(cs_ep_url)
    cs_resp.raise_for_status()  # surface HTTP errors instead of a JSON decode failure
    cs_rows = json.loads(cs_resp.text)

    # The first row of the payload is the header; the rest are records.
    cs_df = pd.DataFrame(cs_rows[1:], columns=cs_rows[0])
    cs_df = cs_df.dropna(axis=1)

    # Drop columns whose raw id ends in "M" (presumably the margin-of-error
    # companions of the estimate columns -- TODO confirm). This must happen
    # before the ids are replaced by labels.
    cs_cols_to_drop = [col for col in cs_df.columns if re.match(".*M$", col)]
    cs_df = cs_df.drop(columns=cs_cols_to_drop)

    # Column name converter: variable id -> label, falling back to the id.
    cs_df.columns = [label_dict.get(col, col) for col in cs_df.columns]

    # All three replacements are literal, so regex=False keeps the result the
    # same regardless of the pandas default for `regex`.
    cs_df.columns = (
        cs_df.columns.str.lower()
        .str.replace("estimate!!", "", regex=False)
        .str.replace(" ", "_", regex=False)
        .str.replace("!!", "_", regex=False)
    )

    return cs_df

## Find variable groups whose description contains a keyword

In [5]:
# Catalogue of variable groups for the 2017 ACS 5-year dataset; the parsed
# payload exposes the groups under the "groups" key.
group_ep_url = "https://api.census.gov/data/2017/acs/acs5/groups"
group_txt = requests.get(group_ep_url).text
group_ls = json.loads(group_txt)

keyword = "CITIZENSHIP"

# Keep the groups whose description mentions the keyword and whose name does
# not contain "PR"; descriptions are stored lower-cased with underscores in
# place of spaces.
group_dict = {
    row["name"]: row["description"].lower().replace(" ", "_")
    for row in group_ls["groups"]
    if keyword in row["description"] and "PR" not in row["name"]
}

group_df = pd.DataFrame(list(group_dict.items()), columns=["name", "description"])
group_df.head()

Unnamed: 0,name,description
0,B27020,health_insurance_coverage_status_and_type_by_c...
1,B07007,geographical_mobility_in_the_past_year_by_citi...
2,B08111,means_of_transportation_to_work_by_citizenship...
3,B08511,means_of_transportation_to_work_by_citizenship...
4,B07407,geographical_mobility_in_the_past_year_by_citi...


In [6]:
# Download every selected group at county level and write one CSV per group,
# named "<group>_<description>.csv".
census_out_dir = "../data/census/"
os.makedirs(census_out_dir, exist_ok=True)  # to_csv does not create missing directories

for group, description in group_dict.items():
    entry_us_df = census_extracter(variable="group(" + group + ")", geo="county:*")
    entry_us_df.to_csv(census_out_dir + group + "_" + description + ".csv", index=False)

In [7]:
# Spot-check a single group (B26210) at county level and preview the result.
entry_us_df = census_extracter(
    variable="group(B26210)",
    geo="county:*",
)
entry_us_df.head()

Unnamed: 0,geography,name,state,county
0,0500000US06049,"Modoc County, California",6,49
1,0500000US06047,"Merced County, California",6,47
2,0500000US06033,"Lake County, California",6,33
3,0500000US06043,"Mariposa County, California",6,43
4,0500000US06115,"Yuba County, California",6,115
