# American Community Survey (ACS) 2018 1-Year Person Data (US Census Bureau)

In [None]:
import numpy as np
import pandas as pd
import re
import folktables
from folktables import ACSDataSource, ACSIncome, ACSEmployment, ACSHealthInsurance, ACSPublicCoverage

In [None]:
# Data source for the 2018 1-Year person-level ACS survey; `src` is reused by
# the per-state loading loops further down.
src = ACSDataSource(survey='person', horizon='1-Year', survey_year='2018')

# Definition table for the survey variables. It is used below to derive
# readable column labels and the per-task category mappings.
defs = src.get_definitions()

In [None]:
# Build `cols`, a dict mapping each ACS variable code to a readable column
# label. It is passed to `DataFrame.rename(columns=cols)` by the export cells.

# Matches a run of spaces followed by "(" or "," and everything after it on the
# line, i.e. a trailing qualifier such as " (details)" or ", details".
# Raw string: the previous non-raw literal contained the invalid string escapes
# "\(" and "\,", which newer Python versions warn about.
_LABEL_QUALIFIER = re.compile(r'[ ]+[(,].*')

# Keep the first definition row per variable code (column 1). After
# reset_index the code sits at position 0; position 4 is used as the label text
# (presumably the variable description -- confirm against the definition file).
label_df = defs.groupby(1).first().reset_index().iloc[:, [0, 4]]
label_df.columns = ['code', 'label']
label_df['label'] = label_df['label'].apply(
    lambda text: _LABEL_QUALIFIER.sub('', text)
)

# Hand-picked short names take precedence over the labels derived above.
cols = dict(zip(label_df['code'], label_df['label'])) | {
    'AGEP': 'Age',
    'COW': 'Workclass',
    'SCHL': 'Education',
    'MAR': 'Marital Status',
    'OCCP': 'Occupation',
    'WKHP': 'Hours per Week',
    'SEX': 'Gender',
    'RAC1P': 'Race',
    'PINCP': 'Income',
}

## Income

In [None]:
# Export the ACSIncome task for every state in `folktables.state_list` as one
# Parquet file with readable column names.

# The category mapping depends only on the task's feature list and the
# definition table, so it is computed once rather than once per state.
cats = folktables.generate_categories(features=ACSIncome.features, definition_df=defs)

dfs = []
for state in folktables.state_list:
    data = src.get_data(states=[state])
    # The third return value is not needed for the export.
    X_state, y_state, _ = ACSIncome.df_to_pandas(data, categories=cats)
    df_state = pd.concat([X_state, y_state], axis=1)
    # Record which state each row came from as the first column.
    df_state.insert(0, "State", state)
    dfs.append(df_state)

df = pd.concat(dfs, axis=0).reset_index(drop=True)
df = df.rename(columns=cols)  # variable codes -> readable labels
df.to_parquet('acs-income-2018.parquet', index=False)
df  # display the combined frame as the cell output

## Employment

In [None]:
# Export the ACSEmployment task for every state in `folktables.state_list` as
# one Parquet file with readable column names.

# Re-created with the same arguments as the data source defined at the top of
# the notebook.
src = ACSDataSource(survey_year='2018', horizon='1-Year', survey='person')
defs = src.get_definitions()

# The category mapping depends only on the task's feature list and the
# definition table, so it is computed once rather than once per state.
cats = folktables.generate_categories(features=ACSEmployment.features, definition_df=defs)

dfs = []
for state in folktables.state_list:
    data = src.get_data(states=[state])
    # The third return value is not needed for the export.
    X_state, y_state, _ = ACSEmployment.df_to_pandas(data, categories=cats)
    df_state = pd.concat([X_state, y_state], axis=1)
    # Record which state each row came from as the first column.
    df_state.insert(0, "State", state)
    dfs.append(df_state)

df = pd.concat(dfs, axis=0).reset_index(drop=True)
df = df.rename(columns=cols)  # variable codes -> readable labels
df.to_parquet('acs-employment-2018.parquet', index=False)
df  # display the combined frame as the cell output

## Public Coverage

In [None]:
# Export the ACSPublicCoverage task for every state in `folktables.state_list`
# as one Parquet file with readable column names.

# Re-created with the same arguments as the data source defined at the top of
# the notebook.
src = ACSDataSource(survey_year='2018', horizon='1-Year', survey='person')
defs = src.get_definitions()

# The category mapping depends only on the task's feature list and the
# definition table, so it is computed once rather than once per state.
cats = folktables.generate_categories(features=ACSPublicCoverage.features, definition_df=defs)

dfs = []
for state in folktables.state_list:
    data = src.get_data(states=[state])
    # The third return value is not needed for the export.
    X_state, y_state, _ = ACSPublicCoverage.df_to_pandas(data, categories=cats)
    df_state = pd.concat([X_state, y_state], axis=1)
    # Record which state each row came from as the first column. Named "State"
    # (capitalised) so the schema matches the income and employment exports.
    df_state.insert(0, "State", state)
    dfs.append(df_state)

df = pd.concat(dfs, axis=0).reset_index(drop=True)
df = df.rename(columns=cols)  # variable codes -> readable labels
df.to_parquet('acs-publiccoverage-2018.parquet', index=False)
df  # display the combined frame as the cell output