In [1]:
import pandas as pd
import numpy as np
import itertools

from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from folktables import ACSDataSource, ACSEmployment, ACSIncome, ACSPublicCoverage

In [3]:
# Two-letter postal abbreviations: the 50 states followed by Puerto Rico
# (51 entries; DC is not included).
state_list = (
    'AL AK AZ AR CA CO CT DE FL GA HI '
    'ID IL IN IA KS KY LA ME MD MA MI '
    'MN MS MO MT NE NV NH NJ NM NY NC '
    'ND OH OK OR PA RI SC SD TN TX UT '
    'VT VA WA WV WI WY PR'
).split()

# Region -> division -> the two states sampled from that division.
# This nested mapping is the single source of truth for the study design;
# `region_list` and `chosen_states` below are derived from it so the three
# structures cannot drift out of sync when a state or division is edited.
REGIONS = {
    'NORTHEAST': {
        'NewEngland': ['MA', 'CT'],
        'MidAtlantic': ['NY', 'PA'],
    },
    'MIDWEST': {
        'EastNorthCentral': ['IL', 'OH'],
        'WestNorthCentral': ['MO', 'MN'],
    },
    'SOUTH': {
        'SouthAtlantic': ['FL', 'GA'],
        'EastSouthCentral': ['TN', 'AL'],
        'WestSouthCentral': ['TX', 'LA'],
    },
    'WEST': {
        'Mountain': ['AZ', 'CO'],
        'Pacific': ['CA', 'WA'],
    },
}

# Region names in declaration order: NORTHEAST, MIDWEST, SOUTH, WEST.
region_list = list(REGIONS)

# Flat list of every sampled state, in REGIONS traversal order
# (region, then division, then state): MA, CT, NY, PA, ..., CA, WA.
chosen_states = [
    state
    for divisions in REGIONS.values()
    for division_states in divisions.values()
    for state in division_states
]

# Postal abbreviation -> zero-padded two-digit state code, stored as a string
# (these appear to be FIPS state codes -- confirm against the data dictionary).
_STATE_CODES = {'AL': '01', 'AK': '02', 'AZ': '04', 'AR': '05', 'CA': '06',
                'CO': '08', 'CT': '09', 'DE': '10', 'FL': '12', 'GA': '13',
                'HI': '15', 'ID': '16', 'IL': '17', 'IN': '18', 'IA': '19',
                'KS': '20', 'KY': '21', 'LA': '22', 'ME': '23', 'MD': '24',
                'MA': '25', 'MI': '26', 'MN': '27', 'MS': '28', 'MO': '29',
                'MT': '30', 'NE': '31', 'NV': '32', 'NH': '33', 'NJ': '34',
                'NM': '35', 'NY': '36', 'NC': '37', 'ND': '38', 'OH': '39',
                'OK': '40', 'OR': '41', 'PA': '42', 'RI': '44', 'SC': '45',
                'SD': '46', 'TN': '47', 'TX': '48', 'UT': '49', 'VT': '50',
                'VA': '51', 'WA': '53', 'WV': '54', 'WI': '55', 'WY': '56',
                'PR': '72'}

# Inverse lookup: state code -> postal abbreviation.
# Keys are the code with leading zeros stripped ('06' -> '6'), still as
# strings -- presumably so that str() of an integer state code can be used
# directly as the key. Derived from _STATE_CODES rather than typed out a second
# time, so the two tables cannot disagree; insertion order follows _STATE_CODES.
_STATE_CODES_INV = {
    str(int(code)): abbr
    for abbr, code in _STATE_CODES.items()
}

In [6]:
# Load the 2016 one-year, person-level ACS sample for every state listed in
# `chosen_states`. download=True lets folktables fetch state files over the
# network (it logs one "Downloading data ..." line per state it fetches;
# presumably states already present locally are not downloaded again).
data_source = ACSDataSource(
    survey_year="2016",
    horizon="1-Year",
    survey='person',
)
acs_data = data_source.get_data(
    states=chosen_states,
    download=True,
)

Downloading data for 2016 1-Year person survey for MA...
Downloading data for 2016 1-Year person survey for CT...
Downloading data for 2016 1-Year person survey for PA...
Downloading data for 2016 1-Year person survey for IL...
Downloading data for 2016 1-Year person survey for OH...
Downloading data for 2016 1-Year person survey for MO...
Downloading data for 2016 1-Year person survey for MN...
Downloading data for 2016 1-Year person survey for GA...
Downloading data for 2016 1-Year person survey for TN...
Downloading data for 2016 1-Year person survey for AL...
Downloading data for 2016 1-Year person survey for LA...
Downloading data for 2016 1-Year person survey for AZ...
Downloading data for 2016 1-Year person survey for CO...
Downloading data for 2016 1-Year person survey for WA...


In [5]:
# Peek at the state-code column of the loaded sample (integer codes; pandas
# abbreviates the display to the head and tail of the Series).
acs_data.loc[:, 'ST']

0         6
1         6
2         6
3         6
4         6
         ..
376030    6
376031    6
376032    6
376033    6
376034    6
Name: ST, Length: 376035, dtype: int64