To run this script, enter the desired start and end years, then run the last two cells at the bottom:

In [4]:
# First and last survey years to download; the download loops below include both endpoints.
start_year = 2010
end_year = 2019

### List of Selected Variable Types:

1: Total Number of Housing Units

2: House Heating Fuel (Gas, Propane, Electric, Fuel Oil, Wood)

3: Year Structure Built (2014 or later, 2010-2013, 2000-2009, 1990-1999, 1980-1989, 1970-1979, 1960-1969, 1950-1959, 1940-1949, before 1940)

4: Total Units in Structure (1,detached, 1,attached, 2, 3-4, 5-9, 10-19, 20-49, 50+, Mobile home, Boat,RV,van,etc.)

5: Number of rooms (1,2,3,4,5,6,7,8,9+, Median Rooms)

6: Owner/Renter Data (Owner-occupied, renter-occupied, Average household size of owner-occupied unit, average household size of renter-occupied unit)

7: Household Income in Past 12 Months in 2015 Inflation-Adjusted Dollars 
(Less than 10,000, 10,000-14,999, 15,000 -19,999, ... , 200,000+ )

8: Median Household Income

9: Total Population

Code names are stored in the my_codes variable; a label name is associated with each code to create a readable column name for the .csv output.

In [1]:
import pandas as pd
import censusdata as cd
import matplotlib.pyplot as plt
# Show every column when displaying wide frames. The fully-qualified option name is
# used because the short pattern 'max_columns' also matches
# 'styler.render.max_columns' in newer pandas releases, which makes set_option raise
# OptionError ("Pattern matched multiple keys").
pd.set_option('display.max_columns', None)
import time
import statistics as stats

# Two-letter postal abbreviation -> two-digit code string. The '*' entry maps the
# wildcard to itself.
state_codes = {
    '*': '*',
    'WA': '53', 'DE': '10', 'DC': '11', 'WI': '55', 'WV': '54', 'HI': '15',
    'FL': '12', 'WY': '56', 'PR': '72', 'NJ': '34', 'NM': '35', 'TX': '48',
    'LA': '22', 'NC': '37', 'ND': '38', 'NE': '31', 'TN': '47', 'NY': '36',
    'PA': '42', 'AK': '02', 'NV': '32', 'NH': '33', 'VA': '51', 'CO': '08',
    'CA': '06', 'AL': '01', 'AR': '05', 'VT': '50', 'IL': '17', 'GA': '13',
    'IN': '18', 'IA': '19', 'MA': '25', 'AZ': '04', 'ID': '16', 'CT': '09',
    'ME': '23', 'MD': '24', 'OK': '40', 'OH': '39', 'UT': '49', 'MO': '29',
    'MN': '27', 'MI': '26', 'RI': '44', 'KS': '20', 'MT': '30', 'MS': '28',
    'SC': '45', 'KY': '21', 'OR': '41', 'SD': '46',
}

# Search results for table B19001 in the 2015 ACS 5-year metadata, keeping only the
# entries whose variable name (first field) contains no 'M'.
# NOTE(review): the [306:] offset depends on the order of the search results —
# confirm it still selects the intended entries.
incomes = [
    entry
    for entry in cd.search('acs5', 2015, 'concept', 'B19001')[306:]
    if 'M' not in entry[0]
]

# Readable column labels built from the third field of each kept search result.
income_labels = ['Household Income: ' + entry[2] for entry in incomes]

def _acs_codes(table, numbers):
    """Build estimate variable names such as 'B25034_002E' for one table."""
    return ['{}_{:03d}E'.format(table, number) for number in numbers]


# Variable codes grouped by topic, in the same order as the label lists.
huc = _acs_codes('B25001', [1])                      # total housing units
fuelc = _acs_codes('B25040', [2, 3, 4, 5, 7])        # house heating fuel
ybc = _acs_codes('B25034', range(2, 12))             # year structure built
uisc = _acs_codes('B25024', range(2, 12))            # units in structure
numrc = _acs_codes('B25017', range(2, 11)) + _acs_codes('B25018', [1])  # rooms + median rooms
roc = _acs_codes('B25008', [2, 3]) + _acs_codes('B25010', [2, 3])       # owner/renter data
incomesc = _acs_codes('B19001', range(1, 18))        # household income brackets
medinc = _acs_codes('B19013', [1])                   # median household income
totpopc = _acs_codes('B01003', [1])                  # total population

# my_codes is the full list; my_codes2 is identical except that the last
# year-built code is left out.
_codes_before_year_built = huc + fuelc
_codes_after_year_built = uisc + numrc + roc + incomesc + medinc + totpopc
my_codes = _codes_before_year_built + ybc + _codes_after_year_built
my_codes2 = _codes_before_year_built + ybc[:-1] + _codes_after_year_built

def _prefixed(prefix, suffixes):
    """Return one label per suffix, each starting with the shared prefix."""
    return [prefix + suffix for suffix in suffixes]


# Readable column labels, grouped by topic in the same order as the code lists.
hul = ['Total Housing Units']
fuell = _prefixed('House Heating Fuel: ', ['Gas', 'Propane', 'Electric', 'Fuel Oil', 'Wood'])
ybl = _prefixed('Year Built: ', [
    '2014 or later', '2010-2013', '2000-2009', '1990-1999', '1980-1989',
    '1970-1979', '1960-1969', '1950-1959', '1940-1949', 'before 1940',
])
uisl = _prefixed('Units in Structure: ', [
    '1 DETACHED', '1 ATTACHED', '2', '3-4', '5-9', '10-19', '20-49', '50+',
    'Mobile Home', 'Boat,RV,Van,Etc.',
])
numrl = _prefixed('Number of Rooms: ', ['1', '2', '3', '4', '5', '6', '7', '8', '9+']) + [
    "Median Number of Rooms",
]
rol = [
    'Owner-Occupied',
    'Renter-Occupied',
    'Avg household size of owner-occupied unit',
    'Avg household size of renter-occupied unit',
]
medil = ['Median Household Income']
totpopl = ['Total Population']

# my_labels is the full list; my_labels2 is identical except that the first
# year-built label ('2014 or later') is left out.
_labels_before_year_built = hul + fuell
_labels_after_year_built = uisl + numrl + rol + income_labels + medil + totpopl
my_labels = _labels_before_year_built + ybl + _labels_after_year_built
my_labels2 = _labels_before_year_built + ybl[1:] + _labels_after_year_built

# Lookup from each variable code in my_codes to the label at the same position in my_labels.
codes_to_labels = {code: my_labels[position] for position, code in enumerate(my_codes)}

In [2]:
def get_acs_state_data(survey, year):
    """Download the selected ACS variables for every state for one survey year.

    Parameters
    ----------
    survey : str
        Survey identifier passed straight through to ``cd.download``.
    year : int
        Survey year. Years before 2015 use the shorter ``my_codes2`` /
        ``my_labels2`` lists; later years use ``my_codes`` / ``my_labels``.

    Returns
    -------
    pandas.DataFrame
        One row per downloaded geography, sorted by 'State', with 'State' and
        'Year' columns followed by the labelled data columns.
    """
    use_short_lists = year < 2015
    codes = my_codes2 if use_short_lists else my_codes
    labels = my_labels2 if use_short_lists else my_labels

    raw = cd.download(survey, year, cd.censusgeo([('state', '*')]), codes)
    raw.columns = labels

    # The state name is the text before the first ':' in each index entry's string form.
    state_names = [str(geo).split(':')[0] for geo in raw.index]

    values = raw.reset_index(drop=True)
    id_columns = pd.DataFrame({'State': state_names, 'Year': [year] * len(values.index)})
    return pd.concat([id_columns, values], axis=1).sort_values(by=['State'])

In [3]:
def _strip_county_suffix(county_name):
    """Remove a trailing ' County' from a name; any other name is returned unchanged."""
    suffix = ' County'
    if county_name.endswith(suffix):
        return county_name[:-len(suffix)]
    return county_name


def get_acs_county_data(survey, year):
    """Download the selected ACS variables for every county for one survey year.

    Parameters
    ----------
    survey : str
        Survey identifier passed straight through to ``cd.download``.
    year : int
        Survey year. Years before 2015 use the shorter ``my_codes2`` /
        ``my_labels2`` lists; later years use ``my_codes`` / ``my_labels``.

    Returns
    -------
    pandas.DataFrame
        One row per downloaded geography with 'State', 'County', 'FIPS Code'
        and 'Year' columns followed by the labelled data columns.
    """
    # Use the same 2015 cut-over as get_acs_state_data. The previous threshold (2021)
    # applied the short lists to 2015+ data, which shifted every "Year Built" label
    # by one bucket.
    if year < 2015:
        codes, labels = my_codes2, my_labels2
    else:
        codes, labels = my_codes, my_labels

    df = cd.download(survey, year, cd.censusgeo([('county', '*')]), codes)
    df.columns = labels

    states = []
    counties = []
    fips = []
    for geo in df.index:
        # geo.name is split on ',' into "<county part>,<state part>".
        name_parts = geo.name.split(',')
        states.append(name_parts[1].strip(' '))
        # str.strip('County') removes a *set of characters* from both ends (turning
        # "Clay County" into "lay"), so drop the literal suffix instead.
        counties.append(_strip_county_suffix(name_parts[0]))
        # Concatenate the identifiers of the first two geography components.
        fips.append(geo.geo[0][1] + geo.geo[1][1])

    df = df.reset_index(drop=True)
    id_columns = pd.DataFrame({
        'State': states,
        'County': counties,
        'FIPS Code': fips,
        'Year': [year for _ in df.index],
    })
    return pd.concat([id_columns, df], axis=1)

In [5]:
# Download ACS 1-year state data for every configured year, append a 'US' row to each
# year's table, and write the combined table to CSV.
datasets = []
for year in range(start_year, end_year + 1):
    df = get_acs_state_data(survey='acs1', year=year)
    # US row: 'US', the year, then the column-by-column sum of the state values.
    us_totals = ['US', df.Year.unique()[0]]
    for column in df.columns[2:]:
        us_totals.append(sum(df.loc[:, column]))
    # The second-to-last column (median household income) is not additive, so use
    # the median of the state values instead of their sum.
    us_totals[-2] = stats.median(df.loc[:, df.columns[-2]])
    # NOTE(review): the median-rooms and average-household-size columns are also
    # non-additive but are still summed here — confirm the intended US value.
    df.loc[len(df.index)] = us_totals
    datasets.append(df)
df_final = pd.concat(datasets, axis=0)
# Name the file after the configured range so it stays accurate when the years change
# (the defaults still produce '2010_2019_ACS1_State_Data.csv').
df_final.to_csv('{}_{}_ACS1_State_Data.csv'.format(start_year, end_year))

In [None]:
# Download ACS 5-year state data for every configured year, append a 'US' row to each
# year's table, and write the combined table to CSV.
datasets = []
for year in range(start_year, end_year + 1):
    df = get_acs_state_data(survey='acs5', year=year)
    # US row: 'US', the year, then the column-by-column sum of the state values.
    us_totals = ['US', df.Year.unique()[0]]
    for column in df.columns[2:]:
        us_totals.append(sum(df.loc[:, column]))
    # The second-to-last column (median household income) is not additive, so use
    # the median of the state values instead of their sum.
    us_totals[-2] = stats.median(df.loc[:, df.columns[-2]])
    # NOTE(review): the median-rooms and average-household-size columns are also
    # non-additive but are still summed here — confirm the intended US value.
    df.loc[len(df.index)] = us_totals
    datasets.append(df)
df_final = pd.concat(datasets, axis=0)
# Name the file after the configured range so it stays accurate when the years change
# (the defaults still produce '2010_2019_ACS5_State_Data.csv').
df_final.to_csv('{}_{}_ACS5_State_Data.csv'.format(start_year, end_year))

In [None]:
# Download ACS 5-year county data for every configured year and write the combined
# table to CSV.
datasets = [
    get_acs_county_data(survey='acs5', year=year)
    for year in range(start_year, end_year + 1)
]
df_final = pd.concat(datasets, axis=0)
# Name the file after the configured range so it stays accurate when the years change
# (the defaults still produce '2010_2019_ACS5_County_Data.csv').
df_final.to_csv('{}_{}_ACS5_County_Data.csv'.format(start_year, end_year))