In [251]:
import numpy as np
import urllib3
import json
from sys import argv
import pandas as pd
import numpy as np
from functools import reduce

__DESCRIPTION:__ This notebook is used to match the SPLC data to the social and demographic variables of interest. American Community Survey 5-year estimate  data are obtained from both the U.S. Census Bureau API (available for 5-year estimate PROFILES data only from 2012-2015) and from individual tables obtained through the U.S. Census Bureau's American FactFinder website tool. My personal key has been removed from the code, in compliance with the API Terms of Agreement. Urban/Rural codes are assigned using the 6-point scale created by the National Center for Health Statistics. <br>

__INPUTS USED:__ <br>
 - SPLC_Clean.xlsx -- This is the cleaned SPLC data created in the SPLC Cleaning Jupyter Notebook. <br>

 - 50_fips.xlsx -- This is a dataset matching all 50 United States to county FIPS code. <br>

 - Ctytype_fips.csv -- Dataset obtained from the National Centers for Health Services (NCHS) that assigns each U.S. county a value between 1-6 to indicate degree of urbanization. Values 1-4 are classified as "Metropolitan" and Values 5-6 are classified as "Nonmetropolitan." This dataset was created by the Rural Urban Juptery Notebook. <br>

 - Veteran_ACS.csv -- This is the combined dataset of ACS 5-Year Estimates for Veteran Population by county, by service period. This dataset was generated in Veterans ACS Data Jupyter Notebook. <br>
 
__FINAL OUTPUT:__ <br>
 - SPLC_ACS_FINAL_2012-2015.csv -- Dataset of counties, matched with social, demographic, economic and flag "yes/no" for  Militia or Antigovernment Group presence, by year.

__GETTING DATA FROM THE ACS API__

In [252]:
# Variables of interest from the ACS API
variables = {'DP02_0069E'   : 'Veteran_E', \
        'DP02_0069PE'  : 'Veteran_P', \
        'DP02_0068E'   : 'Veteran_Pop', \
        'DP02_0067PE'  : 'Education_BachPlus_P', \
        'DP02_0061PE'  : 'Education_ThruHS_P', \
        'DP02_0090PE'  : 'Birth_DiffSt_P', \
        'DP02_0092PE'  : 'Birth_Forgn_P', \
        'DP02_0002PE'  : 'House_Fam_P', \
        'DP02_0001E'   : 'House_Pop_E', \
        'DP02_0008PE'  : 'House_FemHead_P', \
        'DP03_0119PE'  : 'Poverty_Fam_P', \
        'DP03_0062E'   : 'Income_House_Median', \
        'DP03_0047PE'  : 'Employ_Private_P', \
        'DP03_0048PE'  : 'Employ_Gov_P', \
        'DP03_0049PE'  : 'Employ_Self_P', \
        'DP03_0050PE'  : 'Employ_Unpaid_P', \
        'DP05_0059PE'  : 'Race_White_P', \
        'DP05_0003PE'  : 'Sex_Male_P', \
        'DP05_0004PE'  : 'Sex_Female_P', \
        'DP05_0017E'   : 'Age_Median_E', \
        'DP03_0028PE'  : 'Employ_Service_P', \
        'DP03_0041PE'  : 'Industry_Pro_P', \
        'DP03_0093E'   : 'Male_FullTime_Earnings', \
        'DP03_0094E'   : 'Female_FullTime_Earnings', \
        'DP02_0010PE'  : 'House_Nonfamily_P', \
        'DP05_0001E'   : 'Total_Population_E'
       }

In [253]:
# Code for the 2015 year API data pull

# Set constants
http = urllib3.PoolManager()
key = 'insert your own key here'
url = 'http://api.census.gov/data/2015/acs5/profile?get='
mid2 = ',NAME&for=county:*&key='

# Create blank dictionary to store dataframes
output = {}

# For each variable of interest, query API for data and store in a dataframe. Output: dictionary of dataframes, one dataframe/variable.
for k, v in variables.items():
    
    # Create the URL of interest to pull data for variable k
    full = url + k + mid2 + key
    # Send the URL request
    datum = http.request('GET', full)
    # Pull data from the JSON database
    clean_datum = json.loads(datum.data.decode('utf-8'))
    dat = clean_datum[1:]
    # Add data to new dataframe
    mergedf = pd.DataFrame(dat, columns = [v, 'Name', 'state', 'county'])
    # Create fips code that includes both state and county
    mergedf['fips'] = mergedf.state.map(str) + mergedf.county.map(str)
    # Store data for each variable k in the dictionary
    output[v] = mergedf

# Create list of dataframes within dictionary
a = list(output.values())
    
# Merge individual variable tables as stored in dictionary "output" into a single dataframe
df_2015 = reduce(lambda left,right: pd.merge(left,right,on=['fips', 'Name', 'state', 'county'], how = 'outer'), a)
# Label each record with the correct year
df_2015['Year'] = '2015'

Unnamed: 0,Education_ThruHS_P,Name,state,county,fips,Male_FullTime_Earnings,Education_BachPlus_P,House_Fam_P,Total_Population_E,Employ_Private_P,...,Sex_Female_P,Veteran_Pop,Race_White_P,Employ_Service_P,Income_House_Median,Industry_Pro_P,House_Nonfamily_P,Birth_Forgn_P,House_FemHead_P,Year
0,33.5,"Autauga County, Alabama",01,001,01001,47713,23.2,69.7,55221,73.6,...,5.9,40732,78.8,17.0,51281,7.7,30.3,1.6,10.6,2015
1,28.8,"Baldwin County, Alabama",01,003,01003,47113,29.0,67.8,195121,81.5,...,5.4,151238,87.9,17.7,50254,9.8,32.2,3.5,10.4,2015
2,34.5,"Barbour County, Alabama",01,005,01005,37242,12.5,65.7,26932,71.8,...,5.5,21194,48.5,16.1,32964,6.0,34.3,2.7,19.5,2015
3,42.1,"Bibb County, Alabama",01,007,01007,35904,10.6,75.4,22604,76.8,...,5.0,17682,77.9,17.9,38678,5.4,24.6,1.1,15.1,2015
4,33.4,"Blount County, Alabama",01,009,01009,43315,12.9,75.1,57710,82.0,...,6.1,43993,96.8,14.1,45813,6.5,24.9,4.1,10.0,2015
5,33.3,"Bullock County, Alabama",01,011,01011,33231,13.9,63.3,10678,79.5,...,6.3,8444,27.7,15.0,31938,4.2,36.7,5.4,25.9,2015
6,37.7,"Butler County, Alabama",01,013,01013,37062,14.5,65.4,20354,77.4,...,6.1,15594,55.1,16.6,32229,5.1,34.6,0.7,18.7,2015
7,31.8,"Calhoun County, Alabama",01,015,01015,42300,17.6,67.7,116648,74.1,...,5.8,90265,77.2,17.7,41703,7.8,32.3,2.6,16.6,2015
8,38.4,"Chambers County, Alabama",01,017,01017,37170,11.6,62.6,34079,85.1,...,6.0,26686,58.6,14.5,34177,11.7,37.4,1.1,17.2,2015
9,36.4,"Cherokee County, Alabama",01,019,01019,39543,13.8,72.5,26008,73.1,...,4.9,20662,94.0,16.0,36296,4.8,27.5,1.0,12.0,2015


In [254]:
# Code for the 2014 year API data pull

# Set constants
url2 = 'http://api.census.gov/data/2014/acs5/profile?get='
http = urllib3.PoolManager()
key = 'insert your own key here'
mid2 = ',NAME&for=county:*&key='

# Create blank dictionary to store dataframes
output = {}

# For each variable of interest, query API for data and store in a dataframe. Output: dictionary of dataframes, one dataframe/variable.
for k, v in variables.items():
  
    # Create the URL of interest to pull data for variable k
    full = url2 + k + mid2 + key
    # Send the URL request
    datum = http.request('GET', full)
    # Pull data from the JSON database
    clean_datum = json.loads(datum.data.decode('utf-8'))
    dat = clean_datum[1:]
    # Add data to new dataframe
    mergedf = pd.DataFrame(dat, columns = [v, 'Name', 'state', 'county'])
    # Create fips code that includes both state and county
    mergedf['fips'] = mergedf.state.map(str) + mergedf.county.map(str)
    # Store data for each variable k in the dictionary
    output[v] = mergedf
    
# Create list of dataframes within dictionary
a = list(output.values())    

# Merge individual variable tables as stored in dictionary "output" into a single dataframe
df_2014 = reduce(lambda left,right: pd.merge(left,right,on=['fips', 'Name', 'state', 'county'], how = 'outer'), a)
# Label each record with the correct year
df_2014['Year'] = '2014'

In [255]:
# Code for the 2013 year API data pull

# Set constants
url2 = 'http://api.census.gov/data/2013/acs5/profile?get='
http = urllib3.PoolManager()
key = 'insert your own key here'
mid2 = ',NAME&for=county:*&key='

# Create blank dictionary to store dataframes
output = {}

# For each variable of interest, query API for data and store in a dataframe. Output: dictionary of dataframes, one dataframe/variable.
for k, v in variables.items():
  
    # Create the URL of interest to pull data for variable k
    full = url2 + k + mid2 + key
    # Send the URL request
    datum = http.request('GET', full)   
    # Pull data from the JSON database
    clean_datum = json.loads(datum.data.decode('utf-8'))
    dat = clean_datum[1:]
    # Add data to new dataframe
    mergedf = pd.DataFrame(dat, columns = [v, 'Name', 'state', 'county'])
    # Create fips code that includes both state and county
    mergedf['fips'] = mergedf.state.map(str) + mergedf.county.map(str)
    # Store data for each variable k in the dictionary
    output[v] = mergedf
    
# Create list of dataframes within dictionary
a = list(output.values())   

# Merge individual variable tables as stored in dictionary "output" into a single dataframe
df_2013 = reduce(lambda left,right: pd.merge(left,right,on=['fips', 'Name', 'state', 'county'], how = 'outer'), a)
# Label each record with the correct year
df_2013['Year'] = '2013'

In [256]:
# Code for the 2012 year API data pull

# Set constants
url3 = 'http://api.census.gov/data/2012/acs5/profile?get='
http = urllib3.PoolManager()
key = 'insert your own key here'
mid3 = '&for=county:*&key='

# Create blank dictionary to store dataframes
output = {}

# For each variable of interest, query API for data and store in a dataframe. Output: dictionary of dataframes, one dataframe/variable.
for k, v in variables.items():
  
    # Create the URL of interest to pull data for variable k
    full = url3 + k + mid3 + key
    # Send the URL request
    datum = http.request('GET', full)
    # Pull data from the JSON database
    clean_datum = json.loads(datum.data.decode('utf-8'))
    dat = clean_datum[1:]
    # Add data to new dataframe
    mergedf = pd.DataFrame(dat, columns = [k, 'state', 'county'])
    # Create fips code that includes both state and county
    mergedf['fips'] = mergedf.state.map(str) + mergedf.county.map(str)
    # Store data for each variable k in the dictionary
    output[v] = mergedf

# Create list of dataframes within dictionary
a = list(output.values())

# Merge individual variable tables as stored in dictionary "output" into a single dataframe    
df_2012 = reduce(lambda left,right: pd.merge(left,right,on=['fips', 'state', 'county'], how = 'outer'), a)
# Label each record with the correct year
df_2012['Year'] = '2012'
# Rename the columns using the variables dictionary
df_2012 = df_2012.rename(index=str, columns = variables)

Unnamed: 0,Education_ThruHS_P,state,county,fips,Male_FullTime_Earnings,Education_BachPlus_P,House_Fam_P,Total_Population_E,Employ_Private_P,House_Pop_E,...,Sex_Female_P,Veteran_Pop,Race_White_P,Employ_Service_P,Income_House_Median,Industry_Pro_P,House_Nonfamily_P,Birth_Forgn_P,House_FemHead_P,Year
0,33.8,01,001,01001,51258,21.7,71.5,54590,74.4,19934,...,6.5,39233,80.3,16.1,53773,7.4,28.5,1.4,13.8,2012
1,28.8,01,003,01003,46069,27.7,71.6,183226,80.1,72751,...,6.1,140812,87.7,17.8,50706,9.7,28.4,3.7,11.3,2012
2,33.3,01,005,01005,38286,14.5,67.9,27469,70.5,9423,...,5.7,21463,50.3,16.7,31889,5.4,32.1,2.8,19.6,2012
3,41.6,01,007,01007,38859,9.0,74.3,22769,78.2,7386,...,5.8,17481,77.6,13.5,36824,5.0,25.7,1.3,15.1,2012
4,36.2,01,009,01009,43317,12.4,74.1,57466,78.4,21031,...,6.3,43449,96.3,14.0,45192,6.6,25.9,4.7,9.0,2012
5,36.0,01,011,01011,31413,11.9,64.5,10779,79.6,3740,...,6.2,8396,28.6,12.8,34500,2.6,35.5,4.6,23.9,2012
6,38.8,01,013,01013,38718,12.9,66.0,20730,74.9,8149,...,6.2,15744,55.4,19.0,30752,5.5,34.0,1.3,20.2,2012
7,33.5,01,015,01015,43951,16.0,66.5,117834,74.1,45764,...,6.1,90732,76.6,17.3,40093,7.7,33.5,2.4,14.6,2012
8,35.0,01,017,01017,37216,11.0,66.0,34228,81.9,13634,...,6.0,26417,60.0,14.3,32181,10.4,34.0,1.2,17.2,2012
9,33.5,01,019,01019,41795,13.1,72.8,25917,73.4,11758,...,5.5,20556,93.8,12.9,36241,3.5,27.2,0.8,10.8,2012


__JOIN THE US CENSUS BUREAU API DATA TO THE SPLC DATASET__

In [259]:
# COMBINE THE API 2012-2015 YEAR DATA INTO A SINGLE DATAFRAME
# Create list of dataframes of interest
dfs = [df_2015, df_2014, df_2013, df_2012]
# Concatenate the dataframes
bigdf = pd.concat(dfs)
# Assign ID label by fips_year (will be used as key to join to SPLC data)
bigdf['id'] = bigdf['Year'] + "_" + bigdf['fips']


# Import the SPLC CLEANED dataset
splc = pd.read_excel('SPLC_CLEAN.xlsx', converters={'fips':str})
# Filter table for only those years we were able to get ACS data from the API
j = ['2015', '2014', '2013', '2012']
splc = splc[splc['Year'].map(str).isin(j)]

# Assign ID label by fips_year (will be used as key to join to US Census API data)
splc['fips'] = splc['fips2'].map(str).str.zfill(5)
splc['id'] = splc['Year'].map(str) + "_" + splc['fips']

# Merge the two datasets
test = pd.merge(bigdf,splc, how = 'outer', on = 'id', indicator = True)

# Flag records for Militia and County present
test['Militia_Present'] = np.where(((test.Militia == 'Yes') | (test.Militia2 == 'Yes')), 1, 0)
test['AG_Present'] = np.where(test._merge == 'left_only', 0, 1)

Unnamed: 0,State,Name,Year,Raw,ST,Militia,Militia2,County_P7,fips2
0,Alaska,Bethel Citizens Militia*,2014,Tuluksak,AK,Yes,Yes,Bethel Census Area,2050
116,Alaska,Alaska Citizens Militia*,2015,Nikiski,AK,Yes,Yes,Kenai Peninsula Borough,2122
117,Alaska,Alaska Citizens Militia*,2014,Nikiski,AK,Yes,Yes,Kenai Peninsula Borough,2122
118,Alaska,Alaska Citizens Militia*,2013,Nikiski,AK,Yes,Yes,Kenai Peninsula Borough,2122
201,Alaska,Alaska Citizens Militia*,2012,Nikiski,AK,Yes,Yes,Kenai Peninsula Borough,2122
633,Alaska,Central Alaska Militia*,2013,Tok,AK,Yes,Yes,Southeast Fairbanks Census Area,2240
636,Alaska,Central Alaska Militia*,2012,Tok,AK,Yes,Yes,Southeast Fairbanks Census Area,2240
637,Alaska,Central Alaska Militia*,2013,Glennallen,AK,Yes,Yes,Valdez-Cordova Census Area,2261
642,Alaska,Central Alaska Militia*,2012,Glennallen,AK,Yes,Yes,Valdez-Cordova Census Area,2261
718,District of Columbia,Constitutional Rights PAC,2015,"Washington, DC",DC,No,No,District of Columbia,11001


__FILTER FOR US STATE COUNTIES ONLY, EXCLUDING TERRITORIES__

In [266]:
# Import file with all 50 US State FIPS codes
fipsfile = pd.read_excel('50_fips.xlsx')
# Coerce FIPS code to match 5-digit syntax used throughout the analysis
fipsfile['stfips'] = fipsfile['code'].map(str).str.zfill(2)
# Push values for 50 fips into list
glub = list(fipsfile.stfips)

# Create state only FIPS code for easy sorting of US and nonUS states in SPLC dataset
test['stfips'] = test['fips_x'].str[:2]
# Filter SPLC dataset for those items whose state fips codes are in the list of 50 United States
usonly = test[test['stfips'].map(str).isin(glub)]

# Reduce size of dataset. Filter usonly dataframe for only those variables of interest (discard unnecessary columns from the merge)
# Generate list of variables of interest
yup = [variables[k] for k in variables]
# Add columns of interest
yup.extend(['AG_Present', 'Militia_Present', 'Year_x', 'fips_x'])
# Filter the SPLC for only those columns and variables of interest
torun = usonly[yup]

__CREATE TABLE OF UNIQUE RECORDS BY COUNTY AND YEAR, LABELED WITH MILITIA/AG GROUP PRESENT (1) OR NO (0)__

In [270]:
# Create fips_yr label
torun['fips_yr'] = torun['fips_x'] + torun['Year_x']

# Assign each record 1 if there is any militia/AG group in the county for that year
torun['AG_Max'] = torun.groupby(['fips_yr'])['AG_Present'].transform(max)
torun['Militia_Max'] = torun.groupby(['fips_yr'])['Militia_Present'].transform(max)

# Make a copy of the data
maybe = torun.copy()

# Create list of the final variables to be used in the output
finames = [variables[k] for k in variables]
# Extend this by the columns of interest
finames.extend(['Year_x', 'fips_x', 'fips_yr', 'AG_Max', 'Militia_Max'])

# Drop records that are duplicate for fips_yr
maybe = maybe.drop_duplicates(subset = ['fips_yr'])
# Get copy of dataframe that contains only the variables of interest
final = maybe.filter(items = finames).copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,House_Pop_E,Education_ThruHS_P,Employ_Self_P,Birth_DiffSt_P,Male_FullTime_Earnings,Veteran_E,Employ_Private_P,Education_BachPlus_P,House_Fam_P,Age_Median_E,...,Birth_Forgn_P,Total_Population_E,Sex_Male_P,AG_Present,Militia_Present,Year_x,fips_x,fips_yr,AG_Max,Militia_Max
0,20396,33.5,5.5,29.1,47713,5506,73.6,23.2,69.7,37.7,...,1.6,55221,51.6,0,0,2015,01001,010012015,0,0
1,74104,28.8,5.8,41.1,47113,19608,81.5,29.0,67.8,42.2,...,3.5,195121,51.2,1,0,2015,01003,010032015,1,0
2,9222,34.5,7.3,27.8,37242,2042,71.8,12.5,65.7,38.8,...,2.7,26932,46.2,0,0,2015,01005,010052015,0,0
3,7027,42.1,6.7,15.0,35904,1182,76.8,10.6,75.4,38.9,...,1.1,22604,46.6,0,0,2015,01007,010072015,0,0
4,20816,33.4,4.2,15.3,43315,4360,82.0,12.9,75.1,40.7,...,4.1,57710,50.6,0,0,2015,01009,010092015,0,0
5,3683,33.3,5.4,10.8,33231,428,79.5,13.9,63.3,39.3,...,5.4,10678,47.0,0,0,2015,01011,010112015,0,0
6,8056,37.7,6.2,12.9,37062,1457,77.4,14.5,65.4,40.5,...,0.7,20354,53.3,0,0,2015,01013,010132015,0,0
7,45154,31.8,5.0,23.5,42300,10867,74.1,17.6,67.7,39.1,...,2.6,116648,51.8,0,0,2015,01015,010152015,0,0
8,13787,38.4,2.8,22.6,37170,2450,85.1,11.6,62.6,43.0,...,1.1,34079,52.3,0,0,2015,01017,010172015,0,0
9,11278,36.4,7.9,43.2,39543,2337,73.1,13.8,72.5,45.4,...,1.0,26008,50.1,0,0,2015,01019,010192015,0,0


__ADD ADDITIONAL VARIABLES OF INTEREST TO THIS FINAL TABLE__

In [272]:
# FINISH UP THE ACS API DATA

# Calculate variable 'House_Fem_P' that measures the percentage of female-only-headed households in the whole population
final['House_Fem_P'] = final.House_FemHead_P.map(float)/100 * final.House_Fam_P.map(float)/100

# Coerce all items to numeric, dropping items that are not valid from the ACS data (ACS reports missing values as "(X)")
# Get list of all variables only (not ID columns)
letit = [variables[k] for k in variables]

# For each variable, coerce to numeric
for item in letit:
    final[item] = final[item].apply(pd.to_numeric, errors ='coerce')
#final.dtypes



# ASSIGN URBAN CODES from NCHS dataset

# Import the data
rurub = pd.read_csv('Ctytype_fips.csv', index_col = 0, converters = {'fips' : str})
# Create dictionary matching fips code to the urban code assigned in 2013
rurub_dict = dict(zip(rurub.fips, rurub.CODE2013))
# Map urban code to the SPLC dataset
final['urban_code'] = final.fips_x.map(rurub_dict)
# create binary code of metropolitan versus nonmetropolitan
final['Urban'] = np.where(final.urban_code < 5, 1, 0)




# ATTACH VETERAN DATA BY WAR TYPE FROM ACS 5-YEAR ESTIMATE TABLES (NOT ACCESSIBLE THROUGH API FOR ALL YEARS 2012-2015)
# Import the data
vets = pd.read_csv('Veteran_ACS.csv', index_col = 0, converters = {'GEO.id2':str})
# ASsign ID label by fips_yr to match Veteran data to SPLC data
vets['fips_yr'] = vets['GEO.id2'] + vets.Year.map(str)
# Merge SPLC data with Veteran data
wvets = pd.merge(final, vets[['Gulf', 'PreGulf', 'fips_yr']], on = 'fips_yr')
# Get percentage of Gulf War Veterans in total county population
wvets['Veterans_Gulf_P'] = wvets.Gulf/wvets.Total_Population_E
# Get percentage of Pre-Gulf War Veterans in total county population
wvets['Veterans_PreGulf_P'] = wvets.PreGulf/wvets.Total_Population_E

Unnamed: 0,House_Pop_E,Education_ThruHS_P,Employ_Self_P,Birth_DiffSt_P,Male_FullTime_Earnings,Veteran_E,Employ_Private_P,Education_BachPlus_P,House_Fam_P,Age_Median_E,...,House_FemHead_P,Birth_Forgn_P,Total_Population_E,Sex_Male_P,Year_x,fips_x,fips_yr,AG_Max,Militia_Max,House_Fem_P
0,20396,33.5,5.5,29.1,47713,5506,73.6,23.2,69.7,37.7,...,10.6,1.6,55221,51.6,2015,01001,010012015,0,0,0.073882
1,74104,28.8,5.8,41.1,47113,19608,81.5,29.0,67.8,42.2,...,10.4,3.5,195121,51.2,2015,01003,010032015,1,0,0.070512
2,9222,34.5,7.3,27.8,37242,2042,71.8,12.5,65.7,38.8,...,19.5,2.7,26932,46.2,2015,01005,010052015,0,0,0.128115
3,7027,42.1,6.7,15.0,35904,1182,76.8,10.6,75.4,38.9,...,15.1,1.1,22604,46.6,2015,01007,010072015,0,0,0.113854
4,20816,33.4,4.2,15.3,43315,4360,82.0,12.9,75.1,40.7,...,10.0,4.1,57710,50.6,2015,01009,010092015,0,0,0.075100
5,3683,33.3,5.4,10.8,33231,428,79.5,13.9,63.3,39.3,...,25.9,5.4,10678,47.0,2015,01011,010112015,0,0,0.163947
6,8056,37.7,6.2,12.9,37062,1457,77.4,14.5,65.4,40.5,...,18.7,0.7,20354,53.3,2015,01013,010132015,0,0,0.122298
7,45154,31.8,5.0,23.5,42300,10867,74.1,17.6,67.7,39.1,...,16.6,2.6,116648,51.8,2015,01015,010152015,0,0,0.112382
8,13787,38.4,2.8,22.6,37170,2450,85.1,11.6,62.6,43.0,...,17.2,1.1,34079,52.3,2015,01017,010172015,0,0,0.107672
9,11278,36.4,7.9,43.2,39543,2337,73.1,13.8,72.5,45.4,...,12.0,1.0,26008,50.1,2015,01019,010192015,0,0,0.087000


__SELECT FINAL COLUMNS FOR ANALYSIS IN R and EXPORT__

In [284]:
# Generate list of columns of interest
fivars = ['Education_ThruHS_P', 'Employ_Self_P', 'Birth_DiffSt_P', 'Male_FullTime_Earnings', 'Veteran_E', 'Employ_Private_P',\
          'Education_BachPlus_P', 'House_Fam_P', 'Age_Median_E', 'Employ_Service_P', 'Income_House_Median', 'Sex_Female_P', \
          'Female_FullTime_Earnings', 'Poverty_Fam_P', 'House_Nonfamily_P', 'Employ_Gov_P', 'Industry_Pro_P', 'Veteran_P', \
          'Race_White_P', 'House_FemHead_P', 'Birth_Forgn_P', 'Sex_Male_P', 'Year_x', 'fips_x', 'AG_Max', 'Militia_Max', \
          'House_Fem_P', 'Urban', 'Veterans_Gulf_P', 'Veterans_PreGulf_P']

# Create copy of SPLC data, with only those columns of interest
lastone = wvets[fivars].copy()

# Save file and export
lastone.to_csv('SPLC_ACS_FINAL_2012-2015.csv', encoding = 'utf-8')

Unnamed: 0,Education_ThruHS_P,Employ_Self_P,Birth_DiffSt_P,Male_FullTime_Earnings,Veteran_E,Employ_Private_P,Education_BachPlus_P,House_Fam_P,Age_Median_E,Employ_Service_P,...,Birth_Forgn_P,Sex_Male_P,Year_x,fips_x,AG_Max,Militia_Max,House_Fem_P,Urban,Veterans_Gulf_P,Veterans_PreGulf_P
0,33.5,5.5,29.1,47713.0,5506,73.6,23.2,69.7,37.7,17.0,...,1.6,51.6,2015,01001,0,0,0.073882,1,0.041415,0.058293
1,28.8,5.8,41.1,47113.0,19608,81.5,29.0,67.8,42.2,17.7,...,3.5,51.2,2015,01003,1,0,0.070512,1,0.027470,0.073021
2,34.5,7.3,27.8,37242.0,2042,71.8,12.5,65.7,38.8,16.1,...,2.7,46.2,2015,01005,0,0,0.128115,0,0.022687,0.053134
3,42.1,6.7,15.0,35904.0,1182,76.8,10.6,75.4,38.9,17.9,...,1.1,46.6,2015,01007,0,0,0.113854,1,0.012166,0.040126
4,33.4,4.2,15.3,43315.0,4360,82.0,12.9,75.1,40.7,14.1,...,4.1,50.6,2015,01009,0,0,0.075100,0,0.017068,0.058482
5,33.3,5.4,10.8,33231.0,428,79.5,13.9,63.3,39.3,15.0,...,5.4,47.0,2015,01011,0,0,0.163947,0,0.012362,0.027721
6,37.7,6.2,12.9,37062.0,1457,77.4,14.5,65.4,40.5,16.6,...,0.7,53.3,2015,01013,0,0,0.122298,0,0.022403,0.049180
7,31.8,5.0,23.5,42300.0,10867,74.1,17.6,67.7,39.1,17.7,...,2.6,51.8,2015,01015,0,0,0.112382,1,0.028779,0.064382
8,38.4,2.8,22.6,37170.0,2450,85.1,11.6,62.6,43.0,14.5,...,1.1,52.3,2015,01017,0,0,0.107672,0,0.018956,0.052936
9,36.4,7.9,43.2,39543.0,2337,73.1,13.8,72.5,45.4,16.0,...,1.0,50.1,2015,01019,0,0,0.087000,0,0.020455,0.069402
