In [None]:
import numpy as np
import pandas
# Lower-case postal abbreviations used to build the per-state file names:
# the 50 states plus 'dc', 'pr' and the national 'us' file.
# Every code must appear exactly once -- a repeated entry makes the import
# loop read that state's file twice and duplicates its rows in the output
# (the earlier version listed 'nh' and 'nj' twice).
states = [
    'ak', 'al', 'ar', 'az', 'ca', 'co', 'ct', 'dc',
    'de', 'fl', 'ga', 'hi', 'ia', 'id', 'il', 'in',
    'ks', 'ky', 'la', 'ma', 'md', 'me', 'mi', 'mn',
    'mo', 'ms', 'mt', 'nc', 'nd', 'ne', 'nh', 'nj',
    'nm', 'nv', 'ny', 'oh', 'ok', 'or',
    'pa', 'pr', 'ri', 'sc', 'sd', 'tn', 'tx', 'us',
    'ut', 'va', 'vt', 'wa', 'wi', 'wv', 'wy',
]

# Build the estimates table (data_df) and the matching one-row table of column
# descriptions (metadata_df) from sequence files 1 through 30.
#
# For each sequence the per-state estimate files are stacked row-wise, labelled
# with the column names taken from that sequence's metadata workbook, and the
# per-sequence tables are finally placed side by side.

# Record-identifier columns present in every sequence file; they are kept from
# sequence 1 only so they appear once in the merged tables.
id_columns = ['FILEID','FILETYPE','STUSAB','CHARITER','SEQUENCE','LOGRECNO']

data_parts = []
metadata_parts = []
expected_rows = None

for i in range(1,31):
    print('Importing data for sequence {0:,.0f}...'.format(i))
    this_seq_metadata_filename = 'metadata/Seq{0:.0f}.xls'.format(i)
    this_seq_metadata_df = pandas.read_excel(this_seq_metadata_filename, header=None)

    # Keep only fully populated columns, then use row 0 as the header so the
    # remaining row carries the descriptions.
    this_seq_metadata_df = this_seq_metadata_df.dropna(axis=1)
    this_seq_metadata_df.columns = this_seq_metadata_df.loc[0]
    if (i == 1):
        # Sequence 1 gets its column names spelled out explicitly (presumably
        # its workbook header differs from the others -- confirm). This must
        # be a flat list: a nested list is treated as MultiIndex level arrays.
        this_seq_metadata_df.columns = id_columns + ['A00001_001','A00001_002']
    this_seq_metadata_df = this_seq_metadata_df.drop(0, axis=0)
    this_seq_metadata_df.index = ['description']

    # Read every state file first and concatenate once; DataFrame.append was
    # removed in pandas 2.0 and copied the growing frame on every call.
    state_frames = []
    for onestate in states:
        statefilename = 'data/seq{0:.0f}/e20161{1:s}{2:04d}000.txt'.format(i,onestate,i)
        state_frames.append(pandas.read_csv(statefilename, header=None))

    # ignore_index gives a unique 0..N-1 row index, so the column-wise merge
    # below aligns rows by position instead of on repeated per-file labels.
    this_seq_data_df = pandas.concat(state_frames, ignore_index=True)
    this_seq_data_df.columns = this_seq_metadata_df.columns

    # Positional alignment is only valid when every sequence has the same rows.
    if expected_rows is None:
        expected_rows = len(this_seq_data_df)
    elif len(this_seq_data_df) != expected_rows:
        raise ValueError(
            'Sequence {0:d} has {1:d} rows but sequence 1 has {2:d}; '
            'cannot align sequences by position'.format(i, len(this_seq_data_df), expected_rows)
        )

    if (i >= 2):
        this_seq_metadata_df = this_seq_metadata_df.drop(id_columns, axis=1)
        this_seq_data_df = this_seq_data_df.drop(id_columns, axis=1)

    metadata_parts.append(this_seq_metadata_df)
    data_parts.append(this_seq_data_df)

# One column-wise concat per table instead of re-copying the accumulated frame
# on every iteration.
print('Merging datasets...')
metadata_df = pandas.concat(metadata_parts, axis=1)
data_df = pandas.concat(data_parts, axis=1)
        
# Attach geography identifiers to the merged estimates and write both the
# variable descriptions and the estimates to CSV.

print('Creating pseudo geoid...')
# Upper-cased state abbreviation followed by LOGRECNO zero-padded to 7 digits.
padded_logrecno = data_df['LOGRECNO'].map('{0:07d}'.format)
data_df['PSEUDO_GEOID'] = data_df['STUSAB'].str.upper() + padded_logrecno
metadata_df['PSEUDO_GEOID'] = 'Temporary ID constructed from the STUSAB and the 7-digit zero-padded LOGRECNO to use to look up the geoid later'

print('Indexing...')
data_df, metadata_df = (
    frame.set_index('PSEUDO_GEOID') for frame in (data_df, metadata_df)
)

print('Reading geography file...')
geoid_df = pandas.read_excel('metadata/geoid.xlsx').set_index('PSEUDO_GEOID')

print('Merging geography with data...')
# Index-on-index join: both frames are keyed by PSEUDO_GEOID at this point.
geo_lookup = geoid_df[['GEOID','NAME']]
data_df = data_df.join(geo_lookup).rename(columns={'NAME': 'GEO_NAME'})

# Give the description table the same two extra columns as the data table.
metadata_df = metadata_df.assign(
    GEOID='Geography identifier',
    GEO_NAME='Name of geography unit',
)

print('Resetting index...')
data_df, metadata_df = (
    frame.set_index('GEOID') for frame in (data_df, metadata_df)
)

print('Rearranging columns')
# Rotate the last column (GEO_NAME, added by the join) to the front.
current_order = data_df.columns.tolist()
cols = current_order[-1:] + current_order[:-1]
data_df = data_df[cols]
metadata_df = metadata_df[cols]

print('Writing variables file...')
metadata_df.to_csv('variables_acs2016_1_30.csv')

print('Writing data (estimates) file...')
data_df.to_csv('data_acs2016_1_30.csv')

print('Done!')

In [None]:
import numpy as np
import pandas
# Lower-case postal abbreviations used to build the per-state file names:
# the 50 states plus 'dc', 'pr' and the national 'us' file.
# Every code must appear exactly once -- a repeated entry makes the import
# loop read that state's file twice and duplicates its rows in the output
# (the earlier version listed 'nh' and 'nj' twice).
states = [
    'ak', 'al', 'ar', 'az', 'ca', 'co', 'ct', 'dc',
    'de', 'fl', 'ga', 'hi', 'ia', 'id', 'il', 'in',
    'ks', 'ky', 'la', 'ma', 'md', 'me', 'mi', 'mn',
    'mo', 'ms', 'mt', 'nc', 'nd', 'ne', 'nh', 'nj',
    'nm', 'nv', 'ny', 'oh', 'ok', 'or',
    'pa', 'pr', 'ri', 'sc', 'sd', 'tn', 'tx', 'us',
    'ut', 'va', 'vt', 'wa', 'wi', 'wv', 'wy',
]

# Build the margins-of-error table (error_df) and the matching one-row table
# of column descriptions (metadata_df) from sequence files 1 through 30.
#
# For each sequence the per-state margin files are stacked row-wise, labelled
# with the column names taken from that sequence's metadata workbook, and the
# per-sequence tables are finally placed side by side.

# Record-identifier columns present in every sequence file; they are kept from
# sequence 1 only so they appear once in the merged tables.
id_columns = ['FILEID','FILETYPE','STUSAB','CHARITER','SEQUENCE','LOGRECNO']

error_parts = []
metadata_parts = []
expected_rows = None

for i in range(1,31):
    print('Importing data for sequence {0:,.0f}...'.format(i))
    this_seq_metadata_filename = 'metadata/Seq{0:.0f}.xls'.format(i)
    this_seq_metadata_df = pandas.read_excel(this_seq_metadata_filename, header=None)

    # Keep only fully populated columns, then use row 0 as the header so the
    # remaining row carries the descriptions.
    this_seq_metadata_df = this_seq_metadata_df.dropna(axis=1)
    this_seq_metadata_df.columns = this_seq_metadata_df.loc[0]
    if (i == 1):
        # Sequence 1 gets its column names spelled out explicitly (presumably
        # its workbook header differs from the others -- confirm). This must
        # be a flat list: a nested list is treated as MultiIndex level arrays.
        this_seq_metadata_df.columns = id_columns + ['A00001_001','A00001_002']
    this_seq_metadata_df = this_seq_metadata_df.drop(0, axis=0)
    this_seq_metadata_df.index = ['description']

    # Read every state file first and concatenate once; DataFrame.append was
    # removed in pandas 2.0 and copied the growing frame on every call.
    state_frames = []
    for onestate in states:
        statefilename = 'data/seq{0:.0f}/m20161{1:s}{2:04d}000.txt'.format(i,onestate,i)
        state_frames.append(pandas.read_csv(statefilename, header=None))

    # ignore_index gives a unique 0..N-1 row index, so the column-wise merge
    # below aligns rows by position instead of on repeated per-file labels.
    this_seq_error_df = pandas.concat(state_frames, ignore_index=True)
    this_seq_error_df.columns = this_seq_metadata_df.columns

    # Positional alignment is only valid when every sequence has the same rows.
    if expected_rows is None:
        expected_rows = len(this_seq_error_df)
    elif len(this_seq_error_df) != expected_rows:
        raise ValueError(
            'Sequence {0:d} has {1:d} rows but sequence 1 has {2:d}; '
            'cannot align sequences by position'.format(i, len(this_seq_error_df), expected_rows)
        )

    if (i >= 2):
        this_seq_metadata_df = this_seq_metadata_df.drop(id_columns, axis=1)
        this_seq_error_df = this_seq_error_df.drop(id_columns, axis=1)

    metadata_parts.append(this_seq_metadata_df)
    error_parts.append(this_seq_error_df)

# One column-wise concat per table instead of re-copying the accumulated frame
# on every iteration.
print('Merging datasets...')
metadata_df = pandas.concat(metadata_parts, axis=1)
error_df = pandas.concat(error_parts, axis=1)
        
# Attach geography identifiers to the merged margins of error and write them
# to CSV. The description table is carried through the same steps but is not
# written from this cell.

print('Creating pseudo geoid...')
# Upper-cased state abbreviation followed by LOGRECNO zero-padded to 7 digits.
padded_logrecno = error_df['LOGRECNO'].map('{0:07d}'.format)
error_df['PSEUDO_GEOID'] = error_df['STUSAB'].str.upper() + padded_logrecno
metadata_df['PSEUDO_GEOID'] = 'Temporary ID constructed from the STUSAB and the 7-digit zero-padded LOGRECNO to use to look up the geoid later'

print('Indexing...')
error_df, metadata_df = (
    frame.set_index('PSEUDO_GEOID') for frame in (error_df, metadata_df)
)

print('Reading geography file...')
geoid_df = pandas.read_excel('metadata/geoid.xlsx').set_index('PSEUDO_GEOID')

print('Merging geography with data...')
# Index-on-index join: both frames are keyed by PSEUDO_GEOID at this point.
geo_lookup = geoid_df[['GEOID','NAME']]
error_df = error_df.join(geo_lookup).rename(columns={'NAME': 'GEO_NAME'})

# Give the description table the same two extra columns as the error table.
metadata_df = metadata_df.assign(
    GEOID='Geography identifier',
    GEO_NAME='Name of geography unit',
)

print('Resetting index...')
error_df, metadata_df = (
    frame.set_index('GEOID') for frame in (error_df, metadata_df)
)

print('Rearranging columns')
# Rotate the last column (GEO_NAME, added by the join) to the front.
current_order = error_df.columns.tolist()
cols = current_order[-1:] + current_order[:-1]
error_df = error_df[cols]
metadata_df = metadata_df[cols]

# Writing the variables file is intentionally left disabled in this cell.
#print('Writing variables file...')
#metadata_df.to_csv('variables_acs2016_1_30.csv')

print('Writing margins of error file...')
error_df.to_csv('error_acs2016_1_30.csv')

print('Done!')

#error_df.sample(4)

In [8]:
import numpy as np
import pandas
# Lower-case postal abbreviations used to build the per-state file names:
# the 50 states plus 'dc', 'pr' and the national 'us' file.
# Every code must appear exactly once -- a repeated entry makes the import
# loop read that state's file twice and duplicates its rows in the output
# (the earlier version listed 'nh' and 'nj' twice).
states = [
    'ak', 'al', 'ar', 'az', 'ca', 'co', 'ct', 'dc',
    'de', 'fl', 'ga', 'hi', 'ia', 'id', 'il', 'in',
    'ks', 'ky', 'la', 'ma', 'md', 'me', 'mi', 'mn',
    'mo', 'ms', 'mt', 'nc', 'nd', 'ne', 'nh', 'nj',
    'nm', 'nv', 'ny', 'oh', 'ok', 'or',
    'pa', 'pr', 'ri', 'sc', 'sd', 'tn', 'tx', 'us',
    'ut', 'va', 'vt', 'wa', 'wi', 'wv', 'wy',
]

# Build the geography table (geo_df) from the per-state geography CSVs, label
# its columns from the geography template workbook, and write both the
# column descriptions and the geography rows to CSV.

print('Importing geography data...')

geo_metadata_filename = 'metadata/2016_SFGeoFileTemplate.xls'
geo_metadata_df = pandas.read_excel(geo_metadata_filename, header=None)

# Keep only fully populated columns, then use row 0 as the header so the
# remaining row carries the descriptions.
geo_metadata_df = geo_metadata_df.dropna(axis=1)
geo_metadata_df.columns = geo_metadata_df.loc[0]
geo_metadata_df = geo_metadata_df.drop(0, axis=0)
geo_metadata_df.index = ['description']

# Read every state file first and concatenate once; DataFrame.append was
# removed in pandas 2.0 and copied the growing frame on every call.
state_frames = []
for onestate in states:
    statefilename = 'geography/g20161{0:}.csv'.format(onestate)
    state_frames.append(pandas.read_csv(statefilename, header=None, encoding='utf-8'))

# The per-file row labels are discarded here; GEOID becomes the index below.
geo_df = pandas.concat(state_frames, ignore_index=True)
geo_df.columns = geo_metadata_df.columns

geo_df = geo_df.set_index('GEOID')

print('Writing geography variables file...')
geo_metadata_df.to_csv('geo_variables_acs2016.csv')

print('Writing geography file')
geo_df.to_csv('geo_acs2016.csv')

print('Done!')

Importing geography data...
Writing geography variables file...
Writing geography file
Done!
