Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Import scripts had hard-coded filename base (e.g. 20115, for ACS 2011…

… 5-year). Added getter function to define filename base per schema, and adjusted meta-scripts to read filename base when dynamically creating DDL.
  • Loading branch information...
commit 8fdafc32cef5a8855100b426fb4d6cf31f993d0d 1 parent 25440fa
@leehach authored
View
2  README.md
@@ -49,7 +49,7 @@ These scripts use COPY statements to do the actual data import, albeit to stagin
These scripts use forward slashes to represent filesystem separators. Testing on Windows Vista indicates that forward slashes will be interpreted correctly. Backslashes, if used, are treated as escape characters and would need to be doubled.
-These scripts contain a filesystem placeholder "<census_upload_root>". This placeholder should be updated to reflect your filesystem. This folder should have a child named acs2010_5yr. The acs2010_5yr folder should have two children. As mentioned above, the files downloaded from Census should be in two sibling directories named All_Geographies_Not_Tracts_Block_Groups and Tracts_Block_Groups_Only.
+These scripts contain a filesystem placeholder "\<census_upload_root\>". This placeholder should be updated to reflect your filesystem. This folder should have a child named acs2010_5yr. The acs2010_5yr folder should have two children. As mentioned above, the files downloaded from Census should be in two sibling directories named All_Geographies_Not_Tracts_Block_Groups and Tracts_Block_Groups_Only.
The geoheader files use a fixed-length format, and are therefore imported to a table with a single column. This column is then parsed for insertion the the final geoheader table. The geoheader files contain *all* geographies, in spite of whether they are downloaded with the larger or smaller (tracts and block groups only) datasets. These scripts assume the existence of the All_Geographies_Not_Tracts_Block_Groups folder. If you have only downloaded the tracts and block groups, you will have to modify the script or create the expected folder and move the geography files (g20105xx.txt).
View
7 acs2010_5yr/ACS 2010 Data Dictionary.sql
@@ -6,6 +6,13 @@ CREATE SCHEMA acs2010_5yr;
SET search_path = acs2010_5yr, public;
SET client_encoding = 'LATIN1';
+DROP FUNCTION IF EXISTS get_refyear_period();
+CREATE FUNCTION get_refyear_period() RETURNS text AS $get_refyear_period$
+BEGIN
+ RETURN '20105';
+END;
+$get_refyear_period$ LANGUAGE plpgsql;
+
--CREATE TABLE TO HOLD FIELD DEFINITIONS FOR geoheader
CREATE TABLE geoheader_schema (
line_number serial,
View
7 acs2011_5yr/ACS 2011 Data Dictionary.sql
@@ -6,6 +6,13 @@ CREATE SCHEMA acs2011_5yr;
SET search_path = acs2011_5yr, public;
SET client_encoding = 'LATIN1';
+DROP FUNCTION IF EXISTS get_refyear_period();
+CREATE FUNCTION get_refyear_period() RETURNS text AS $get_refyear_period$
+BEGIN
+ RETURN '20115';
+END;
+$get_refyear_period$ LANGUAGE plpgsql;
+
--CREATE TABLE TO HOLD FIELD DEFINITIONS FOR geoheader
CREATE TABLE geoheader_schema (
line_number serial,
View
26 meta-scripts/Staging Tables and Data Import Functions.sql
@@ -19,17 +19,18 @@ END;
$function$ LANGUAGE plpgsql;
--CREATE SQL STATEMENT TO IMPORT GEOHEADER TO STAGING TABLE
---NOTE: File name root (20115) is hard coded! Must be altered.
DROP FUNCTION IF EXISTS sql_import_geoheader(boolean, text[]);
CREATE FUNCTION sql_import_geoheader(exec boolean = FALSE, stusab_criteria text[] = ARRAY['%']) RETURNS text AS $function$
DECLARE
sql TEXT := '';
row RECORD;
+ filename_part TEXT :='';
BEGIN
+ EXECUTE 'SELECT ' || current_schema() || '.get_refyear_period();' INTO filename_part;
FOR row IN SELECT stusab FROM stusab WHERE stusab ILIKE ANY (stusab_criteria) LOOP
sql := sql || E'COPY tmp_geoheader FROM \'' || get_census_upload_root() || '/'
- || current_schema() || '/All_Geographies_Not_Tracts_Block_Groups/g20105'
- || row.stusab || E'.txt\';\n';
+ || current_schema() || '/All_Geographies_Not_Tracts_Block_Groups/g'
+ || filename_part || row.stusab || E'.txt\';\n';
END LOOP;
IF exec THEN EXECUTE sql; END IF;
@@ -129,7 +130,6 @@ SELECT sql_import_sequences(TRUE, array['ny', 'w%'], (SELECT array_agg(seq) FROM
--More examples (BETWEEN 'a' AND 'm', BETWEEN 1 AND 50, using generate_series()) in import file.
*/
---NOTE: File name root (20115) is hard coded! Must be altered.
DROP FUNCTION IF EXISTS sql_import_sequences(boolean, text[], int[], text);
CREATE FUNCTION sql_import_sequences(exec boolean = FALSE, stusab_criteria text[] = ARRAY['%'],
seq_criteria int[] = ARRAY[-1], actions text = 'atem'
@@ -145,7 +145,9 @@ DECLARE
sql_large_geo_moe TEXT;
sql_small_geo_moe TEXT;
seq_criteria2 int[];
+ filename_part TEXT :='';
BEGIN
+ EXECUTE 'SELECT ' || current_schema() || '.get_refyear_period();' INTO filename_part;
IF seq_criteria = ARRAY[-1] THEN
seq_criteria2 := (SELECT array_agg(seq) FROM vw_sequence);
ELSE
@@ -160,20 +162,20 @@ BEGIN
FROM (
SELECT
'COPY tmp_' || seq_id || E' FROM \''
- || get_census_upload_root() || '/' || current_schema || '/All_Geographies_Not_Tracts_Block_Groups/e20105'
- || stusab || lpad(seq::varchar, 4, '0') || E'000.txt\' WITH CSV;'
+ || get_census_upload_root() || '/' || current_schema || '/All_Geographies_Not_Tracts_Block_Groups/e'
+ || filename_part || stusab || lpad(seq::varchar, 4, '0') || E'000.txt\' WITH CSV;'
AS sql1,
'COPY tmp_' || seq_id || E' FROM \''
- || get_census_upload_root() || '/' || current_schema || '/Tracts_Block_Groups_Only/e20105'
- || stusab || lpad(seq::varchar, 4, '0') || E'000.txt\' WITH CSV;'
+ || get_census_upload_root() || '/' || current_schema || '/Tracts_Block_Groups_Only/e'
+ || filename_part || stusab || lpad(seq::varchar, 4, '0') || E'000.txt\' WITH CSV;'
AS sql2,
'COPY tmp_' || seq_id || E'_moe FROM \''
- || get_census_upload_root() || '/' || current_schema || '/All_Geographies_Not_Tracts_Block_Groups/m20105'
- || stusab || lpad(seq::varchar, 4, '0') || E'000.txt\' WITH CSV;'
+ || get_census_upload_root() || '/' || current_schema || '/All_Geographies_Not_Tracts_Block_Groups/m'
+ || filename_part || stusab || lpad(seq::varchar, 4, '0') || E'000.txt\' WITH CSV;'
AS sql1_moe,
'COPY tmp_' || seq_id || E'_moe FROM \''
- || get_census_upload_root() || '/' || current_schema || '/Tracts_Block_Groups_Only/m20105'
- || stusab || lpad(seq::varchar, 4, '0') || E'000.txt\' WITH CSV;'
+ || get_census_upload_root() || '/' || current_schema || '/Tracts_Block_Groups_Only/m'
+ || filename_part || stusab || lpad(seq::varchar, 4, '0') || E'000.txt\' WITH CSV;'
AS sql2_moe
FROM stusab, vw_sequence
WHERE stusab ILIKE ANY (stusab_criteria) AND seq = ANY (seq_criteria2)
View
20 meta-scripts/Support Functions and Tables.sql
@@ -103,3 +103,23 @@ INSERT INTO stusab VALUES ('wi');
INSERT INTO stusab VALUES ('wv');
INSERT INTO stusab VALUES ('wy');
+/**NOT USED, TEMPORARILY RETAINED
+
+--set_refyear_period() creates getter function in EACH census schema.
+--Called in Data Dictionary script for EACH Census product.
+--refyear_period used to construct Census product filenames (e.g. g20105ak.txt).
+DROP FUNCTION IF EXISTS set_refyear_period(text);
+CREATE FUNCTION set_refyear_period(refyear_period text) RETURNS void AS $set_refyear_period$
+DECLARE
+ getter TEXT;
+BEGIN
+ DROP FUNCTION IF EXISTS get_refyear_period();
+ getter := E'CREATE FUNCTION get_refyear_period() RETURNS text AS $get_refyear_period$
+ BEGIN
+ RETURN \'' || refyear_period || E'\';
+ END;
+ $get_refyear_period$ LANGUAGE plpgsql;';
+ EXECUTE getter;
+END;
+$set_refyear_period$ LANGUAGE plpgsql;
+***/
Please sign in to comment.
Something went wrong with that request. Please try again.