Skip to content

Commit

Permalink
Load name information into database
Browse files Browse the repository at this point in the history
  • Loading branch information
jetpaccomputer committed May 24, 2013
1 parent cdbc26b commit 465e697
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 7 deletions.
9 changes: 2 additions & 7 deletions docs/ec2setup.txt
Expand Up @@ -245,13 +245,8 @@ createdb -U postgres names
# Build babynames.csv from the SSA baby-name files and load it with loadnames.sql.
# NOTE(review): this text comes from a rendered diff whose +/- markers were
# stripped; the for-loop, LC_ALL export and sort pipeline look like the removed
# version and the last two commands like their replacement — confirm against
# the real docs/ec2setup.txt before running it verbatim.
cd /mnt/data
# Download the names archive into the current directory.
curl -O "http://www.ssa.gov/oact/babynames/names.zip"
# NOTE(review): no unzip step is visible between the download and dos2unix —
# confirm names.zip is extracted somewhere so that yob*.txt exists.
# Convert the per-year files to Unix line endings.
dos2unix yob*.txt
# Append ",<file name>" to the end of every line of each per-year file and
# accumulate the result in allyears.txt.
for FILE in yob*.txt
do
BASEFILENAME=`basename $FILE`
sed "s/\$/,$BASEFILENAME/" $FILE >> allyears.txt
done
# Use the C locale so that sort compares bytes rather than using locale collation.
export LC_ALL=C
# Stable-sort the combined file (comma as field separator) and pipe it through
# the analysis script, writing babynames.csv.
sort -s -t, allyears.txt | ~/sources/dstk/dataconversion/analyzebabynames.rb > babynames.csv
# Same script invoked with the current directory as its argument instead of
# reading a sorted stream on stdin.
~/sources/dstk/dataconversion/analyzebabynames.rb . > babynames.csv
# Run the SQL load script against the "names" database.
# NOTE(review): this uses ~/dstk/... while the script above lives under
# ~/sources/dstk/... — confirm which checkout path is correct.
psql -U postgres -d names -f ~/dstk/sql/loadnames.sql

# Fix for postgres crashes,
sudo sed -i "s/shared_buffers = [0-9A-Za-z]*/shared_buffers = 512MB/" /etc/postgresql/9.1/main/postgresql.conf
Expand Down
13 changes: 13 additions & 0 deletions sql/loadnames.sql
Expand Up @@ -14,4 +14,17 @@ CREATE TABLE ethnicity_of_surnames(

-- Bulk-load the surname ethnicity CSV. The file is read by the PostgreSQL
-- server process, so the path must be readable on the database host.
-- The first line of the file is treated as a header and skipped.
COPY ethnicity_of_surnames (
    name,
    rank,
    count,
    prop100k,
    cum_prop100k,
    pctwhite,
    pctblack,
    pctapi,
    pctaian,
    pct2prace,
    pcthispanic
)
FROM '/home/ubuntu/sources/dstkdata/ethnicityofsurnames.csv'
WITH
    DELIMITER ','
    CSV HEADER;

-- Per-first-name statistics, one row per name; populated from babynames.csv.
CREATE TABLE first_names (
    -- Fixed-width key. NOTE(review): CHARACTER(16) blank-pads stored values
    -- and rejects names longer than 16 characters — confirm that is intended.
    name                 CHARACTER(16),
    count                INTEGER,
    -- NOTE(review): scale (0-1 vs 0-100) is not visible here — confirm
    -- against the script that generates the CSV.
    male_percentage      DOUBLE PRECISION,
    most_popular_year    INTEGER,
    earliest_common_year INTEGER,
    latest_common_year   INTEGER,
    PRIMARY KEY (name)
);

-- Bulk-load the first-name statistics CSV into first_names.
-- Server-side COPY reads the file from the PostgreSQL server process, and a
-- relative path is resolved against the server's working directory (normally
-- the cluster data directory), not the directory psql was started from.
-- An absolute path is therefore used; /mnt/data is where the setup steps
-- generate babynames.csv. The first line of the file is treated as a header
-- and skipped.
COPY first_names (
    name,
    count,
    male_percentage,
    most_popular_year,
    earliest_common_year,
    latest_common_year
)
FROM '/mnt/data/babynames.csv'
WITH
    DELIMITER ','
    CSV HEADER;

0 comments on commit 465e697

Please sign in to comment.