diff --git a/.gitignore b/.gitignore
index 9a3dd8b..dff3c71 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,6 @@
-ZIP_CODES.txt
-zip_codes.zip
-zip_dump.csv
-zip_codes.db
/.bundle
/free_zipcode_data.sqlite3
/data
/build
+/stubs
+/vendor/bundle/
diff --git a/.rspec b/.rspec
new file mode 100644
index 0000000..c99d2e7
--- /dev/null
+++ b/.rspec
@@ -0,0 +1 @@
+--require spec_helper
diff --git a/.rubocop.yml b/.rubocop.yml
index f0322f5..9f11713 100644
--- a/.rubocop.yml
+++ b/.rubocop.yml
@@ -1,4 +1,6 @@
AllCops:
+ TargetRubyVersion: 2.3
+
# Include gemspec and Rakefile
Include:
- '**/*.gemspec'
@@ -15,6 +17,7 @@ AllCops:
Exclude:
- 'vendor/**/*'
- 'stubs/**/*'
+ - 'spec/support/shared_contexts/*'
# Checks formatting of special comments
CommentAnnotation:
@@ -49,32 +52,16 @@ Style/RaiseArgs:
Style/DoubleNegation:
Enabled: false
-Style/SignalException:
- EnforcedStyle: semantic
-
-Style/ClassAndModuleChildren:
- Enabled: false
-
-Style/MultilineMethodCallIndentation:
- EnforcedStyle: indented
- IndentationWidth: 4
-
-Style/CaseIndentation:
- Enabled: false
-
-Style/TrivialAccessors:
- Enabled: false
-
-Style/NumericLiterals:
+Style/PerlBackrefs:
Enabled: false
########################################
# Lint Cops
Lint/Eval:
- Enabled: true
+ Enabled: false
-Lint/AssignmentInCondition:
+Lint/HandleExceptions:
Enabled: false
########################################
@@ -85,7 +72,14 @@ Metrics/LineLength:
Metrics/MethodLength:
CountComments: false # count full line comments?
- Max: 20
+ Max: 30
Metrics/ClassLength:
Max: 120
+
+Metrics/AbcSize:
+ Enabled: false
+
+# rubocop:disable Metrics/AbcSize
+# rubocop:disable Metrics/MethodLength
+# rubocop:disable Metrics/BlockLength
diff --git a/Gemfile b/Gemfile
index 833365f..e967149 100644
--- a/Gemfile
+++ b/Gemfile
@@ -1,7 +1,6 @@
-source 'http://rubygems.org'
+# frozen_string_literal: true
-gem 'midwire_common'
-gem 'pry-nav'
-gem 'rake'
-gem 'rubyzip', '~> 1.2.1'
-gem 'sqlite3'
+source 'https://rubygems.org'
+git_source(:github) { |repo| "https://github.com/#{repo}.git" }
+
+gemspec
diff --git a/Gemfile.lock b/Gemfile.lock
index a94bc5e..04c225c 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -1,31 +1,79 @@
+PATH
+ remote: .
+ specs:
+ free_zipcode_data (1.0.0)
+ colored (~> 1.2)
+ kiba (~> 2.0)
+ rubyzip (~> 1.2)
+ sqlite3 (~> 1.3)
+ trollop (~> 2.1)
+
GEM
- remote: http://rubygems.org/
+ remote: https://rubygems.org/
specs:
+ ast (2.4.0)
coderay (1.1.2)
+ colored (1.2)
+ diff-lcs (1.3)
+ docile (1.3.0)
+ json (2.1.0)
+ kiba (2.0.0)
method_source (0.8.2)
- midwire_common (0.1.16)
- thor (~> 0.19)
+ parallel (1.12.1)
+ parser (2.5.1.0)
+ ast (~> 2.4.0)
+ powerpack (0.1.1)
pry (0.10.4)
coderay (~> 1.1.0)
method_source (~> 0.8.1)
slop (~> 3.4)
pry-nav (0.2.4)
pry (>= 0.9.10, < 0.11.0)
+ rainbow (3.0.0)
rake (12.2.1)
+ rspec (3.7.0)
+ rspec-core (~> 3.7.0)
+ rspec-expectations (~> 3.7.0)
+ rspec-mocks (~> 3.7.0)
+ rspec-core (3.7.1)
+ rspec-support (~> 3.7.0)
+ rspec-expectations (3.7.0)
+ diff-lcs (>= 1.2.0, < 2.0)
+ rspec-support (~> 3.7.0)
+ rspec-mocks (3.7.0)
+ diff-lcs (>= 1.2.0, < 2.0)
+ rspec-support (~> 3.7.0)
+ rspec-support (3.7.1)
+ rubocop (0.55.0)
+ parallel (~> 1.10)
+ parser (>= 2.5)
+ powerpack (~> 0.1)
+ rainbow (>= 2.2.2, < 4.0)
+ ruby-progressbar (~> 1.7)
+ unicode-display_width (~> 1.0, >= 1.0.1)
+ ruby-progressbar (1.9.0)
rubyzip (1.2.1)
+ simplecov (0.16.1)
+ docile (~> 1.1)
+ json (>= 1.8, < 3)
+ simplecov-html (~> 0.10.0)
+ simplecov-html (0.10.2)
slop (3.6.0)
- sqlite3 (1.3.11)
- thor (0.19.1)
+ sqlite3 (1.3.13)
+ trollop (2.1.2)
+ unicode-display_width (1.3.2)
PLATFORMS
ruby
DEPENDENCIES
- midwire_common
- pry-nav
- rake
- rubyzip (~> 1.2.1)
- sqlite3
+ bundler (~> 1.16)
+ free_zipcode_data!
+ pry-nav (~> 0.2)
+ rake (~> 12.0)
+ rspec (~> 3.7)
+ rubocop
+ simplecov
BUNDLED WITH
- 1.15.4
+ 1.16.1
diff --git a/README.md b/README.md
index 7f127e3..8b40f17 100644
--- a/README.md
+++ b/README.md
@@ -90,6 +90,10 @@ create table zipcodes (
Both `lat` and `lon`, geocodes, are populated for each zipcode record.
+## Development
+
+If you want to run the specs or do development work, set `GEM_ENV='development'`
+
## Data License
The zipcode data is licensed under a Creative Commons Attribution 3.0 Unported License, carried forward from [GeoNames](http://www.geonames.org).
diff --git a/bin/free_zipcode_data b/bin/free_zipcode_data
new file mode 100755
index 0000000..40e067d
--- /dev/null
+++ b/bin/free_zipcode_data
@@ -0,0 +1,9 @@
+#!/usr/bin/env ruby
+
+require 'rubygems'
+require 'bundler/setup'
+
+require 'free_zipcode_data'
+require 'free_zipcode_data/runner'
+
+FreeZipcodeData::Runner.instance.start
diff --git a/country_lookup_table.yml b/country_lookup_table.yml
index b4f10d0..18d9c84 100644
--- a/country_lookup_table.yml
+++ b/country_lookup_table.yml
@@ -10,7 +10,7 @@ AX:
AL:
:name: Albania
:alpha3: ALB
- :iso: 008
+ :iso: '008'
DZ:
:name: Algeria
:alpha3: DZA
diff --git a/free_zipcode_data.gemspec b/free_zipcode_data.gemspec
new file mode 100644
index 0000000..9fcc5fa
--- /dev/null
+++ b/free_zipcode_data.gemspec
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+# coding: utf-8
+
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'free_zipcode_data/version'
+
+Gem::Specification.new do |spec|
+ spec.name = 'free_zipcode_data'
+ spec.version = FreeZipcodeData::VERSION
+ spec.authors = ['Chris Blackburn', 'Chris McKnight']
+ spec.email = ['87a1779b@opayq.com']
+ spec.summary = 'Free US postal codes in CSV and SQLite3 format.'
+ spec.description = spec.summary
+ spec.homepage = 'https://github.com/midwire/free_zipcode_data'
+ spec.license = 'MIT'
+
+ spec.required_ruby_version = '>= 2.3.0'
+ spec.files = `git ls-files -z`.split("\x0")
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
+ spec.require_paths = ['lib']
+
+ spec.add_development_dependency 'bundler', '~> 1.16'
+ spec.add_development_dependency 'pry-nav', '~> 0.2'
+ spec.add_development_dependency 'rake', '~> 12.0'
+ spec.add_development_dependency 'rspec', '~> 3.7'
+ spec.add_development_dependency 'rubocop'
+ spec.add_development_dependency 'simplecov'
+
+ spec.add_runtime_dependency 'colored', '~> 1.2'
+ spec.add_runtime_dependency 'kiba', '~> 2.0'
+ spec.add_runtime_dependency 'rubyzip', '~> 1.2'
+ spec.add_runtime_dependency 'sqlite3', '~> 1.3'
+ spec.add_runtime_dependency 'trollop', '~> 2.1'
+end
diff --git a/lib/etl/common.rb b/lib/etl/common.rb
new file mode 100644
index 0000000..b1a1a7d
--- /dev/null
+++ b/lib/etl/common.rb
@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+
+require 'pp'
+
+require_relative 'csv_source'
+require_relative '../free_zipcode_data/country_table'
+require_relative '../free_zipcode_data/state_table'
+require_relative '../free_zipcode_data/county_table'
+require_relative '../free_zipcode_data/zipcode_table'
+
+# DSL helper that adds a pass-through transform printing every row.
+#
+# Relies on a `transform` method being available on the caller, so it is
+# presumably meant for use inside a `Kiba.parse` block (TODO confirm). Prints
+# with awesome_print's `ap` when that gem has been loaded and falls back to the
+# standard library's `pp` otherwise, because awesome_print is not a declared
+# dependency of this gem.
+def show_me
+  transform do |row|
+    respond_to?(:ap, true) ? ap(row) : pp(row)
+    row
+  end
+end
+
+# DSL helper that adds a transform passing through only the first +count+ rows;
+# every later row is replaced by nil.
+#
+# @param count [Integer, String, nil] number of rows to keep; nil or -1 adds no transform
+# @return [void]
+def limit(count)
+  count = Integer(count || -1)
+  return if count == -1
+
+  # A closure-local counter keeps the state private to this particular limit.
+  seen = 0
+  transform do |row|
+    seen += 1
+    seen > count ? nil : row
+  end
+end
diff --git a/lib/etl/csv_source.rb b/lib/etl/csv_source.rb
new file mode 100644
index 0000000..0cbd162
--- /dev/null
+++ b/lib/etl/csv_source.rb
@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+
+require 'csv'
+
+# Row source that reads a delimited text file and yields each record as a Hash.
+class CsvSource
+  attr_reader :filename, :delimeter, :quote_char, :headers
+
+  # @param filename [String] path of the file to read
+  # @param headers [Boolean] passed to CSV as `headers:`
+  # @param delimeter [String] column separator, tab by default (the spelling
+  #   is part of the public interface and is therefore kept)
+  # @param quote_char [String] quote character passed to CSV
+  def initialize(filename:, headers: true, delimeter: "\t", quote_char: '"')
+    @filename = filename
+    @headers = headers
+    @delimeter = delimeter
+    @quote_char = quote_char
+  end
+
+  # Yields every row of the file as a Hash whose header names were converted
+  # with CSV's :symbol header converter.
+  #
+  # @yieldparam row [Hash] one record
+  # @return [Enumerator] when called without a block
+  def each
+    return enum_for(:each) unless block_given?
+
+    CSV.open(filename, **csv_options) do |csv|
+      csv.each { |row| yield(row.to_hash) }
+    end
+  end
+
+  private
+
+  # Keyword options handed to CSV.open, derived from this source's settings.
+  def csv_options
+    {
+      col_sep: delimeter,
+      headers: headers,
+      header_converters: :symbol,
+      quote_char: quote_char
+    }
+  end
+end
diff --git a/lib/etl/free_zipcode_data_job.rb b/lib/etl/free_zipcode_data_job.rb
new file mode 100644
index 0000000..d691ee0
--- /dev/null
+++ b/lib/etl/free_zipcode_data_job.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+require_relative 'common'
+
+module ETL
+  # Assembles the Kiba job that feeds one CSV file into the four table writers.
+  module FreeZipcodeDataJob
+    module_function
+
+    # @param country_file [String] path of the comma-separated input file
+    # @param database [Object] handle given to every destination as `database:`
+    # @param logger [#info] receives one progress message in the pre_process step
+    # @param options [#[]] supplies :country_tablename, :state_tablename,
+    #   :county_tablename and :zipcode_tablename
+    # @return [Object] the job definition returned by Kiba.parse
+    def setup(country_file, database, logger, options)
+      # Destination class => option key of its table name, in registration order.
+      table_writers = {
+        FreeZipcodeData::CountryTable => :country_tablename,
+        FreeZipcodeData::StateTable => :state_tablename,
+        FreeZipcodeData::CountyTable => :county_tablename,
+        FreeZipcodeData::ZipcodeTable => :zipcode_tablename
+      }
+
+      Kiba.parse do
+        pre_process do
+          logger.info("Processing '#{country_file}' data, please be patient...")
+        end
+
+        source CsvSource, filename: country_file, delimeter: ',', quote_char: '"'
+
+        table_writers.each do |writer_class, name_key|
+          destination writer_class, database: database, tablename: options[name_key]
+        end
+      end
+    end
+  end
+end
diff --git a/lib/free_zipcode_data.rb b/lib/free_zipcode_data.rb
new file mode 100644
index 0000000..24c8074
--- /dev/null
+++ b/lib/free_zipcode_data.rb
@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+
+require 'fileutils'
+require 'pathname'
+require 'readline'
+
+require 'free_zipcode_data/version'
+
+# Top-level namespace: environment helpers plus autoloads for the gem's classes.
+module FreeZipcodeData
+  # @return [Pathname] the directory that contains lib/
+  def self.root
+    Pathname.new(File.dirname(__FILE__)).parent
+  end
+
+  # @return [String] the value of APP_ENV, or 'development' when it is unset
+  def self.current_environment
+    ENV.fetch('APP_ENV', 'development')
+  end
+
+  #:nocov:
+  # Returns the path of the configuration file, touching it so that it exists.
+  #
+  # In the 'test' environment the fixture under spec/fixtures is returned
+  # untouched. Otherwise FZD_CONFIG_FILE is used when set, and +filename+ inside
+  # HOME when it is not.
+  #
+  # @param filename [String] base name of the configuration file
+  # @return [Pathname, String] path of the configuration file
+  # @raise [KeyError] if neither FZD_CONFIG_FILE nor HOME is set
+  def self.config_file(filename = '.free_zipcode_data.yml')
+    return root.join('spec', 'fixtures', filename) if current_environment == 'test'
+
+    # Block form: HOME is only looked up when FZD_CONFIG_FILE is absent.
+    file = ENV.fetch('FZD_CONFIG_FILE') { File.join(ENV.fetch('HOME'), filename) }
+    FileUtils.touch(file)
+    file
+  end
+  #:nocov:
+
+  # @return [Symbol] :normal unless RUBY_PLATFORM names a Windows-style platform
+  def self.os
+    # NOTE(review): the non-:normal symbol is kept only because callers may
+    # compare against it; consider renaming it to a neutral :windows.
+    # `=~` instead of `match?`: the gemspec allows Ruby 2.3, which predates
+    # String#match?.
+    RUBY_PLATFORM =~ /cygwin|mswin|mingw|bccwin|wince|emx/ ? :retarded : :normal
+  end
+
+  autoload :CountryTable, 'free_zipcode_data/country_table'
+  autoload :StateTable, 'free_zipcode_data/state_table'
+  autoload :CountyTable, 'free_zipcode_data/county_table'
+  autoload :ZipcodeTable, 'free_zipcode_data/zipcode_table'
+  autoload :DataSource, 'free_zipcode_data/data_source'
+  autoload :Logger, 'free_zipcode_data/logger'
+  autoload :Options, 'free_zipcode_data/options'
+  autoload :Settings, 'free_zipcode_data/settings'
+  autoload :SqliteRam, 'free_zipcode_data/sqlite_ram'
+end
diff --git a/lib/free_zipcode_data/country_table.rb b/lib/free_zipcode_data/country_table.rb
new file mode 100644
index 0000000..4b27bcf
--- /dev/null
+++ b/lib/free_zipcode_data/country_table.rb
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+require_relative 'db_table'
+
+module FreeZipcodeData
+  # Creates and fills the countries table.
+  class CountryTable < DbTable
+    # Creates the table named +tablename+ and a unique, case-insensitive index
+    # on its alpha2 column.
+    def build
+      schema = <<-SQL
+        create table #{tablename} (
+          id integer not null primary key,
+          alpha2 varchar(2) not null,
+          alpha3 varchar(3),
+          iso varchar(3),
+          name varchar(255) not null
+        )
+      SQL
+      database.execute_batch(schema)
+
+      ndx = <<-SQL
+        CREATE UNIQUE INDEX "main"."unique_country_alpha2"
+        ON #{tablename} (alpha2 COLLATE NOCASE ASC);
+      SQL
+      database.execute_batch(ndx)
+    end
+
+    # Inserts the country named by +row+; a country that is already present is
+    # silently skipped.
+    #
+    # @param row [Hash] record whose :country entry is the lookup-table key
+    # @raise [RuntimeError] if the key is missing from the country lookup table
+    def write(row)
+      code = row[:country]
+      country_hash = country_lookup_table[code]
+      raise "Unknown country code: #{code.inspect}" unless country_hash
+
+      # Values are bound rather than interpolated, so a name containing an
+      # apostrophe cannot break the statement. Only the table name, which
+      # cannot be bound, is interpolated (it follows +tablename+ like #build).
+      sql = "INSERT INTO #{tablename} (alpha2, alpha3, iso, name) VALUES (?, ?, ?, ?)"
+      values = [code, country_hash[:alpha3], country_hash[:iso], country_hash[:name]].map(&:to_s)
+
+      begin
+        database.execute(sql, values)
+      rescue SQLite3::ConstraintException
+        # Swallow duplicates
+      end
+    end
+  end
+end
diff --git a/lib/free_zipcode_data/county_table.rb b/lib/free_zipcode_data/county_table.rb
new file mode 100644
index 0000000..6b1429b
--- /dev/null
+++ b/lib/free_zipcode_data/county_table.rb
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+require_relative 'db_table'
+
+module FreeZipcodeData
+  # Creates and fills the counties table.
+  class CountyTable < DbTable
+    # Creates the table named +tablename+ and a unique index over
+    # (state_id, abbr, name).
+    def build
+      schema = <<-SQL
+        create table #{tablename} (
+          id integer not null primary key,
+          state_id integer,
+          abbr varchar(255),
+          name varchar(255),
+          county_seat varchar(255)
+        )
+      SQL
+      database.execute_batch(schema)
+
+      ndx = <<-SQL
+        CREATE UNIQUE INDEX "main"."unique_county"
+        ON #{tablename} (state_id, abbr, name COLLATE NOCASE ASC);
+      SQL
+      database.execute_batch(ndx)
+    end
+
+    # Inserts the county of +row+; rows without a :county are ignored and a
+    # county that is already present is silently skipped.
+    #
+    # @param row [Hash] record with :county, :short_county and :short_state
+    # @return [nil] when the row has no county
+    # @raise [RuntimeError] if the state cannot be found or the insert fails
+    def write(row)
+      return nil unless row[:county]
+
+      state_id = get_state_id(row[:short_state])
+      raise "Could not find state: #{row[:short_state]}" unless state_id
+
+      # Values are bound rather than interpolated so quotes in the data cannot
+      # break the statement; to_s keeps storing '' for a missing abbreviation.
+      sql = "INSERT INTO #{tablename} (state_id, abbr, name) VALUES (?, ?, ?)"
+      values = [state_id, row[:short_county], row[:county]].map(&:to_s)
+
+      begin
+        database.execute(sql, values)
+      rescue SQLite3::ConstraintException
+        # swallow duplicates
+      rescue StandardError => err
+        raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}] #{values.inspect}"
+      end
+    end
+  end
+end
diff --git a/lib/free_zipcode_data/data_source.rb b/lib/free_zipcode_data/data_source.rb
new file mode 100644
index 0000000..41cfdbd
--- /dev/null
+++ b/lib/free_zipcode_data/data_source.rb
@@ -0,0 +1,120 @@
+# frozen_string_literal: true
+
+require 'csv'
+require 'fileutils'
+require 'open-uri'
+require 'zip'
+
+module FreeZipcodeData
+  # Downloads a GeoNames postal-code archive into the work directory and turns
+  # its data file into a CSV file with a header row.
+  class DataSource
+    BASE_URL = 'http://download.geonames.org/export/zip'
+
+    # Header row written at the top of the prepared CSV file.
+    CSV_HEADERS = %w[
+      COUNTRY POSTAL_CODE CITY STATE SHORT_STATE COUNTY SHORT_COUNTY
+      COMMUNITY SHORT_COMMUNITY LATITUDE LONGITUDE ACCURACY
+    ].freeze
+
+    attr_reader :country, :options
+
+    # @param country [String, nil] country code; nil selects the allCountries archive
+    def initialize(country = nil)
+      @country = country
+      @options = Options.instance.hash
+      @logger = Logger.instance
+    end
+
+    # Downloads the archive unless it is already present and :clobber is off.
+    #
+    # @return [nil] when the download was skipped
+    def download
+      return nil if !options.clobber && File.exist?(zipfile_path)
+
+      FileUtils.mkdir_p(options.work_dir)
+      @logger.info("Downloading: #{zipfile} from GeoNames...")
+      # URI#open (added by open-uri) instead of Kernel#open: the latter also
+      # runs "|command" strings and stopped accepting URLs in Ruby 3.0.
+      URI.parse("#{BASE_URL}/#{zipfile}").open do |remote|
+        # Stream to disk rather than holding the whole archive in memory.
+        File.open(zipfile_path, 'wb') { |file| IO.copy_stream(remote, file) }
+      end
+    end
+
+    # @return [String] path of the prepared CSV file, built on first use
+    def datafile
+      @datafile ||= datafile_with_headers
+    end
+
+    private
+
+    # @return [String] archive file name: the upcased country code, or
+    #   'allCountries', with '.zip' appended when missing
+    def zipfile
+      @zipfile ||= begin
+        filename = country.nil? ? 'allCountries' : country.upcase
+        filename += '.zip' unless filename =~ /\.zip$/
+        filename
+      end
+    end
+
+    # @return [String] location of the archive inside the work directory
+    def zipfile_path
+      @zipfile_path ||= File.join(options.work_dir, zipfile)
+    end
+
+    # Extracts the first archive entry whose name does not contain "readme".
+    #
+    # @return [String] path of the extracted data file
+    # @raise [RuntimeError] if the archive holds no such entry
+    def unzipped_datafile
+      @unzipped_datafile ||= begin
+        country_file = nil
+        Zip::File.open(zipfile_path) do |zip|
+          zip.each do |entry|
+            next if entry.name =~ /readme/i
+
+            country_file = File.join(options.work_dir, entry.name)
+            extract(entry, country_file)
+            break
+          end
+        end
+        country_file || raise("No data file found in #{zipfile_path}")
+      end
+    end
+
+    # Extracts +entry+ to +target+; an existing file is only replaced when
+    # :clobber is set.
+    def extract(entry, target)
+      present = File.exist?(target)
+      return if present && !options[:clobber]
+
+      Zip.on_exists_proc = true if present # allow rubyzip to overwrite
+      @logger.verbose("Extracting: #{zipfile}...")
+      entry.extract(target)
+    end
+
+    # Writes "<data file>.csv": the header row followed by every row of the
+    # tab-separated data file. An existing file is reused unless :clobber is set.
+    #
+    # @return [String] path of the CSV file
+    def datafile_with_headers
+      filename = "#{unzipped_datafile}.csv"
+      if File.exist?(filename) && !options[:clobber]
+        @logger.verbose("File: #{filename} already exists, skipping...")
+        return filename
+      end
+      @logger.verbose("Preparing: #{filename} for processing...")
+      CSV.open(filename, 'w') do |outfile|
+        outfile << CSV_HEADERS
+        # NOTE(review): '|' is presumably used as quote character so that double
+        # quotes inside the data are treated as plain text - confirm.
+        CSV.foreach(unzipped_datafile, headers: false, col_sep: "\t", quote_char: '|') do |row|
+          outfile << row
+        end
+      end
+      filename
+    end
+  end
+end
diff --git a/lib/free_zipcode_data/db_table.rb b/lib/free_zipcode_data/db_table.rb
new file mode 100644
index 0000000..559cba5
--- /dev/null
+++ b/lib/free_zipcode_data/db_table.rb
@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+require 'yaml'
+
+module FreeZipcodeData
+  # Base class of the table writers: keeps the database handle and table name
+  # and provides the id look-ups shared by the subclasses.
+  class DbTable
+    ISSUE_URL = 'https://github.com/midwire/free_zipcode_data/issues/new'
+
+    # Absolute path of country_lookup_table.yml two directories above this
+    # file, so the table is found whatever the current working directory is.
+    COUNTRY_LOOKUP_FILE = File.expand_path('../../country_lookup_table.yml', __dir__)
+
+    attr_reader :database, :tablename
+
+    # @param database [Object] handle that responds to #execute
+    # @param tablename [String] name of the table this instance works on
+    def initialize(database:, tablename:)
+      @database = database
+      @tablename = tablename
+    end
+
+    private
+
+    # @return [Object] parsed contents of the country lookup file (memoized)
+    def country_lookup_table
+      @country_lookup_table ||= YAML.load_file(COUNTRY_LOOKUP_FILE)
+    end
+
+    # NOTE(review): the three look-ups below query the literal table names
+    # countries/states/counties, not the names configured for those tables.
+
+    # @return [Object, nil] id of the country with the given alpha2 code
+    def get_country_id(country)
+      first_id('SELECT id FROM countries WHERE alpha2 = ?', country)
+    end
+
+    # @return [Object, nil] id of the first state with the given abbreviation
+    def get_state_id(state)
+      first_id('SELECT id FROM states WHERE abbr = ?', state)
+    end
+
+    # @return [Object, nil] id of the first county with the given name; nil for a nil name
+    # @raise [RuntimeError] if SQLite rejects the statement
+    def get_county_id(county)
+      return nil if county.nil?
+
+      sql = 'SELECT id FROM counties WHERE name = ?'
+      first_id(sql, county)
+    rescue SQLite3::SQLException => err
+      raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}] #{county.inspect}"
+    end
+
+    # Runs +sql+ with +value+ bound to its single placeholder and returns the
+    # first column of the first row. Binding replaces string interpolation, so
+    # quotes in the value need no escaping.
+    def first_id(sql, value)
+      rows = database.execute(sql, [value.to_s])
+      rows.first&.first
+    end
+
+    # Doubles every single quote in +string+; returns '' for nil.
+    def escape_single_quotes(string)
+      string&.gsub(/[']/, '\'\'') || ''
+    end
+  end
+end
diff --git a/lib/free_zipcode_data/logger.rb b/lib/free_zipcode_data/logger.rb
new file mode 100644
index 0000000..59e6af1
--- /dev/null
+++ b/lib/free_zipcode_data/logger.rb
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+require 'singleton'
+require 'logger'
+
+module FreeZipcodeData
+  # Singleton facade over a logger object: unknown messages are forwarded to
+  # +log_provider+.
+  class Logger
+    include Singleton
+
+    attr_accessor :log_provider
+
+    # @param provider [Object] object that receives the log calls; defaults to
+    #   a ::Logger on STDOUT that prints only the message
+    def initialize(provider = default_logger)
+      @log_provider = provider
+    end
+
+    # Logs an exception as an error: class, message, optional data and at most
+    # the first six backtrace lines.
+    #
+    # @param e [Exception]
+    # @param data [Hash, nil] extra context, omitted from the message when empty
+    def log_exception(e, data = {})
+      msg = "EXCEPTION : #{e.class.name} : #{e.message}"
+      msg += "\n data : #{data.inspect}" if data && !data.empty?
+      # Array(): backtrace is nil for an exception that was never raised.
+      msg += "\n #{Array(e.backtrace).first(6).join("\n ")}"
+      log_provider.error(msg)
+    end
+
+    # Forwards any message the provider understands.
+    def method_missing(meth, *args, &block)
+      if log_provider.respond_to?(meth)
+        log_provider.send(meth, *args, &block)
+      else
+        super
+      end
+    end
+
+    # Counterpart of #method_missing, so that #respond_to? and #method agree
+    # with what is actually forwarded.
+    def respond_to_missing?(meth, include_private = false)
+      log_provider.respond_to?(meth) || super
+    end
+
+    # Logs +msg+ at info level, but only when the :verbose option is on.
+    def verbose(msg)
+      info(msg) if options&.verbose
+    end
+
+    private
+
+    def default_logger
+      logger = ::Logger.new(STDOUT)
+      logger.formatter = proc do |_, _, _, msg|
+        "#{msg}\n"
+      end
+      logger
+    end
+
+    def options
+      Options.instance.hash
+    end
+  end
+end
diff --git a/lib/free_zipcode_data/options.rb b/lib/free_zipcode_data/options.rb
new file mode 100644
index 0000000..10d82b7
--- /dev/null
+++ b/lib/free_zipcode_data/options.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+require 'singleton'
+
+module FreeZipcodeData
+  # Singleton holder of the parsed command-line options.
+  class Options
+    include Singleton
+
+    def initialize
+      # Plain instance state (the class is a singleton); starting at nil means
+      # reading before #initialize_hash yields nil instead of raising.
+      @options = nil
+    end
+
+    # Stores +hash+ as the current options.
+    #
+    # @param hash [Hash] options object to keep
+    # @return [Hash] the stored object
+    def initialize_hash(hash)
+      @options = hash
+    end
+
+    # @param key [Object] option key
+    # @return [Object, nil] the stored value; nil when nothing has been stored
+    def [](key)
+      @options && @options[key]
+    end
+
+    # Returns the stored options object. NOTE: the method keeps this name even
+    # though it shadows Object#hash, because callers use
+    # `Options.instance.hash`.
+    #
+    # @return [Hash, nil] the stored options; nil when nothing has been stored
+    def hash
+      @options
+    end
+  end
+end
diff --git a/lib/free_zipcode_data/runner.rb b/lib/free_zipcode_data/runner.rb
new file mode 100644
index 0000000..d96a43d
--- /dev/null
+++ b/lib/free_zipcode_data/runner.rb
@@ -0,0 +1,137 @@
+# frozen_string_literal: true
+
+require 'colored'
+require 'trollop'
+require 'kiba'
+
+require_relative '../etl/free_zipcode_data_job'
+
+# pry is a development-only dependency. ENV[] rather than ENV.fetch, so that an
+# unset APP_ENV simply skips it instead of raising KeyError at load time.
+require 'pry' if ENV['APP_ENV'] == 'development'
+
+module FreeZipcodeData
+  # Command-line entry point: parses the arguments, downloads the data, builds
+  # the tables in an in-memory database and saves it to the work directory.
+  class Runner
+    # Table kind => default table name, in creation order.
+    TABLES = { country: 'countries', state: 'states', county: 'counties', zipcode: 'zipcodes' }.freeze
+
+    attr_accessor :logger, :options
+
+    # Make a singleton but allow the class to be instantiated for easier testing
+    #
+    # @return [Runner] the shared instance, created on first use
+    def self.instance
+      @instance ||= new
+    end
+
+    def initialize
+      @logger = Logger.instance
+    end
+
+    # Runs the whole download / build / save sequence and logs the elapsed time.
+    def start
+      started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+      options = FreeZipcodeData::Options.instance
+      options.initialize_hash(collect_args)
+
+      logger.info('Starting FreeZipcodeData...'.green)
+
+      datasource = DataSource.new(options.hash.country)
+      datasource.download
+
+      database = SqliteRam.new(File.join(options.hash.work_dir, 'free_zipcode_data.sqlite3'))
+
+      TABLES.each_key { |table| initialize_table(table, database) }
+
+      extract_transform_load(datasource, database)
+
+      database.save_to_disk
+
+      elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
+      logger.info("Finished in [#{elapsed}] seconds.".yellow)
+    end
+
+    private
+
+    # Instantiates the writer class for +table_sym+ and creates its table.
+    def initialize_table(table_sym, database)
+      options = Options.instance.hash
+      tablename = options["#{table_sym}_tablename".to_sym]
+      logger.verbose("Initializing #{table_sym} table: '#{tablename}'...")
+      # const_get resolves e.g. :country to CountryTable without evaluating a string.
+      klass = FreeZipcodeData.const_get("#{titleize(table_sym)}Table")
+      table = klass.new(
+        database: database.conn,
+        tablename: tablename
+      )
+      table.build
+    end
+
+    # Builds the ETL job for the prepared data file and runs it.
+    def extract_transform_load(datasource, database)
+      job = ETL::FreeZipcodeDataJob.setup(
+        datasource.datafile,
+        database.conn,
+        logger,
+        FreeZipcodeData::Options.instance.hash
+      )
+      Kiba.run(job)
+    end
+
+    # Declares and parses the command-line options.
+    #
+    # @return [Hash] the parsed options as returned by Trollop.options
+    # rubocop:disable Metrics/BlockLength
+    # rubocop:disable Metrics/MethodLength
+    def collect_args
+      tables = TABLES
+      Trollop.options do
+        opt(
+          :country,
+          'Specify the country code for processing, or all countries if not specified',
+          type: :string, required: false, short: '-g'
+        )
+        opt(
+          :work_dir,
+          'Specify your work/build directory, where the SQLite and .csv files will be built',
+          type: :string, required: true, short: '-w'
+        )
+        tables.each do |table, default_name|
+          opt(
+            "#{table}_tablename".to_sym,
+            "Specify the name for the `#{default_name}` table",
+            type: :string, required: false, default: default_name
+          )
+        end
+        opt(
+          :clobber,
+          'Overwrite existing files',
+          type: :boolean, required: false, short: '-c', default: false
+        )
+        opt(
+          :dry_run,
+          'Do not actually move or copy files',
+          type: :boolean, required: false, short: '-d',
+          default: false
+        )
+        opt(
+          :verbose,
+          'Be verbose with output',
+          type: :boolean, required: false, short: '-v',
+          default: false
+        )
+      end
+    end
+    # rubocop:enable Metrics/MethodLength
+    # rubocop:enable Metrics/BlockLength
+
+    # Upcases the first character of +string+ and leaves the rest untouched.
+    def titleize(string)
+      ret = string.to_s.dup
+      ret[0] = ret[0].capitalize
+      ret
+    end
+  end
+end
diff --git a/lib/free_zipcode_data/sqlite_ram.rb b/lib/free_zipcode_data/sqlite_ram.rb
new file mode 100644
index 0000000..f16a818
--- /dev/null
+++ b/lib/free_zipcode_data/sqlite_ram.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+require 'sqlite3'
+
+module FreeZipcodeData
+  # Holds an in-memory SQLite database (exposed as #conn) plus a file database
+  # that the in-memory contents are copied into by #save_to_disk.
+  class SqliteRam
+    attr_reader :filename, :conn
+
+    # @param sqlite_filename [String] path of the database file to save into
+    def initialize(sqlite_filename)
+      @filename = sqlite_filename
+      @ram_db = SQLite3::Database.new(':memory:')
+      @file_db = SQLite3::Database.new(sqlite_filename)
+      @conn = @ram_db
+    end
+
+    # Copies the whole in-memory database into the file database.
+    def save_to_disk
+      backup = SQLite3::Backup.new(@file_db, 'main', @ram_db, 'main')
+      begin
+        backup.step(-1) # -1 copies all remaining pages in one step
+      ensure
+        backup.finish # always release the backup handle
+      end
+    end
+  end
+end
+
+# Top-level alias kept for code that refers to ::SqliteRam.
+SqliteRam = FreeZipcodeData::SqliteRam unless defined?(SqliteRam)
diff --git a/lib/free_zipcode_data/state_table.rb b/lib/free_zipcode_data/state_table.rb
new file mode 100644
index 0000000..497d96f
--- /dev/null
+++ b/lib/free_zipcode_data/state_table.rb
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+require_relative 'db_table'
+
+module FreeZipcodeData
+  # Creates and fills the states table.
+  class StateTable < DbTable
+    # Creates the table named +tablename+ and a unique index over
+    # (abbr, country_id).
+    def build
+      schema = <<-SQL
+        create table #{tablename} (
+          id integer not null primary key,
+          country_id integer not null,
+          abbr varchar(2) not null,
+          name varchar(255)
+        )
+      SQL
+      database.execute_batch(schema)
+
+      ndx = <<-SQL
+        CREATE UNIQUE INDEX "main"."unique_state"
+        ON #{tablename} (abbr, country_id COLLATE NOCASE ASC);
+      SQL
+      database.execute_batch(ndx)
+    end
+
+    # Inserts the state of +row+; rows without :short_state are ignored and a
+    # state that is already present is silently skipped.
+    #
+    # @param row [Hash] record with :short_state, :state and :country; :state is
+    #   filled in for 'MH' rows that lack it
+    # @return [nil] when the row has no state abbreviation
+    # @raise [RuntimeError] if the row's country has not been stored
+    def write(row)
+      return nil unless row[:short_state]
+
+      row[:state] = 'Marshall Islands' if row[:short_state] == 'MH' && row[:state].nil?
+      country_id = get_country_id(row[:country])
+      # Fail with a clear message instead of sending malformed SQL to SQLite.
+      raise "Could not find country: #{row[:country]}" unless country_id
+
+      # Values are bound rather than interpolated so quotes in the data cannot
+      # break the statement; to_s keeps storing '' for a missing name.
+      sql = "INSERT INTO #{tablename} (abbr, name, country_id) VALUES (?, ?, ?)"
+      values = [row[:short_state].to_s, row[:state].to_s, country_id]
+
+      begin
+        database.execute(sql, values)
+      rescue SQLite3::ConstraintException
+        # Swallow duplicates
+      end
+    end
+  end
+end
diff --git a/lib/free_zipcode_data/version.rb b/lib/free_zipcode_data/version.rb
new file mode 100644
index 0000000..0b28193
--- /dev/null
+++ b/lib/free_zipcode_data/version.rb
@@ -0,0 +1,5 @@
+# frozen_string_literal: true
+
+module FreeZipcodeData
+  VERSION = '1.0.0' # Gem version string; free_zipcode_data.gemspec reads it as spec.version.
+end
diff --git a/lib/free_zipcode_data/zipcode_table.rb b/lib/free_zipcode_data/zipcode_table.rb
new file mode 100644
index 0000000..5cb6987
--- /dev/null
+++ b/lib/free_zipcode_data/zipcode_table.rb
@@ -0,0 +1,69 @@
+# frozen_string_literal: true
+
+require_relative 'db_table'
+
+module FreeZipcodeData
+  # Creates and fills the zipcodes table.
+  class ZipcodeTable < DbTable
+    # Creates the table named +tablename+ and a unique index over
+    # (state_id, code, city).
+    def build
+      schema = <<-SQL
+        create table #{tablename} (
+          id integer not null primary key,
+          code varchar(10) not null,
+          state_id integer,
+          county_id integer,
+          city varchar(255),
+          area_code varchar(3),
+          lat float,
+          lon float,
+          accuracy varchar(8)
+        )
+      SQL
+      database.execute_batch(schema)
+
+      ndx = <<-SQL
+        CREATE UNIQUE INDEX "main"."unique_zipcode"
+        ON #{tablename} (state_id, code, city COLLATE NOCASE ASC);
+      SQL
+      database.execute_batch(ndx)
+    end
+
+    # Inserts the postal code of +row+; rows without :postal_code are ignored
+    # and a code that is already present is silently skipped.
+    #
+    # @param row [Hash] record with :postal_code, :short_state, :county, :city,
+    #   :latitude, :longitude and :accuracy
+    # @return [nil] when the row has no postal code
+    # @raise [RuntimeError] if the insert fails for any other reason
+    def write(row)
+      return nil unless row[:postal_code]
+
+      # NOTE(review): both look-ups match on a single column (state
+      # abbreviation, county name), so equal names elsewhere may be picked.
+      state_id = get_state_id(row[:short_state])
+      county_id = get_county_id(row[:county])
+
+      # Values are bound rather than interpolated so quotes in the data cannot
+      # break the statement. to_s keeps storing '' (not NULL) for missing
+      # values, which the unique index relies on to catch duplicates.
+      sql = <<-SQL
+        INSERT INTO #{tablename} (code, state_id, county_id, city, lat, lon, accuracy)
+        VALUES (?, ?, ?, ?, ?, ?, ?)
+      SQL
+      values = [
+        row[:postal_code], state_id, county_id, row[:city],
+        row[:latitude], row[:longitude], row[:accuracy]
+      ].map(&:to_s)
+
+      begin
+        database.execute(sql, values)
+      rescue SQLite3::ConstraintException => _err
+        # there are some duplicates - swallow them
+      rescue StandardError => err
+        raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}] #{values.inspect}"
+      end
+    end
+  end
+end
diff --git a/lib/tasks/data.rake b/lib/tasks/data.rake
deleted file mode 100644
index 4405a04..0000000
--- a/lib/tasks/data.rake
+++ /dev/null
@@ -1,325 +0,0 @@
-require 'rubygems'
-require 'sqlite3'
-require 'csv'
-require 'open-uri'
-require 'zip'
-require 'yaml'
-require 'midwire_common/string'
-# require 'pry'
-
-# rubocop:disable Metrics/BlockLength
-namespace :data do
-
- desc 'Download the specified data from GeoNames'
- task :download, [:country] do |_t, args|
- BASE_URL = 'http://download.geonames.org/export/zip'.freeze
-
- # create the download directory
- FileUtils.mkdir_p(data_dir)
-
- # determine which file to get
- zipfile = country_zipfile(args[:country])
- puts(">>> Downloading: #{zipfile} from GeoNames...")
-
- # download the file
- open("#{data_dir}/#{zipfile}", 'wb') do |file|
- file << open("#{BASE_URL}/#{zipfile}").read
- end
- end
-
- # desc 'Delete the sqlite db'
- task :kill_db do
- FileUtils.rm_f('free_zipcode_data.sqlite3')
- end
-
- desc 'Build the data files. Downloads missing files.'
- task :build, [:country] do |_t, args|
- # create the build directory
- FileUtils.mkdir_p(build_dir)
-
- # determine the zipfile path
- zipfile = File.join(data_dir, country_zipfile(args[:country]))
-
- # download the zipfile if it doesn't exist
- Rake::Task['data:download'].invoke(args[:country]) unless File.exist?(zipfile)
-
- # extract the .tsv files
- puts('>>> Extracting zipfile...')
- Zip.on_exists_proc = true
- country_file = nil
- Zip::File.open(zipfile) do |zip|
- zip.each do |entry|
- next if entry.name =~ /readme/i
- country_file = File.join(build_dir, entry.name)
- entry.extract(country_file)
- break
- end
- end
-
- # country code 0: iso country code, 2 characters
- # postal code 1: varchar(20)
- # place name 2: varchar(180)
- # admin name1 3: 1. order subdivision (state) varchar(100)
- # admin code1 4: 1. order subdivision (state) varchar(20)
- # admin name2 5: 2. order subdivision (county/province) varchar(100)
- # admin code2 6: 2. order subdivision (county/province) varchar(20)
- # admin name3 7: 3. order subdivision (community) varchar(100)
- # admin code3 8: 3. order subdivision (community) varchar(20)
- # latitude 9: estimated latitude (wgs84)
- # longitude 10: estimated longitude (wgs84)
- # accuracy 11: accuracy of lat/lng from 1=estimated to 6=centroid
-
- puts('>>> Writing CSV file...')
- CSV.open(output_file(args[:country]), 'w') do |outfile|
- # write the header
- outfile << %w[COUNTRY POSTAL_CODE CITY STATE SHORT_STATE COUNTY SHORT_COUNTY COMMUNITY SHORT_COMMUNITY LATITUDE LONGITUDE ACCURACY]
- CSV.foreach(country_file, headers: false, col_sep: "\t", quote_char: '|') do |row|
- outfile << row
- end
- end
-
- # delete the extracted file
- FileUtils.rm(country_file)
-
- # build sqlite db
- FileUtils.rm_f 'free_zipcode_data.sqlite3'
- end
-
- # desc 'Create countries table'
- task create_countries_table: :kill_db do
- schema = <<-STOP.here_with_pipe
- |create table countries (
- | id integer not null primary key,
- | alpha2 varchar(2) not null,
- | alpha3 varchar(3),
- | iso varchar(3),
- | name varchar(255) not null
- |)
- STOP
- database.execute_batch(schema)
- ndx = <<-STOP.here_with_pipe(' ')
- |CREATE UNIQUE INDEX "main"."unique_country_alpha2"
- |ON countries (alpha2 COLLATE NOCASE ASC);
- STOP
- database.execute_batch(ndx)
- end
-
- # desc 'Create states table'
- task create_states_table: :create_countries_table do
- schema = <<-STOP.here_with_pipe
- |create table states (
- | id integer not null primary key,
- | country_id integer not null,
- | abbr varchar(2) not null,
- | name varchar(255)
- |)
- STOP
- database.execute_batch(schema)
- ndx = <<-STOP.here_with_pipe(' ')
- |CREATE UNIQUE INDEX "main"."unique_state"
- |ON states (abbr, country_id COLLATE NOCASE ASC);
- STOP
- database.execute_batch(ndx)
- end
-
- # desc 'Create counties table'
- task create_counties_table: :create_states_table do
- schema = <<-STOP.here_with_pipe
- |create table counties (
- | id integer not null primary key,
- | state_id integer,
- | abbr varchar(255),
- | name varchar(255),
- | county_seat varchar(255)
- |)
- STOP
- database.execute_batch(schema)
- end
-
- # desc 'Create zipcodes table'
- task create_zipcodes_table: :create_counties_table do
- schema = <<-STOP.here_with_pipe
- |create table zipcodes (
- | id integer not null primary key,
- | code varchar(10) not null,
- | state_id integer,
- | county_id integer,
- | city varchar(255),
- | area_code varchar(3),
- | lat float,
- | lon float,
- | accuracy varchar(8)
- |)
- STOP
- database.execute_batch(schema)
- ndx = <<-STOP.here_with_pipe(' ')
- |CREATE UNIQUE INDEX "main"."unique_zipcode"
- |ON zipcodes (state_id, code, city COLLATE NOCASE ASC);
- STOP
- database.execute_batch(ndx)
- end
-
- desc 'Populate an sqlite DB'
- task :populate_db, [:country] => [:create_zipcodes_table] do |_t, args|
- start_time = Time.now
- puts '>>> Building SQLite3 DB...'
-
- csvfile = output_file(args[:country])
-
- # run the build task if the data is missing for the passed country
- Rake::Task['data:build'].invoke(args[:country]) unless File.exist?(csvfile)
-
- last_country = nil
- count = 0
-
- # Countries
- CSV.foreach(csvfile, headers: true) do |row|
- country_hash = country_lookup_table[row['COUNTRY']]
- puts(">>> #{country_hash[:name]}") if last_country != country_hash[:name]
- last_country = country_hash[:name]
- puts(">>> COUNT: #{count}") if (count % 10000).zero?
- count += 1
-
- # insert country
- sql = <<-STOP.here_with_pipe(' ')
- |INSERT INTO countries (alpha2, alpha3, iso, name)
- |VALUES ('#{row['COUNTRY']}',
- | '#{country_hash[:alpha3]}',
- | '#{country_hash[:iso]}',
- | '#{country_hash[:name]}')
- STOP
- begin
- database.execute(sql)
- rescue SQLite3::ConstraintException
- # next
- end
-
- # state
- if row['STATE']
- country_id = get_country_id(row['COUNTRY'])
- sql = <<-STOP.here_with_pipe(' ')
- |INSERT INTO states (abbr, name, country_id)
- |VALUES ('#{row['SHORT_STATE']}',
- | '#{escape_single_quotes(row['STATE'])}',
- | #{country_id}
- |)
- STOP
- begin
- database.execute(sql)
- rescue StandardError => err
- # next
- end
- end
-
- # county
- if row['COUNTY']
- state_id = get_state_id(row['SHORT_STATE'])
- sql = <<-STOP.here_with_pipe(' ')
- |INSERT INTO counties (state_id, abbr, name)
- |VALUES ('#{state_id}',
- | '#{row['SHORT_COUNTY']}',
- | '#{escape_single_quotes(row['COUNTY'])}'
- |)
- STOP
- begin
- database.execute(sql)
- rescue StandardError => err
- raise "Please file an issue at https://github.com/midwire/free_zipcode_data/issues/new: [#{err}] -> SQL: [#{sql}]"
- end
- end
-
- # zipcode
- if row['POSTAL_CODE']
- state_id = get_state_id(row['SHORT_STATE'])
- county_id = get_county_id(row['COUNTY'])
- city_name = escape_single_quotes(row['CITY'])
- sql = <<-STOP.here_with_pipe(' ')
- |INSERT INTO zipcodes (code, state_id, county_id, city, lat, lon, accuracy)
- |VALUES ('#{row['POSTAL_CODE']}',
- | '#{state_id}',
- | '#{county_id}',
- | '#{city_name}',
- | '#{row['LATITUDE']}',
- | '#{row['LONGITUDE']}',
- | '#{row['ACCURACY']}'
- |)
- STOP
- begin
- database.execute(sql)
- rescue SQLite3::ConstraintException => err
- # there are some duplicates
- rescue StandardError => err
- raise "Please file an issue at https://github.com/midwire/free_zipcode_data/issues/new: [#{err}] -> SQL: [#{sql}]"
- end
- else
- puts(">>> Missing Postal Code: #{row}")
- end
- end
-
- end_time = Time.now - start_time
- puts ">>>> Completed in #{end_time} seconds"
- end
-
- private
-
- def database
- @db ||= SQLite3::Database.new('free_zipcode_data.sqlite3')
- end
-
- def get_country_id(country)
- rows = database.execute("SELECT id FROM countries WHERE alpha2 = '#{country}'")
- rows[0].nil? ? nil : rows[0].first
- end
-
- def get_state_id(state)
- rows = database.execute("SELECT id FROM states WHERE abbr = '#{state}'")
- rows[0].nil? ? nil : rows[0].first
- end
-
- def get_county_id(county)
- return nil if county.nil?
- sql = "SELECT id FROM counties WHERE name = '#{escape_single_quotes(county)}'"
- rows = database.execute(sql)
- rows[0].nil? ? nil : rows[0].first
- rescue SQLite3::SQLException => err
- raise "Please file an issue at https://github.com/midwire/free_zipcode_data/issues/new: [#{err}] -> SQL: [#{sql}]"
- end
-
- def escape_single_quotes(string)
- string.gsub(/[']/, '\'\'')
- end
-
- def root
- Pathname.new(File.dirname(__FILE__)).parent.parent
- end
-
- def data_dir
- File.join(root, 'data')
- end
-
- def build_dir
- File.join(root, 'build')
- end
-
- def country_zipfile(country)
- filename = country.nil? ? 'allCountries' : country.upcase
- filename += '.zip' unless filename =~ /\.zip$/
- filename
- end
-
- def country_csvfile(country)
- filename = country.nil? ? 'all_countries' : country.downcase
- filename += '.csv' unless filename =~ /\.csv$/
- filename
- end
-
- def output_file(country)
- filename = country.nil? ? 'all_countries.csv' : "#{country.downcase}.csv"
- File.join(build_dir, filename)
- end
-
- def country_lookup_table
- @country_lookup_table ||= YAML.load_file('country_lookup_table.yml')
- end
-end
-# rubocop:enable Metrics/BlockLength
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
new file mode 100644
index 0000000..5b83a36
--- /dev/null
+++ b/spec/spec_helper.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+# The library reads APP_ENV (FreeZipcodeData.current_environment); GEM_ENV is
+# still set for anything that may rely on it.
+ENV['APP_ENV'] = 'test'
+ENV['GEM_ENV'] = 'test'
+
+require 'pry'
+
+# Load the support files relative to this file; sorted for a stable load order.
+Dir[File.join(__dir__, 'support', '**', '*.rb')].sort.each { |f| require f }
+
+RSpec.configure do |config|
+  config.expect_with :rspec do |expectations|
+    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
+  end
+
+  config.mock_with :rspec do |mocks|
+    mocks.verify_partial_doubles = true
+  end
+
+  config.shared_context_metadata_behavior = :apply_to_host_groups
+
+  config.filter_run_when_matching :focus
+
+  config.example_status_persistence_file_path = 'spec/examples.txt'
+
+  config.disable_monkey_patching!
+
+  config.warnings = true
+
+  config.default_formatter = 'doc' if config.files_to_run.one?
+
+  config.profile_examples = 3
+
+  config.order = :random
+
+  Kernel.srand config.seed
+end