diff --git a/.gitignore b/.gitignore index 83c7758..cb8f3ba 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ /stubs /vendor/bundle/ /pkg +.claude/ diff --git a/.rubocop.yml b/.rubocop.yml index 95ad76c..da3c188 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -1,23 +1,24 @@ AllCops: - TargetRubyVersion: 2.7 + TargetRubyVersion: 3.4 # Include gemspec and Rakefile Include: - - '**/*.gemspec' - - '**/*.podspec' - - '**/*.jbuilder' - - '**/*.rake' - - '**/Gemfile' - - '**/Rakefile' - - '**/Capfile' - - '**/Guardfile' - - '**/Podfile' - - '**/Thorfile' - - '**/Vagrantfile' + - "**/*.rb" + - "**/*.gemspec" + - "**/*.podspec" + - "**/*.jbuilder" + - "**/*.rake" + - "**/Gemfile" + - "**/Rakefile" + - "**/Capfile" + - "**/Guardfile" + - "**/Podfile" + - "**/Thorfile" + - "**/Vagrantfile" Exclude: - - 'vendor/**/*' - - 'stubs/**/*' - - 'spec/support/shared_contexts/*' + - "vendor/**/*" + - "stubs/**/*" + - "spec/support/shared_contexts/*" NewCops: enable @@ -51,6 +52,10 @@ Style/DoubleNegation: Style/PerlBackrefs: Enabled: false +Style/OpenStructUse: + Exclude: + - "spec/**/*" + ######################################## # Lint Cops @@ -66,6 +71,10 @@ Security/Eval: ######################################## # Metrics Cops +Metrics/BlockLength: + Exclude: + - "spec/**/*" + Metrics/MethodLength: CountComments: false # count full line comments? Max: 30 @@ -77,7 +86,7 @@ Metrics/AbcSize: Enabled: false ######################################## -# Metrics Cops +# Naming Cops Naming/FileName: Enabled: false diff --git a/.ruby-version b/.ruby-version index b502146..7921bd0 100644 --- a/.ruby-version +++ b/.ruby-version @@ -1 +1 @@ -3.0.2 +3.4.8 diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..21de4c4 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,89 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +A Ruby gem that downloads postal/zipcode data from GeoNames.org, processes it via an ETL pipeline, and outputs an SQLite3 database and optional CSV files. Supports single-country or all-countries processing. + +## Commands + +```bash +# Install dependencies (vendored to vendor/bundle, binstubs in stubs/) +bundle install + +# Run all tests +bundle exec rspec + +# Run a single test file +bundle exec rspec spec/path/to/file_spec.rb + +# Run a specific test by line number +bundle exec rspec spec/path/to/file_spec.rb:42 + +# Lint +bundle exec rubocop + +# Lint with auto-correct +bundle exec rubocop -a + +# Version bumping (do on develop branch, not master) +bundle exec rake version:bump_patch +bundle exec rake version:bump_minor +bundle exec rake version:bump_major + +# Build and install gem +bundle exec rake build +bundle exec rake install + +# Release gem +bundle exec rake release +``` + +## Architecture + +The gem follows an ETL (Extract, Transform, Load) pattern using the Kiba gem: + +1. **Extract**: `DataSource` downloads zip files from GeoNames.org, extracts them, and prepares CSV files with headers +2. **Source**: `CsvSource` (Kiba source) feeds rows from the prepared CSV into the pipeline +3. **Load**: Four Kiba destination table classes write rows into an in-memory SQLite database + +### Key Flow + +`bin/free_zipcode_data` → `Runner#start` → `DataSource#download` → `DataSource#datafile` (extract zip + add CSV headers) → `SqliteRam` (in-memory DB) → `ETL::FreeZipcodeDataJob` (Kiba pipeline) → `SqliteRam#save_to_disk` + +### Core Classes + +- **`FreeZipcodeData::Runner`** - CLI entry point; parses args via Optimist, orchestrates the full pipeline +- **`FreeZipcodeData::DataSource`** - Downloads and extracts GeoNames zip files, prepares CSV with headers +- **`SqliteRam`** - Wraps SQLite3; works entirely in-memory then saves to disk via `SQLite3::Backup` +- **`FreeZipcodeData::DbTable`** - Base class for all table classes; provides progress bar, SQL helpers, and country lookup from `country_lookup_table.yml` +- **`FreeZipcodeData::CountryTable`/`StateTable`/`CountyTable`/`ZipcodeTable`** - Kiba destinations; each has `build` (creates schema + indexes) and `write` (inserts rows, swallows duplicate constraint violations) +- **`ETL::FreeZipcodeDataJob`** - Configures the Kiba pipeline with one source and four destinations +- **`CsvSource`** - Kiba-compatible CSV reader + +### Singletons + +`Options` and `Logger` are singletons (via Ruby's `Singleton` module). `Runner` has an `.instance` convenience class method (returns `new` each time, not cached). + +## Configuration + +- `.ruby-version`: 3.4.8 +- Bundle path: `vendor/bundle` (binstubs in `stubs/`) +- Environment: `APP_ENV` controls environment (`test`, `development`) +- Config file: `~/.free_zipcode_data.yml` (overridable via `FZD_CONFIG_FILE` env var; uses `spec/fixtures/` version in test) + +## Rubocop + +Key style settings (`.rubocop.yml`): +- Target Ruby 3.4 +- Max line length: 110 +- Max method length: 30 lines +- `Style/ClassVars`, `Style/Documentation`, `Metrics/AbcSize`, `Lint/SuppressedException` disabled +- `vendor/` and `stubs/` excluded + +## Git Workflow + +- `master` is the release branch +- `develop` is the development branch +- Version bumps should happen on `develop`, then merge to `master` before `rake release` diff --git a/Gemfile b/Gemfile index e967149..84915ee 100644 --- a/Gemfile +++ b/Gemfile @@ -4,3 +4,13 @@ source 'https://rubygems.org' git_source(:github) { |repo| "https://github.com/#{repo}.git" } gemspec + +group :development do + gem 'bundler' + gem 'pry-nav', '~> 0.2' + gem 'rake', '~> 13.0' + gem 'rspec', '~> 3.7' + gem 'rubocop' + gem 'ruby-prof', '~> 0.17' + gem 'simplecov', '~> 0.16' +end diff --git a/Gemfile.lock b/Gemfile.lock index 6053bd6..03d9535 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -3,7 +3,9 @@ PATH specs: free_zipcode_data (1.0.6) colored (~> 1.2) + csv kiba (~> 4.0) + logger optimist (~> 3.0) ruby-progressbar (~> 1.9) rubyzip (>= 1.2.2) @@ -12,63 +14,75 @@ PATH GEM remote: https://rubygems.org/ specs: - ast (2.4.2) + ast (2.4.3) coderay (1.1.3) colored (1.2) - diff-lcs (1.4.4) - docile (1.4.0) + csv (3.3.5) + diff-lcs (1.6.2) + docile (1.4.1) + json (2.18.1) kiba (4.0.0) + language_server-protocol (3.17.0.5) + lint_roller (1.1.0) + logger (1.7.0) method_source (0.9.2) mini_portile2 (2.8.9) optimist (3.2.1) - parallel (1.21.0) - parser (3.0.2.0) + parallel (1.27.0) + parser (3.3.10.1) ast (~> 2.4.1) + racc + prism (1.9.0) pry (0.12.2) coderay (~> 1.1.0) method_source (~> 0.9.0) pry-nav (0.3.0) pry (>= 0.9.10, < 0.13.0) - rainbow (3.0.0) - rake (13.0.6) - regexp_parser (2.1.1) - rexml (3.4.2) - rspec (3.10.0) - rspec-core (~> 3.10.0) - rspec-expectations (~> 3.10.0) - rspec-mocks (~> 3.10.0) - rspec-core (3.10.1) - rspec-support (~> 3.10.0) - rspec-expectations (3.10.1) + racc (1.8.1) + rainbow (3.1.1) + rake (13.3.1) + regexp_parser (2.11.3) + rspec (3.13.2) + rspec-core (~> 3.13.0) + rspec-expectations (~> 3.13.0) + rspec-mocks (~> 3.13.0) + rspec-core (3.13.6) + rspec-support (~> 3.13.0) + rspec-expectations (3.13.5) diff-lcs (>= 1.2.0, < 2.0) - rspec-support (~> 3.10.0) - rspec-mocks (3.10.2) + rspec-support (~> 3.13.0) + rspec-mocks (3.13.7) diff-lcs (>= 1.2.0, < 2.0) - rspec-support (~> 3.10.0) - rspec-support (3.10.3) - rubocop (1.22.3) + rspec-support (~> 3.13.0) + rspec-support (3.13.7) + rubocop (1.84.2) + json (~> 2.3) + language_server-protocol (~> 3.17.0.2) + lint_roller (~> 1.1.0) parallel (~> 1.10) - parser (>= 3.0.0.0) + parser (>= 3.3.0.2) rainbow (>= 2.2.2, < 4.0) - regexp_parser (>= 1.8, < 3.0) - rexml - rubocop-ast (>= 1.12.0, < 2.0) + regexp_parser (>= 2.9.3, < 3.0) + rubocop-ast (>= 1.49.0, < 2.0) ruby-progressbar (~> 1.7) - unicode-display_width (>= 1.4.0, < 3.0) - rubocop-ast (1.12.0) - parser (>= 3.0.1.1) + unicode-display_width (>= 2.4.0, < 4.0) + rubocop-ast (1.49.0) + parser (>= 3.3.7.2) + prism (~> 1.7) ruby-prof (0.18.0) - ruby-progressbar (1.11.0) - rubyzip (3.1.1) - simplecov (0.21.2) + ruby-progressbar (1.13.0) + rubyzip (3.2.2) + simplecov (0.22.0) docile (~> 1.1) simplecov-html (~> 0.11) simplecov_json_formatter (~> 0.1) - simplecov-html (0.12.3) - simplecov_json_formatter (0.1.3) + simplecov-html (0.13.2) + simplecov_json_formatter (0.1.4) sqlite3 (1.7.3) mini_portile2 (~> 2.8.0) - unicode-display_width (2.1.0) + unicode-display_width (3.2.0) + unicode-emoji (~> 4.1) + unicode-emoji (4.2.0) PLATFORMS ruby @@ -84,4 +98,4 @@ DEPENDENCIES simplecov (~> 0.16) BUNDLED WITH - 2.2.22 + 2.6.9 diff --git a/Rakefile b/Rakefile index 2bfdd1b..76fe069 100644 --- a/Rakefile +++ b/Rakefile @@ -4,7 +4,7 @@ require 'rubygems' require 'bundler/setup' require 'rake' -Dir['lib/tasks/**/*.rake'].sort.each { |ext| load ext } +Dir['lib/tasks/**/*.rake'].each { |ext| load ext } # Install rubygem tasks Bundler::GemHelper.install_tasks diff --git a/free_zipcode_data.gemspec b/free_zipcode_data.gemspec index 83745d3..4800963 100644 --- a/free_zipcode_data.gemspec +++ b/free_zipcode_data.gemspec @@ -23,18 +23,12 @@ Gem::Specification.new do |spec| spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } spec.require_paths = ['lib'] - spec.add_development_dependency 'bundler' - spec.add_development_dependency 'pry-nav', '~> 0.2' - spec.add_development_dependency 'rake', '~> 13.0' - spec.add_development_dependency 'rspec', '~> 3.7' - spec.add_development_dependency 'rubocop' - spec.add_development_dependency 'ruby-prof', '~> 0.17' - spec.add_development_dependency 'simplecov', '~> 0.16' - - spec.add_runtime_dependency 'colored', '~> 1.2' - spec.add_runtime_dependency 'kiba', '~> 4.0' - spec.add_runtime_dependency 'optimist', '~> 3.0' - spec.add_runtime_dependency 'ruby-progressbar', '~> 1.9' - spec.add_runtime_dependency 'rubyzip', '>= 1.2.2' - spec.add_runtime_dependency 'sqlite3', '~> 1.3' + spec.add_dependency 'colored', '~> 1.2' + spec.add_dependency 'csv' + spec.add_dependency 'kiba', '~> 4.0' + spec.add_dependency 'logger' + spec.add_dependency 'optimist', '~> 3.0' + spec.add_dependency 'ruby-progressbar', '~> 1.9' + spec.add_dependency 'rubyzip', '>= 1.2.2' + spec.add_dependency 'sqlite3', '~> 1.3' end diff --git a/lib/etl/common.rb b/lib/etl/common.rb index b1a1a7d..c83f54d 100644 --- a/lib/etl/common.rb +++ b/lib/etl/common.rb @@ -16,6 +16,7 @@ def show_me def limit(count) count = Integer(count || -1) return if count == -1 + transform do |row| @counter ||= 0 @counter += 1 diff --git a/lib/etl/csv_source.rb b/lib/etl/csv_source.rb index 0cbd162..1e9cd5a 100644 --- a/lib/etl/csv_source.rb +++ b/lib/etl/csv_source.rb @@ -14,10 +14,10 @@ def initialize(filename:, headers: true, delimeter: "\t", quote_char: '"') def each CSV.open(filename, - col_sep: delimeter, - headers: headers, - header_converters: :symbol, - quote_char: quote_char) do |csv| + col_sep: delimeter, + headers: headers, + header_converters: :symbol, + quote_char: quote_char) do |csv| csv.each do |row| yield(row.to_hash) end diff --git a/lib/free_zipcode_data.rb b/lib/free_zipcode_data.rb index 84f69e0..ff68d69 100644 --- a/lib/free_zipcode_data.rb +++ b/lib/free_zipcode_data.rb @@ -14,16 +14,16 @@ def self.current_environment ENV.fetch('APP_ENV', 'development') end - #:nocov: + # :nocov: def self.config_file(filename = '.free_zipcode_data.yml') return root.join('spec', 'fixtures', filename) if current_environment == 'test' - home = ENV.fetch('HOME') + home = Dir.home file = ENV.fetch('FZD_CONFIG_FILE', File.join(home, '.free_zipcode_data.yml')) FileUtils.touch(file) file end - #:nocov: + # :nocov: def self.os if RUBY_PLATFORM.match?(/cygwin|mswin|mingw|bccwin|wince|emx/) diff --git a/lib/free_zipcode_data/country_table.rb b/lib/free_zipcode_data/country_table.rb index a494fee..ce267ef 100644 --- a/lib/free_zipcode_data/country_table.rb +++ b/lib/free_zipcode_data/country_table.rb @@ -25,6 +25,7 @@ def build def write(row) country_hash = country_lookup_table[row[:country]] + return update_progress unless country_hash sql = <<-SQL INSERT INTO countries (alpha2, alpha3, iso, name) @@ -38,6 +39,8 @@ def write(row) database.execute(sql) rescue SQLite3::ConstraintException # Swallow duplicates + rescue StandardError => e + raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]" end update_progress diff --git a/lib/free_zipcode_data/county_table.rb b/lib/free_zipcode_data/county_table.rb index f622a72..2cf436a 100644 --- a/lib/free_zipcode_data/county_table.rb +++ b/lib/free_zipcode_data/county_table.rb @@ -41,8 +41,8 @@ def write(row) database.execute(sql) rescue SQLite3::ConstraintException # swallow duplicates - rescue StandardError => err - raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}]" + rescue StandardError => e + raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]" end update_progress diff --git a/lib/free_zipcode_data/data_source.rb b/lib/free_zipcode_data/data_source.rb index 5c0d408..29a5598 100644 --- a/lib/free_zipcode_data/data_source.rb +++ b/lib/free_zipcode_data/data_source.rb @@ -56,11 +56,11 @@ def unzipped_datafile if options[:clobber] Zip.on_exists_proc = true Logger.instance.verbose("Extracting: #{zipfile}...") - entry.extract(country_file) + entry.extract(destination_directory: options.work_dir) end else Logger.instance.verbose("Extracting: #{zipfile}...") - entry.extract(country_file) + entry.extract(destination_directory: options.work_dir) end break end diff --git a/lib/free_zipcode_data/db_table.rb b/lib/free_zipcode_data/db_table.rb index 12fe4b6..a41d161 100644 --- a/lib/free_zipcode_data/db_table.rb +++ b/lib/free_zipcode_data/db_table.rb @@ -8,6 +8,7 @@ class DbTable ISSUE_URL = 'https://github.com/midwire/free_zipcode_data/issues/new' attr_reader :database, :tablename + @@progressbar = nil def initialize(database:, tablename:) @@ -33,9 +34,9 @@ def country_lookup_table def select_first(sql) rows = database.execute(sql) - rows[0].nil? ? nil : rows[0].first - rescue SQLite3::SQLException => err - raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}]" + rows[0]&.first + rescue SQLite3::SQLException => e + raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]" end def get_country_id(country) @@ -51,12 +52,13 @@ def get_state_id(state_abbr, state_name) def get_county_id(county) return nil if county.nil? + sql = "SELECT id FROM counties WHERE name = '#{escape_single_quotes(county)}'" select_first(sql) end def escape_single_quotes(string) - string&.gsub(/[']/, '\'\'') || '' + string&.gsub('\'', '\'\'') || '' end end end diff --git a/lib/free_zipcode_data/logger.rb b/lib/free_zipcode_data/logger.rb index 59e6af1..3fdd7a8 100644 --- a/lib/free_zipcode_data/logger.rb +++ b/lib/free_zipcode_data/logger.rb @@ -13,27 +13,23 @@ def initialize(provider = default_logger) @log_provider = provider end - def log_exception(e, data = {}) - msg = "EXCEPTION : #{e.class.name} : #{e.message}" + def log_exception(error, data = {}) + msg = "EXCEPTION : #{error.class.name} : #{error.message}" msg += "\n data : #{data.inspect}" if data && !data.empty? - msg += "\n #{e.backtrace[0, 6].join("\n ")}" + msg += "\n #{error.backtrace[0, 6].join("\n ")}" log_provider.error(msg) end - def method_missing(meth, *args, &block) + def method_missing(meth, *, &) if log_provider.respond_to?(meth) - log_provider.send(meth, *args, &block) + log_provider.send(meth, *, &) else super end end - def respond_to?(meth, include_private = false) - if log_provider.respond_to?(meth) - true - else - super - end + def respond_to_missing?(meth, include_private = false) + log_provider.respond_to?(meth) || super end def verbose(msg) @@ -43,7 +39,7 @@ def verbose(msg) private def default_logger - logger = ::Logger.new(STDOUT) + logger = ::Logger.new($stdout) logger.formatter = proc do |_, _, _, msg| "#{msg}\n" end diff --git a/lib/free_zipcode_data/runner.rb b/lib/free_zipcode_data/runner.rb index 708bb9b..b540b90 100644 --- a/lib/free_zipcode_data/runner.rb +++ b/lib/free_zipcode_data/runner.rb @@ -58,9 +58,9 @@ def start private def initialize_table(table_sym, database) - tablename = options["#{table_sym}_tablename".to_sym] + tablename = options[:"#{table_sym}_tablename"] logger.verbose("Initializing #{table_sym} table: '#{tablename}'...") - klass = instance_eval("#{titleize(table_sym)}Table", __FILE__, __LINE__) + klass = FreeZipcodeData.const_get(:"#{titleize(table_sym)}Table") table = klass.new( database: database.conn, tablename: tablename diff --git a/lib/free_zipcode_data/state_table.rb b/lib/free_zipcode_data/state_table.rb index 70a86e0..b649ca5 100644 --- a/lib/free_zipcode_data/state_table.rb +++ b/lib/free_zipcode_data/state_table.rb @@ -30,6 +30,7 @@ def build def write(row) return nil unless row[:short_state] + row[:state] = 'Marshall Islands' if row[:short_state] == 'MH' && row[:state].nil? country_id = get_country_id(row[:country]) sql = <<-SQL @@ -43,6 +44,8 @@ def write(row) database.execute(sql) rescue SQLite3::ConstraintException # Swallow duplicates + rescue StandardError => e + raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]" end update_progress diff --git a/lib/free_zipcode_data/version.rb b/lib/free_zipcode_data/version.rb index 2c73f7f..1bd2a8d 100644 --- a/lib/free_zipcode_data/version.rb +++ b/lib/free_zipcode_data/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module FreeZipcodeData - VERSION = '1.0.6'.freeze + VERSION = '1.0.6' end diff --git a/lib/free_zipcode_data/zipcode_table.rb b/lib/free_zipcode_data/zipcode_table.rb index aa59b05..1e1aef2 100644 --- a/lib/free_zipcode_data/zipcode_table.rb +++ b/lib/free_zipcode_data/zipcode_table.rb @@ -45,10 +45,10 @@ def write(row) begin database.execute(sql) - rescue SQLite3::ConstraintException => _err + rescue SQLite3::ConstraintException => _e # there are some duplicates - swallow them - rescue StandardError => err - raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}]" + rescue StandardError => e + raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]" end update_progress diff --git a/lib/tasks/version.rake b/lib/tasks/version.rake index 93c2022..329f1f2 100644 --- a/lib/tasks/version.rake +++ b/lib/tasks/version.rake @@ -6,8 +6,6 @@ require 'fileutils' # rubocop:disable Metrics/BlockLength namespace :version do - PROJECT_ROOT = File.expand_path(FileUtils.pwd).freeze - PROJECT_NAME = ENV['PROJECT_NAME'] || File.basename(PROJECT_ROOT) desc 'Write changes to the CHANGELOG' task :changes do @@ -23,7 +21,7 @@ namespace :version do desc 'Increment the patch version and write changes to the changelog' task :bump_patch do - exit unless check_branch_and_warn + exit unless check_branch_and_warn? major, minor, patch = read_version patch = patch.to_i + 1 write_version_file([major, minor, patch]) @@ -36,7 +34,7 @@ namespace :version do desc 'Increment the minor version and write changes to the changelog' task :bump_minor do - exit unless check_branch_and_warn + exit unless check_branch_and_warn? major, minor, _patch = read_version minor = minor.to_i + 1 patch = 0 @@ -47,7 +45,7 @@ namespace :version do desc 'Increment the major version and write changes to the changelog' task :bump_major do - exit unless check_branch_and_warn + exit unless check_branch_and_warn? major, _minor, _patch = read_version major = major.to_i + 1 minor = 0 @@ -59,19 +57,27 @@ namespace :version do private + def project_root + @project_root ||= File.expand_path(FileUtils.pwd).freeze + end + + def project_name + @project_name ||= ENV['PROJECT_NAME'] || File.basename(project_root) + end + def version_file_path - split = PROJECT_NAME.split('-') - "#{PROJECT_ROOT}/lib/#{split.join('/')}/version.rb" + split = project_name.split('-') + "#{project_root}/lib/#{split.join('/')}/version.rb" end def module_name - case PROJECT_NAME + case project_name when /-/ - PROJECT_NAME.split('-').map(&:capitalize).join('::') + project_name.split('-').map(&:capitalize).join('::') when /_/ - PROJECT_NAME.split('_').map(&:capitalize).join + project_name.split('_').map(&:capitalize).join else - PROJECT_NAME.capitalize + project_name.capitalize end end @@ -79,13 +85,13 @@ namespace :version do silence_warnings do load version_file_path end - text = eval("#{module_name}::VERSION") + text = module_name.split('::').inject(Object) { |mod, name| mod.const_get(name) }::VERSION text.split('.') end def write_version_file(version_array) version = version_array.join('.') - new_version = %( VERSION = '#{version}'.freeze) + new_version = %( VERSION = '#{version}') lines = File.readlines(version_file_path) File.open(version_file_path, 'w') do |f| lines.each do |line| @@ -100,19 +106,17 @@ namespace :version do def update_readme_version_strings version_string = read_version.join('.') - readme = open('README.md').read + readme = File.read('README.md') regex = /^\*\*Version: [0-9.]+\*\*$/i return nil unless readme =~ regex - File.open('README.md', 'w') do |f| - f.write(readme.gsub(regex, "**Version: #{version_string}**")) - end + File.write('README.md', readme.gsub(regex, "**Version: #{version_string}**")) end def changelog return @changelog_path if @changelog_path - @changelog_path = File.join(PROJECT_ROOT, 'CHANGELOG') + @changelog_path = File.join(project_root, 'CHANGELOG') FileUtils.touch(@changelog_path) @changelog_path end @@ -159,16 +163,15 @@ namespace :version do STRING end - def check_branch_and_warn + def check_branch_and_warn? return true unless current_branch == 'master' puts(branch_warning_message) - while (line = $stdin.gets.chomp) - return true if line =~ /[yY]/ + line = $stdin.gets.chomp + return true if line =~ /[yY]/ - puts 'Aborting version bump.' - return false - end + puts 'Aborting version bump.' + false end def launch_editor(file) diff --git a/spec/etl/csv_source_spec.rb b/spec/etl/csv_source_spec.rb new file mode 100644 index 0000000..bcb9892 --- /dev/null +++ b/spec/etl/csv_source_spec.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +require 'etl/csv_source' + +RSpec.describe CsvSource do + let(:fixture_csv) { File.join(FreeZipcodeData.root, 'spec', 'fixtures', 'test_data.csv') } + + describe '#initialize' do + it 'stores the filename and options' do + source = described_class.new(filename: fixture_csv) + expect(source.filename).to eq(fixture_csv) + expect(source.headers).to be true + expect(source.delimeter).to eq("\t") + end + + it 'accepts custom delimiter and quote char' do + source = described_class.new(filename: fixture_csv, delimeter: ',', quote_char: '"') + expect(source.delimeter).to eq(',') + expect(source.quote_char).to eq('"') + end + end + + # CsvSource implements only #each (Kiba source protocol), not Enumerable + # rubocop:disable Style/MapIntoArray + describe '#each' do + it 'yields each row as a hash with symbolized keys' do + source = described_class.new(filename: fixture_csv, delimeter: ',', quote_char: '"') + rows = [] + source.each { |row| rows << row } + + expect(rows.length).to eq(5) + expect(rows.first).to be_a(Hash) + expect(rows.first.keys).to include(:country, :postal_code, :city) + end + + it 'parses the correct data from each row' do + source = described_class.new(filename: fixture_csv, delimeter: ',', quote_char: '"') + rows = [] + source.each { |row| rows << row } + + first = rows.first + expect(first[:country]).to eq('US') + expect(first[:postal_code]).to eq('10001') + expect(first[:city]).to eq('New York') + expect(first[:short_state]).to eq('NY') + end + + it 'handles rows from multiple countries' do + source = described_class.new(filename: fixture_csv, delimeter: ',', quote_char: '"') + countries = [] + source.each { |row| countries << row[:country] } + + expect(countries.uniq.sort).to eq(%w[CA GB US]) + end + end + # rubocop:enable Style/MapIntoArray +end diff --git a/spec/etl/free_zipcode_data_job_spec.rb b/spec/etl/free_zipcode_data_job_spec.rb new file mode 100644 index 0000000..b9606ca --- /dev/null +++ b/spec/etl/free_zipcode_data_job_spec.rb @@ -0,0 +1,96 @@ +# frozen_string_literal: true + +require 'kiba' +require 'etl/free_zipcode_data_job' + +RSpec.describe ETL::FreeZipcodeDataJob do + let(:db) { create_test_database(line_count: 5) } + let(:fixture_csv) { File.join(FreeZipcodeData.root, 'spec', 'fixtures', 'test_data.csv') } + let(:logger) { FreeZipcodeData::Logger.instance } + let(:string_io) { StringIO.new } + let(:options) do + OpenStruct.new( + country_tablename: 'countries', + state_tablename: 'states', + county_tablename: 'counties', + zipcode_tablename: 'zipcodes', + verbose: false + ) + end + + before do + FreeZipcodeData::Options.instance.initialize_hash(options) + logger.log_provider = Logger.new(string_io) + end + + describe '.setup' do + it 'returns a Kiba job definition' do + job = described_class.setup(fixture_csv, db, logger, options) + expect(job).not_to be_nil + end + end + + describe 'full ETL pipeline' do + before do + # Build all tables + FreeZipcodeData::CountryTable.new(database: db, tablename: 'countries').build + FreeZipcodeData::StateTable.new(database: db, tablename: 'states').build + FreeZipcodeData::CountyTable.new(database: db, tablename: 'counties').build + FreeZipcodeData::ZipcodeTable.new(database: db, tablename: 'zipcodes').build + + job = described_class.setup(fixture_csv, db, logger, options) + Kiba.run(job) + end + + it 'populates the countries table' do + rows = db.execute('SELECT alpha2 FROM countries ORDER BY alpha2') + expect(rows.flatten).to include('CA', 'GB', 'US') + end + + it 'populates the states table' do + rows = db.execute('SELECT abbr FROM states ORDER BY abbr') + abbrs = rows.flatten + expect(abbrs).to include('CA', 'IL', 'NY') + end + + it 'populates the counties table' do + rows = db.execute('SELECT name FROM counties ORDER BY name') + names = rows.flatten + expect(names).to include('Cook', 'Los Angeles', 'New York') + end + + it 'populates the zipcodes table' do + rows = db.execute('SELECT code FROM zipcodes ORDER BY code') + codes = rows.flatten + expect(codes).to include('10001', '60601', '90210') + end + + it 'links zipcodes to states' do + rows = db.execute(<<-SQL) + SELECT z.code, s.abbr + FROM zipcodes z + JOIN states s ON CAST(z.state_id AS INTEGER) = s.id + WHERE z.code = '60601' + SQL + expect(rows[0]).to eq(%w[60601 IL]) + end + + it 'links states to countries' do + rows = db.execute(<<-SQL) + SELECT s.abbr, c.alpha2 + FROM states s + JOIN countries c ON s.country_id = c.id + WHERE s.abbr = 'NY' + SQL + expect(rows[0]).to eq(%w[NY US]) + end + + it 'stores geocode data for zipcodes' do + rows = db.execute("SELECT lat, lon FROM zipcodes WHERE code = '10001'") + lat = rows[0][0].to_f + lon = rows[0][1].to_f + expect(lat).to be_within(0.01).of(40.7484) + expect(lon).to be_within(0.01).of(-73.9967) + end + end +end diff --git a/spec/fixtures/.free_zipcode_data.yml b/spec/fixtures/.free_zipcode_data.yml new file mode 100644 index 0000000..ed97d53 --- /dev/null +++ b/spec/fixtures/.free_zipcode_data.yml @@ -0,0 +1 @@ +--- diff --git a/spec/fixtures/US.txt b/spec/fixtures/US.txt new file mode 100644 index 0000000..63972de --- /dev/null +++ b/spec/fixtures/US.txt @@ -0,0 +1,5 @@ +US 10001 New York New York NY New York 061 Manhattan MN 40.7484 -73.9967 4 +US 90210 Beverly Hills California CA Los Angeles 037 LA 34.0901 -118.4065 4 +US 60601 Chicago Illinois IL Cook 031 CK 41.8819 -87.6278 4 +CA H2X Montreal Quebec QC Montreal 45.5088 -73.5878 4 +GB SW1A London England ENG Westminster City of Westminster 51.5014 -0.1419 1 diff --git a/spec/fixtures/US.zip b/spec/fixtures/US.zip new file mode 100644 index 0000000..1318b7e Binary files /dev/null and b/spec/fixtures/US.zip differ diff --git a/spec/fixtures/test_data.csv b/spec/fixtures/test_data.csv new file mode 100644 index 0000000..848c701 --- /dev/null +++ b/spec/fixtures/test_data.csv @@ -0,0 +1,6 @@ +COUNTRY,POSTAL_CODE,CITY,STATE,SHORT_STATE,COUNTY,SHORT_COUNTY,COMMUNITY,SHORT_COMMUNITY,LATITUDE,LONGITUDE,ACCURACY +US,10001,New York,New York,NY,New York,061,Manhattan,MN,40.7484,-73.9967,4 +US,90210,Beverly Hills,California,CA,Los Angeles,037,,LA,34.0901,-118.4065,4 +US,60601,Chicago,Illinois,IL,Cook,031,,CK,41.8819,-87.6278,4 +CA,H2X,Montreal,Quebec,QC,,,Montreal,,45.5088,-73.5878,4 +GB,SW1A,London,England,ENG,Westminster,,City of Westminster,,51.5014,-0.1419,1 diff --git a/spec/fixtures/test_data.txt b/spec/fixtures/test_data.txt new file mode 100644 index 0000000..63972de --- /dev/null +++ b/spec/fixtures/test_data.txt @@ -0,0 +1,5 @@ +US 10001 New York New York NY New York 061 Manhattan MN 40.7484 -73.9967 4 +US 90210 Beverly Hills California CA Los Angeles 037 LA 34.0901 -118.4065 4 +US 60601 Chicago Illinois IL Cook 031 CK 41.8819 -87.6278 4 +CA H2X Montreal Quebec QC Montreal 45.5088 -73.5878 4 +GB SW1A London England ENG Westminster City of Westminster 51.5014 -0.1419 1 diff --git a/spec/free_zipcode_data/country_table_spec.rb b/spec/free_zipcode_data/country_table_spec.rb new file mode 100644 index 0000000..90299c9 --- /dev/null +++ b/spec/free_zipcode_data/country_table_spec.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +require 'free_zipcode_data/country_table' + +RSpec.describe FreeZipcodeData::CountryTable do + let(:db) { create_test_database(line_count: 5) } + let(:table) { described_class.new(database: db, tablename: 'countries') } + + before { table.build } + + describe '#build' do + it 'creates the countries table' do + tables = db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='countries'") + expect(tables.length).to eq(1) + end + + it 'creates the unique alpha2 index' do + indexes = db.execute("SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='countries'") + index_names = indexes.map(&:first) + expect(index_names).to include('unique_country_alpha2') + end + + it 'creates columns for alpha2, alpha3, iso, and name' do + columns = db.execute("PRAGMA table_info('countries')").map { |c| c[1] } + expect(columns).to include('alpha2', 'alpha3', 'iso', 'name') + end + end + + describe '#write' do + it 'inserts a country row using the lookup table' do + table.write({ country: 'US' }) + rows = db.execute('SELECT alpha2, alpha3, name FROM countries') + expect(rows.length).to eq(1) + expect(rows[0]).to eq(['US', 'USA', 'United States of America']) + end + + it 'inserts multiple different countries' do + table.write({ country: 'US' }) + table.write({ country: 'CA' }) + table.write({ country: 'GB' }) + rows = db.execute('SELECT alpha2 FROM countries ORDER BY alpha2') + expect(rows.flatten).to eq(%w[CA GB US]) + end + + it 'silently ignores duplicate country codes' do + table.write({ country: 'US' }) + expect { table.write({ country: 'US' }) }.not_to raise_error + rows = db.execute('SELECT COUNT(*) FROM countries') + expect(rows[0][0]).to eq(1) + end + end +end diff --git a/spec/free_zipcode_data/county_table_spec.rb b/spec/free_zipcode_data/county_table_spec.rb new file mode 100644 index 0000000..d127438 --- /dev/null +++ b/spec/free_zipcode_data/county_table_spec.rb @@ -0,0 +1,78 @@ +# frozen_string_literal: true + +require 'free_zipcode_data/county_table' + +RSpec.describe FreeZipcodeData::CountyTable do + let(:db) { create_test_database(line_count: 5) } + let(:table) { described_class.new(database: db, tablename: 'counties') } + + before do + seed_countries(db) + seed_states(db) + table.build + end + + describe '#build' do + it 'creates the counties table' do + tables = db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='counties'") + expect(tables.length).to eq(1) + end + + it 'creates the unique_county index' do + indexes = db.execute("SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='counties'") + index_names = indexes.map(&:first) + expect(index_names).to include('unique_county') + end + + it 'creates columns for state_id, abbr, name, and county_seat' do + columns = db.execute("PRAGMA table_info('counties')").map { |c| c[1] } + expect(columns).to include('state_id', 'abbr', 'name', 'county_seat') + end + end + + describe '#write' do + it 'inserts a county row' do + table.write({ county: 'Cook', short_county: '031', short_state: 'IL', state: 'Illinois' }) + rows = db.execute('SELECT name, abbr FROM counties') + expect(rows.length).to eq(1) + expect(rows[0]).to eq(%w[Cook 031]) + end + + it 'links the county to its state' do + table.write({ county: 'Cook', short_county: '031', short_state: 'IL', state: 'Illinois' }) + state_id = db.execute("SELECT id FROM states WHERE abbr = 'IL'")[0][0] + county_state_id = db.execute('SELECT state_id FROM counties')[0][0] + expect(county_state_id).to eq(state_id) + end + + it 'returns nil and skips when county is nil' do + result = table.write({ county: nil, short_county: nil, short_state: 'IL', state: 'Illinois' }) + expect(result).to be_nil + rows = db.execute('SELECT COUNT(*) FROM counties') + expect(rows[0][0]).to eq(0) + end + + it 'returns nil when state cannot be found' do + result = table.write({ county: 'Unknown', short_county: '999', short_state: 'ZZ', + state: 'Nonexistent' }) + expect(result).to be_nil + rows = db.execute('SELECT COUNT(*) FROM counties') + expect(rows[0][0]).to eq(0) + end + + it 'silently ignores duplicate county entries' do + table.write({ county: 'Cook', short_county: '031', short_state: 'IL', state: 'Illinois' }) + expect do + table.write({ county: 'Cook', short_county: '031', short_state: 'IL', state: 'Illinois' }) + end.not_to raise_error + rows = db.execute('SELECT COUNT(*) FROM counties') + expect(rows[0][0]).to eq(1) + end + + it 'handles county names with single quotes' do + table.write({ county: "Prince George's", short_county: '033', short_state: 'NY', state: 'New York' }) + rows = db.execute('SELECT name FROM counties') + expect(rows[0][0]).to eq("Prince George's") + end + end +end diff --git a/spec/free_zipcode_data/data_source_spec.rb b/spec/free_zipcode_data/data_source_spec.rb new file mode 100644 index 0000000..de513fe --- /dev/null +++ b/spec/free_zipcode_data/data_source_spec.rb @@ -0,0 +1,131 @@ +# frozen_string_literal: true + +require 'free_zipcode_data/data_source' + +RSpec.describe FreeZipcodeData::DataSource do + let(:work_dir) { Dir.mktmpdir('datasource_test') } + let(:options) do + OpenStruct.new( + work_dir: work_dir, + clobber: false, + country: 'US', + verbose: false + ) + end + let(:options_instance) { FreeZipcodeData::Options.instance } + + before do + options_instance.initialize_hash(options) + end + + after do + FileUtils.rm_rf(work_dir) + end + + describe '#initialize' do + it 'stores the country' do + ds = described_class.new('US') + expect(ds.country).to eq('US') + end + + it 'defaults country to nil' do + ds = described_class.new + expect(ds.country).to be_nil + end + end + + describe '#download' do + let(:datasource) { described_class.new('US') } + let(:fixture_zip) { File.read(File.join(FreeZipcodeData.root, 'spec', 'fixtures', 'US.zip')) } + + it 'downloads and saves the zip file' do + uri_object = instance_double(URI::HTTP) + allow(URI).to receive(:parse).and_return(uri_object) + allow(uri_object).to receive(:open).and_yield(StringIO.new(fixture_zip)) + + datasource.download + + expect(File.exist?(File.join(work_dir, 'US.zip'))).to be true + end + + it 'skips download if the file already exists and clobber is false' do + FileUtils.touch(File.join(work_dir, 'US.zip')) + + expect(URI).not_to receive(:parse) + datasource.download + end + + it 'redownloads if clobber is true' do + FileUtils.touch(File.join(work_dir, 'US.zip')) + options_instance.initialize_hash(OpenStruct.new(work_dir: work_dir, clobber: true, country: 'US', + verbose: false)) + + uri_object = instance_double(URI::HTTP) + allow(URI).to receive(:parse).and_return(uri_object) + allow(uri_object).to receive(:open).and_yield(StringIO.new(fixture_zip)) + + datasource.download + + expect(File.size(File.join(work_dir, 'US.zip'))).to be > 0 + end + end + + describe '#datafile' do + let(:datasource) { described_class.new('US') } + + before do + fixture_dir = File.join(FreeZipcodeData.root, 'spec', 'fixtures') + # Copy fixture zip and pre-extracted text to work_dir + FileUtils.cp(File.join(fixture_dir, 'US.zip'), File.join(work_dir, 'US.zip')) + FileUtils.cp(File.join(fixture_dir, 'US.txt'), File.join(work_dir, 'US.txt')) + end + + it 'returns a CSV file path with headers prepended' do + result = datasource.datafile + expect(result).to end_with('.csv') + expect(File.exist?(result)).to be true + end + + it 'prepends headers to the extracted data' do + result = datasource.datafile + first_line = File.open(result, &:readline) + expect(first_line).to include('COUNTRY') + expect(first_line).to include('POSTAL_CODE') + expect(first_line).to include('LATITUDE') + end + + it 'contains the original data rows' do + result = datasource.datafile + lines = File.readlines(result) + # header + 5 data rows + expect(lines.length).to eq(6) + end + + it 'does not re-extract if CSV already exists and clobber is false' do + first = datasource.datafile + mtime = File.mtime(first) + sleep(0.1) + # Create a new instance to avoid memoization + ds2 = described_class.new('US') + second = ds2.datafile + expect(File.mtime(second)).to eq(mtime) + end + end + + describe 'zipfile naming' do + it 'uses country code for single country' do + ds = described_class.new('US') + expect(ds.send(:zipfile)).to eq('US.zip') + end + + it 'uppercases the country code' do + ds = described_class.new('us') + expect(ds.send(:zipfile)).to eq('US.zip') + end + + it 'uses allCountries when no country specified' do + ds = described_class.new(nil) + expect(ds.send(:zipfile)).to eq('allCountries.zip') + end + end +end diff --git a/spec/free_zipcode_data/db_table_spec.rb b/spec/free_zipcode_data/db_table_spec.rb new file mode 100644 index 0000000..c21a591 --- /dev/null +++ b/spec/free_zipcode_data/db_table_spec.rb @@ -0,0 +1,133 @@ +# frozen_string_literal: true + +require 'free_zipcode_data/db_table' + +RSpec.describe FreeZipcodeData::DbTable do + let(:db) { create_test_database(line_count: 5) } + + # DbTable is abstract - we need a concrete subclass to test it + let(:concrete_class) do + Class.new(described_class) do + def build; end + end + end + + let(:table) { concrete_class.new(database: db, tablename: 'test_table') } + + describe '#initialize' do + it 'stores the database and tablename' do + expect(table.database).to eq(db) + expect(table.tablename).to eq('test_table') + end + end + + describe '#update_progress' do + it 'increments the progress bar without error' do + expect { table.update_progress }.not_to raise_error + end + end + + describe 'private #escape_single_quotes' do + it 'escapes single quotes for SQL safety' do + result = table.send(:escape_single_quotes, "O'Brien") + expect(result).to eq("O''Brien") + end + + it 'handles nil gracefully' do + result = table.send(:escape_single_quotes, nil) + expect(result).to eq('') + end + + it 'handles strings without quotes' do + result = table.send(:escape_single_quotes, 'Chicago') + expect(result).to eq('Chicago') + end + end + + describe 'private #country_lookup_table' do + it 'loads the YAML lookup table' do + lookup = table.send(:country_lookup_table) + expect(lookup).to be_a(Hash) + expect(lookup['US'][:name]).to eq('United States of America') + end + end + + describe 'private #select_first' do + it 'returns the first column of the first row' do + result = table.send(:select_first, "SELECT value FROM meta WHERE name = 'line_count'") + expect(result).to eq('5') + end + + it 'returns nil when no rows match' do + result = table.send(:select_first, "SELECT value FROM meta WHERE name = 'nonexistent'") + expect(result).to be_nil + end + + it 'raises with issue URL on SQL error' do + expect do + table.send(:select_first, 'SELECT * FROM nonexistent_table') + end.to raise_error(/Please file an issue/) + end + end + + context 'with seeded countries and states' do + before do + seed_countries(db) + seed_states(db) + end + + describe 'private #get_country_id' do + it 'returns the country ID for a known alpha2 code' do + id = table.send(:get_country_id, 'US') + expect(id).to be_a(Integer) + end + + it 'returns nil for an unknown country' do + id = table.send(:get_country_id, 'ZZ') + expect(id).to be_nil + end + end + + describe 'private #get_state_id' do + it 'finds a state by abbreviation' do + id = table.send(:get_state_id, 'NY', 'New York') + expect(id).to be_a(Integer) + end + + it 'finds a state by name' do + id = table.send(:get_state_id, 'XX', 'New York') + expect(id).to be_a(Integer) + end + + it 'returns nil for an unknown state' do + id = table.send(:get_state_id, 'ZZ', 'Nonexistent') + expect(id).to be_nil + end + end + end + + context 'with seeded counties' do + before do + seed_countries(db) + seed_states(db) + seed_counties(db) + end + + describe 'private #get_county_id' do + it 'returns the county ID for a known county name' do + id = table.send(:get_county_id, 'Cook') + expect(id).to be_a(Integer) + end + + it 'returns nil for nil county' do + id = table.send(:get_county_id, nil) + expect(id).to be_nil + end + + it 'returns nil for an unknown county' do + id = table.send(:get_county_id, 'Nonexistent County') + expect(id).to be_nil + end + end + end +end diff --git a/spec/free_zipcode_data/logger_spec.rb b/spec/free_zipcode_data/logger_spec.rb new file mode 100644 index 0000000..cd7d961 --- /dev/null +++ b/spec/free_zipcode_data/logger_spec.rb @@ -0,0 +1,78 @@ +# frozen_string_literal: true + +RSpec.describe FreeZipcodeData::Logger do + let(:logger) { described_class.instance } + let(:string_io) { StringIO.new } + let(:test_provider) { Logger.new(string_io) } + + before do + logger.log_provider = test_provider + end + + after do + # Restore default logger + logger.log_provider = Logger.new($stdout) + end + + describe '#info' do + it 'delegates to the log provider' do + logger.info('test message') + expect(string_io.string).to include('test message') + end + end + + describe '#log_exception' do + it 'logs exception class, message, and backtrace' do + error = begin + raise StandardError, 'something broke' + rescue StandardError => e + e + end + + logger.log_exception(error) + output = string_io.string + expect(output).to include('EXCEPTION') + expect(output).to include('StandardError') + expect(output).to include('something broke') + end + + it 'includes data hash when provided' do + error = begin + raise StandardError, 'oops' + rescue StandardError => e + e + end + + logger.log_exception(error, { user_id: 42 }) + expect(string_io.string).to include('user_id') + end + end + + describe '#verbose' do + let(:options) { FreeZipcodeData::Options.instance } + + it 'logs when verbose option is true' do + options.initialize_hash(OpenStruct.new(verbose: true)) + logger.verbose('verbose message') + expect(string_io.string).to include('verbose message') + end + + it 'does not log when verbose option is false' do + options.initialize_hash(OpenStruct.new(verbose: false)) + logger.verbose('should not appear') + expect(string_io.string).not_to include('should not appear') + end + end + + describe '#respond_to?' do + it 'returns true for methods the log provider responds to' do + expect(logger.respond_to?(:info)).to be true + expect(logger.respond_to?(:warn)).to be true + expect(logger.respond_to?(:error)).to be true + end + + it 'returns false for unknown methods' do + expect(logger.respond_to?(:nonexistent_method)).to be false + end + end +end diff --git a/spec/free_zipcode_data/options_spec.rb b/spec/free_zipcode_data/options_spec.rb new file mode 100644 index 0000000..8fd079b --- /dev/null +++ b/spec/free_zipcode_data/options_spec.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +RSpec.describe FreeZipcodeData::Options do + let(:options) { described_class.instance } + + after do + # Reset singleton state + options.initialize_hash({}) + end + + describe '#initialize_hash' do + it 'stores the given hash' do + options.initialize_hash({ work_dir: '/tmp/claude/test', country: 'US' }) + expect(options.hash).to include(work_dir: '/tmp/claude/test', country: 'US') + end + end + + describe '#[]' do + it 'returns the value for the given key' do + options.initialize_hash({ country: 'GB' }) + expect(options[:country]).to eq('GB') + end + + it 'returns nil for missing keys' do + options.initialize_hash({}) + expect(options[:nonexistent]).to be_nil + end + end + + describe '#hash' do + it 'returns the full options hash' do + data = { work_dir: '/tmp/claude/test', verbose: true } + options.initialize_hash(data) + expect(options.hash).to eq(data) + end + end +end diff --git a/spec/free_zipcode_data/runner_spec.rb b/spec/free_zipcode_data/runner_spec.rb new file mode 100644 index 0000000..d905d2c --- /dev/null +++ b/spec/free_zipcode_data/runner_spec.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +require 'free_zipcode_data/runner' + +RSpec.describe FreeZipcodeData::Runner do + let(:work_dir) { Dir.mktmpdir('runner_test') } + let(:fixture_zip) { File.join(FreeZipcodeData.root, 'spec', 'fixtures', 'US.zip') } + let(:string_io) { StringIO.new } + + after do + FileUtils.rm_rf(work_dir) + end + + describe '.instance' do + it 'returns a Runner instance' do + expect(described_class.instance).to be_a(described_class) + end + end + + describe '#initialize' do + it 'sets up a logger' do + runner = described_class.new + expect(runner.logger).to eq(FreeZipcodeData::Logger.instance) + end + end + + describe '#start' do + let(:runner) { described_class.new } + + before do + # Suppress logger output + runner.logger.log_provider = Logger.new(string_io) + + # Copy fixture zip and pre-extracted text into work_dir + fixture_dir = File.join(FreeZipcodeData.root, 'spec', 'fixtures') + FileUtils.mkdir_p(work_dir) + FileUtils.cp(File.join(fixture_dir, 'US.zip'), File.join(work_dir, 'US.zip')) + FileUtils.cp(File.join(fixture_dir, 'US.txt'), File.join(work_dir, 'US.txt')) + + # Stub ARGV to provide required CLI args + stub_const('ARGV', [ + '--work-dir', work_dir, + '--country', 'US', + '--generate-files' + ]) + end + + it 'creates an SQLite database in the work directory' do + runner.start + expect(File.exist?(File.join(work_dir, 'free_zipcode_data.sqlite3'))).to be true + end + + it 'generates CSV files when --generate-files is specified' do + runner.start + %w[countries states counties zipcodes].each do |table| + expect(File.exist?(File.join(work_dir, "#{table}.csv"))).to be true + end + end + + it 'populates the SQLite database with data' do + runner.start + db = SQLite3::Database.new(File.join(work_dir, 'free_zipcode_data.sqlite3')) + country_count = db.execute('SELECT COUNT(*) FROM countries')[0][0] + zipcode_count = db.execute('SELECT COUNT(*) FROM zipcodes')[0][0] + expect(country_count).to be >= 1 + expect(zipcode_count).to be >= 1 + db.close + end + + it 'sets the options on the runner' do + runner.start + expect(runner.options).not_to be_nil + expect(runner.options[:work_dir]).to eq(work_dir) + end + + context 'without --generate-files' do + before do + stub_const('ARGV', [ + '--work-dir', work_dir, + '--country', 'US' + ]) + end + + it 'creates the database but not CSV files' do + runner.start + expect(File.exist?(File.join(work_dir, 'free_zipcode_data.sqlite3'))).to be true + expect(File.exist?(File.join(work_dir, 'countries.csv'))).to be false + end + end + end +end diff --git a/spec/free_zipcode_data/sqlite_ram_spec.rb b/spec/free_zipcode_data/sqlite_ram_spec.rb new file mode 100644 index 0000000..0925749 --- /dev/null +++ b/spec/free_zipcode_data/sqlite_ram_spec.rb @@ -0,0 +1,64 @@ +# frozen_string_literal: true + +require 'tempfile' +require 'free_zipcode_data/sqlite_ram' + +RSpec.describe SqliteRam do + let(:tmpdir) { Dir.mktmpdir('sqlite_ram_test') } + let(:db_path) { File.join(tmpdir, 'test_db.sqlite3') } + let(:sqlite_ram) { described_class.new(db_path) } + + after do + FileUtils.rm_rf(tmpdir) + end + + describe '#initialize' do + it 'creates an in-memory database connection' do + expect(sqlite_ram.conn).to be_a(SQLite3::Database) + end + + it 'stores the filename' do + expect(sqlite_ram.filename).to eq(db_path) + end + end + + describe '#save_to_disk' do + it 'persists in-memory data to the file database' do + sqlite_ram.conn.execute('CREATE TABLE test (id INTEGER PRIMARY KEY, name TEXT)') + sqlite_ram.conn.execute("INSERT INTO test (name) VALUES ('hello')") + sqlite_ram.save_to_disk + + file_db = SQLite3::Database.new(db_path) + rows = file_db.execute('SELECT name FROM test') + expect(rows).to eq([['hello']]) + file_db.close + end + end + + describe '#dump_tables' do + it 'exports each table to a CSV file in the given directory' do + sqlite_ram.conn.execute('CREATE TABLE widgets (id INTEGER PRIMARY KEY, name TEXT, weight REAL)') + sqlite_ram.conn.execute("INSERT INTO widgets (name, weight) VALUES ('gear', 1.5)") + sqlite_ram.conn.execute("INSERT INTO widgets (name, weight) VALUES ('bolt', 0.3)") + + sqlite_ram.dump_tables(tmpdir) + + csv_path = File.join(tmpdir, 'widgets.csv') + expect(File.exist?(csv_path)).to be true + + csv = CSV.read(csv_path) + expect(csv[0]).to eq(%w[id name weight]) + expect(csv.length).to eq(3) # header + 2 rows + end + + it 'exports multiple tables' do + sqlite_ram.conn.execute('CREATE TABLE a (id INTEGER PRIMARY KEY)') + sqlite_ram.conn.execute('CREATE TABLE b (id INTEGER PRIMARY KEY)') + + sqlite_ram.dump_tables(tmpdir) + + expect(File.exist?(File.join(tmpdir, 'a.csv'))).to be true + expect(File.exist?(File.join(tmpdir, 'b.csv'))).to be true + end + end +end diff --git a/spec/free_zipcode_data/state_table_spec.rb b/spec/free_zipcode_data/state_table_spec.rb new file mode 100644 index 0000000..7c3cde0 --- /dev/null +++ b/spec/free_zipcode_data/state_table_spec.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +require 'free_zipcode_data/state_table' + +RSpec.describe FreeZipcodeData::StateTable do + let(:db) { create_test_database(line_count: 5) } + let(:table) { described_class.new(database: db, tablename: 'states') } + + before do + seed_countries(db) + table.build + end + + describe '#build' do + it 'creates the states table' do + tables = db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='states'") + expect(tables.length).to eq(1) + end + + it 'creates the unique_state index' do + indexes = db.execute("SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='states'") + index_names = indexes.map(&:first) + expect(index_names).to include('unique_state') + end + + it 'creates the state_name index' do + indexes = db.execute("SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='states'") + index_names = indexes.map(&:first) + expect(index_names).to include('state_name') + end + + it 'creates columns for country_id, abbr, and name' do + columns = db.execute("PRAGMA table_info('states')").map { |c| c[1] } + expect(columns).to include('country_id', 'abbr', 'name') + end + end + + describe '#write' do + it 'inserts a state row' do + table.write({ country: 'US', short_state: 'NY', state: 'New York' }) + rows = db.execute('SELECT abbr, name FROM states') + expect(rows.length).to eq(1) + expect(rows[0]).to eq(['NY', 'New York']) + end + + it 'links the state to its country' do + table.write({ country: 'US', short_state: 'NY', state: 'New York' }) + country_id = db.execute("SELECT id FROM countries WHERE alpha2 = 'US'")[0][0] + state_country_id = db.execute('SELECT country_id FROM states')[0][0] + expect(state_country_id).to eq(country_id) + end + + it 'returns nil and skips when short_state is nil' do + result = table.write({ country: 'US', short_state: nil, state: 'Unknown' }) + expect(result).to be_nil + rows = db.execute('SELECT COUNT(*) FROM states') + expect(rows[0][0]).to eq(0) + end + + it 'silently ignores duplicate state entries' do + table.write({ country: 'US', short_state: 'NY', state: 'New York' }) + expect { table.write({ country: 'US', short_state: 'NY', state: 'New York' }) }.not_to raise_error + rows = db.execute('SELECT COUNT(*) FROM states') + expect(rows[0][0]).to eq(1) + end + + it 'handles the Marshall Islands edge case' do + table.write({ country: 'US', short_state: 'MH', state: nil }) + rows = db.execute("SELECT name FROM states WHERE abbr = 'MH'") + expect(rows[0][0]).to eq('Marshall Islands') + end + + it 'handles state names with single quotes' do + # Some international state names can have apostrophes + table.write({ country: 'US', short_state: 'TX', state: "Cote d'Ivoire" }) + rows = db.execute("SELECT name FROM states WHERE abbr = 'TX'") + expect(rows[0][0]).to eq("Cote d'Ivoire") + end + end +end diff --git a/spec/free_zipcode_data/zipcode_table_spec.rb b/spec/free_zipcode_data/zipcode_table_spec.rb new file mode 100644 index 0000000..175a07b --- /dev/null +++ b/spec/free_zipcode_data/zipcode_table_spec.rb @@ -0,0 +1,94 @@ +# frozen_string_literal: true + +require 'free_zipcode_data/zipcode_table' + +RSpec.describe FreeZipcodeData::ZipcodeTable do + let(:db) { create_test_database(line_count: 5) } + let(:table) { described_class.new(database: db, tablename: 'zipcodes') } + + before do + seed_countries(db) + seed_states(db) + table.build + end + + describe '#build' do + it 'creates the zipcodes table' do + tables = db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='zipcodes'") + expect(tables.length).to eq(1) + end + + it 'creates the unique_zipcode index' do + indexes = db.execute("SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='zipcodes'") + index_names = indexes.map(&:first) + expect(index_names).to include('unique_zipcode') + end + + it 'creates columns for code, state_id, city, area_code, lat, lon, accuracy' do + columns = db.execute("PRAGMA table_info('zipcodes')").map { |c| c[1] } + expect(columns).to include('code', 'state_id', 'city', 'area_code', 'lat', 'lon', 'accuracy') + end + end + + describe '#write' do + let(:row) do + { + postal_code: '60601', + short_state: 'IL', + state: 'Illinois', + city: 'Chicago', + latitude: '41.8819', + longitude: '-87.6278', + accuracy: '4' + } + end + + it 'inserts a zipcode row' do + table.write(row) + rows = db.execute('SELECT code, city FROM zipcodes') + expect(rows.length).to eq(1) + expect(rows[0]).to eq(%w[60601 Chicago]) + end + + it 'stores latitude and longitude' do + table.write(row) + rows = db.execute('SELECT lat, lon FROM zipcodes') + expect(rows[0][0].to_s).to start_with('41.88') + expect(rows[0][1].to_s).to start_with('-87.62') + end + + it 'links the zipcode to its state' do + table.write(row) + state_id = db.execute("SELECT id FROM states WHERE abbr = 'IL'")[0][0] + zipcode_state_id = db.execute('SELECT state_id FROM zipcodes')[0][0] + expect(zipcode_state_id.to_i).to eq(state_id) + end + + it 'returns nil and skips when postal_code is nil' do + result = table.write(row.merge(postal_code: nil)) + expect(result).to be_nil + rows = db.execute('SELECT COUNT(*) FROM zipcodes') + expect(rows[0][0]).to eq(0) + end + + it 'silently ignores duplicate zipcode entries' do + table.write(row) + expect { table.write(row) }.not_to raise_error + rows = db.execute('SELECT COUNT(*) FROM zipcodes') + expect(rows[0][0]).to eq(1) + end + + it 'handles city names with single quotes' do + table.write(row.merge(city: "Coeur d'Alene", postal_code: '83814')) + rows = db.execute('SELECT city FROM zipcodes') + expect(rows[0][0]).to eq("Coeur d'Alene") + end + + it 'inserts multiple different zipcodes' do + table.write(row) + table.write(row.merge(postal_code: '10001', city: 'New York', short_state: 'NY', state: 'New York')) + rows = db.execute('SELECT COUNT(*) FROM zipcodes') + expect(rows[0][0]).to eq(2) + end + end +end diff --git a/spec/free_zipcode_data_spec.rb b/spec/free_zipcode_data_spec.rb new file mode 100644 index 0000000..2a8470e --- /dev/null +++ b/spec/free_zipcode_data_spec.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +RSpec.describe FreeZipcodeData do + describe '.root' do + it 'returns a Pathname to the project root' do + expect(described_class.root).to be_a(Pathname) + expect(described_class.root.join('lib', 'free_zipcode_data.rb')).to exist + end + end + + describe '.current_environment' do + it 'returns "test" when APP_ENV is set to test' do + expect(described_class.current_environment).to eq('test') + end + + it 'defaults to "development" when APP_ENV is not set' do + allow(ENV).to receive(:fetch).with('APP_ENV', 'development').and_return('development') + expect(described_class.current_environment).to eq('development') + end + end + + describe '.config_file' do + it 'returns spec/fixtures path in test environment' do + path = described_class.config_file + expect(path.to_s).to include('spec/fixtures/.free_zipcode_data.yml') + end + end + + describe '.os' do + it 'returns :normal on non-Windows platforms' do + expect(described_class.os).to eq(:normal) + end + end + + it 'has a version number' do + expect(FreeZipcodeData::VERSION).to match(/\A\d+\.\d+\.\d+\z/) + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index fc77cbe..b49c48f 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -2,11 +2,28 @@ ENV['APP_ENV'] = 'test' -require 'pry' +begin + require 'pry' +rescue NameError, LoadError + # pry may not be compatible with current Ruby version +end + +require 'ostruct' +require 'free_zipcode_data' +require 'free_zipcode_data/runner' -Dir[Pathname.new(File.dirname(__FILE__)).parent.join('spec/support/**/*.rb')].sort.each { |f| require f } +Dir[Pathname.new(File.dirname(__FILE__)).parent.join('spec/support/**/*.rb')].each { |f| require f } RSpec.configure do |config| + config.include DatabaseHelpers + + # Silence progress bar output during tests + config.before do + allow(ProgressBar).to receive(:create).and_wrap_original do |method, **args| + method.call(**args, output: StringIO.new) + end + end + config.expect_with :rspec do |expectations| expectations.include_chain_clauses_in_custom_matcher_descriptions = true end diff --git a/spec/support/database_helpers.rb b/spec/support/database_helpers.rb new file mode 100644 index 0000000..91cf648 --- /dev/null +++ b/spec/support/database_helpers.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +require 'sqlite3' + +module DatabaseHelpers + def create_test_database(line_count: 5) + db = SQLite3::Database.new(':memory:') + db.execute_batch(<<-SQL) + CREATE TABLE meta ( + id integer not null primary key, + name varchar(255), + value varchar(255) + ) + SQL + db.execute("INSERT INTO meta (name, value) VALUES ('line_count', #{line_count})") + db + end + + def seed_countries(db, tablename: 'countries') + table = FreeZipcodeData::CountryTable.new(database: db, tablename: tablename) + table.build + [ + { country: 'US' }, + { country: 'CA' }, + { country: 'GB' } + ].each { |row| table.write(row) } + end + + def seed_states(db, tablename: 'states') + table = FreeZipcodeData::StateTable.new(database: db, tablename: tablename) + table.build + [ + { country: 'US', short_state: 'NY', state: 'New York' }, + { country: 'US', short_state: 'CA', state: 'California' }, + { country: 'US', short_state: 'IL', state: 'Illinois' } + ].each { |row| table.write(row) } + end + + def seed_counties(db, tablename: 'counties') + table = FreeZipcodeData::CountyTable.new(database: db, tablename: tablename) + table.build + [ + { county: 'New York', short_county: '061', short_state: 'NY', state: 'New York' }, + { county: 'Los Angeles', short_county: '037', short_state: 'CA', state: 'California' }, + { county: 'Cook', short_county: '031', short_state: 'IL', state: 'Illinois' } + ].each { |row| table.write(row) } + end +end