From a354dcd1483edcfa71c891f9b1a3fae0c02d9ed7 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 19 Nov 2025 14:00:37 -0400 Subject: [PATCH] Generate ISO country schemas using `.jq` templates Signed-off-by: Juan Cruz Viotti --- Makefile | 39 +++-- generate/iso/country/main.py | 211 -------------------------- templates/iso/country/2020/alpha-2.jq | 27 ++++ templates/iso/country/2020/alpha-3.jq | 27 ++++ templates/iso/country/2020/numeric.jq | 27 ++++ 5 files changed, 109 insertions(+), 222 deletions(-) delete mode 100644 generate/iso/country/main.py create mode 100644 templates/iso/country/2020/alpha-2.jq create mode 100644 templates/iso/country/2020/alpha-3.jq create mode 100644 templates/iso/country/2020/numeric.jq diff --git a/Makefile b/Makefile index 7eaea916..6ea3a401 100644 --- a/Makefile +++ b/Makefile @@ -85,7 +85,10 @@ GENERATED = \ schemas/xbrl/utr/volume-item-type-normative.json \ schemas/xbrl/utr/volume-item-type.json \ schemas/xbrl/utr/volume-per-monetary-item-type-normative.json \ - schemas/xbrl/utr/volume-per-monetary-item-type.json + schemas/xbrl/utr/volume-per-monetary-item-type.json \ + schemas/iso/country/2020/alpha-2.json \ + schemas/iso/country/2020/alpha-3.json \ + schemas/iso/country/2020/numeric.json # TODO: Make `jsonschema fmt` automatically detect test files all: common test @@ -113,25 +116,37 @@ lint: common test: $(JSONSCHEMA) test ./test -build/iso/currency/list-%.json: scripts/xml2json.py vendor/data/iso/currency/list-%.xml +build/iso/currency/list-%.json: \ + scripts/xml2json.py \ + vendor/data/iso/currency/list-%.xml $(PYTHON) $< $(word 2,$^) $@ -schemas/iso/currency/2015/historical/alpha-code.json: build/iso/currency/list-three.json templates/iso/currency/2015/historical/alpha-code.jq +schemas/iso/currency/2015/historical/alpha-code.json: \ + build/iso/currency/list-three.json \ + templates/iso/currency/2015/historical/alpha-code.jq $(MKDIRP) $(dir $@) $(JQ) --from-file $(word 2,$^) $< > $@ $(JSONSCHEMA) fmt $@ -schemas/iso/currency/2015/historical/alpha-currency.json: build/iso/currency/list-three.json templates/iso/currency/2015/historical/alpha-currency.jq +schemas/iso/currency/2015/historical/alpha-currency.json: \ + build/iso/currency/list-three.json \ + templates/iso/currency/2015/historical/alpha-currency.jq $(MKDIRP) $(dir $@) $(JQ) --from-file $(word 2,$^) $< > $@ $(JSONSCHEMA) fmt $@ -schemas/iso/currency/2015/historical/numeric-code.json: build/iso/currency/list-three.json templates/iso/currency/2015/historical/numeric-code.jq +schemas/iso/currency/2015/historical/numeric-code.json: \ + build/iso/currency/list-three.json \ + templates/iso/currency/2015/historical/numeric-code.jq $(MKDIRP) $(dir $@) $(JQ) --from-file $(word 2,$^) $< > $@ $(JSONSCHEMA) fmt $@ -schemas/iso/currency/2015/historical/numeric-currency.json: build/iso/currency/list-three.json templates/iso/currency/2015/historical/numeric-currency.jq +schemas/iso/currency/2015/historical/numeric-currency.json: \ + build/iso/currency/list-three.json \ + templates/iso/currency/2015/historical/numeric-currency.jq $(MKDIRP) $(dir $@) $(JQ) --from-file $(word 2,$^) $< > $@ $(JSONSCHEMA) fmt $@ -schemas/iso/currency/2015/%.json: build/iso/currency/list-one.json templates/iso/currency/2015/%.jq +schemas/iso/currency/2015/%.json: \ + build/iso/currency/list-one.json \ + templates/iso/currency/2015/%.jq $(MKDIRP) $(dir $@) $(JQ) --from-file $(word 2,$^) $< > $@ $(JSONSCHEMA) fmt $@ @@ -144,10 +159,12 @@ schemas/xbrl/utr/%.json: build/xbrl/utr/utr.json templates/xbrl/utr/%.jq $(JQ) --from-file $(word 2,$^) $< > $@ $(JSONSCHEMA) fmt $@ -generate-iso-language: generate/iso/language/main.py - $(PYTHON) $< -generate-iso-country: generate/iso/country/main.py - $(PYTHON) $< +schemas/iso/country/2020/%.json: \ + vendor/iso3166/all/all.json \ + templates/iso/country/2020/%.jq + $(MKDIRP) $(dir $@) + $(JQ) --from-file $(word 2,$^) $< > $@ + $(JSONSCHEMA) fmt $@ # TODO: Add a `jsonschema pkg` command instead diff --git a/generate/iso/country/main.py b/generate/iso/country/main.py deleted file mode 100644 index 3cb431a3..00000000 --- a/generate/iso/country/main.py +++ /dev/null @@ -1,211 +0,0 @@ -import json -import os -import sys -import re - - -def format_json_compact_arrays(obj): - json_string = json.dumps(obj, indent=2, ensure_ascii=False) - - def compact_array(match): - content = match.group(1) - items = [item.strip() for item in re.findall(r'"[^"]*"|\d+', content)] - if len(items) > 0: - return '[ ' + ', '.join(items) + ' ]' - return '[]' - - json_string = re.sub(r'\[\s*\n\s*((?:"[^"]*"|\d+)(?:\s*,\s*\n\s*(?:"[^"]*"|\d+))*)\s*\n\s*\]', compact_array, json_string) - - return json_string - - -def parse_countries(all_file, slim_2_file, slim_3_file): - with open(all_file, 'r') as file: - all_data = json.load(file) - - with open(slim_2_file, 'r') as file: - slim_2_data = json.load(file) - - with open(slim_3_file, 'r') as file: - slim_3_data = json.load(file) - - alpha_2 = {} - alpha_3 = {} - numeric = {} - - for entry in all_data: - alpha_2_code = entry.get("alpha-2") - alpha_3_code = entry.get("alpha-3") - numeric_code = entry.get("country-code") - name = entry.get("name") - region = entry.get("region") or None - sub_region = entry.get("sub-region") or None - - if alpha_2_code: - alpha_2[alpha_2_code] = { - "name": name, - "alpha-3": alpha_3_code, - "numeric": numeric_code, - "region": region, - "sub-region": sub_region - } - - if alpha_3_code: - alpha_3[alpha_3_code] = { - "name": name, - "alpha-2": alpha_2_code, - "numeric": numeric_code, - "region": region, - "sub-region": sub_region - } - - if numeric_code: - numeric[numeric_code] = { - "name": name, - "alpha-2": alpha_2_code, - "alpha-3": alpha_3_code, - "region": region, - "sub-region": sub_region - } - - return alpha_2, alpha_3, numeric - - -def generate_alpha_2_schema(alpha_2, output_dir): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "title": "ISO 3166-1:2020 Alpha-2 Country Code", - "description": "A two-letter country code from ISO 3166-1", - "examples": [code for code in sorted(alpha_2.keys())[:4]], - "x-license": "https://github.com/sourcemeta/std/blob/main/LICENSE", - "x-links": ["https://www.iso.org/iso-3166-country-codes.html"], - "anyOf": [ - { - "title": metadata["name"], - "x-alpha-3": metadata["alpha-3"], - "x-numeric": int(metadata["numeric"]), - **({"x-region": metadata["region"]} if metadata["region"] else {}), - **({"x-sub-region": metadata["sub-region"]} if metadata["sub-region"] else {}), - "const": code - } - for code, metadata in sorted(alpha_2.items()) - ] - } - - file_path = os.path.join(output_dir, "alpha-2.json") - with open(file_path, 'w') as file: - file.write(format_json_compact_arrays(schema)) - file.write('\n') - print(f"Generated {file_path} with {len(alpha_2)} codes") - - -def generate_alpha_3_schema(alpha_3, output_dir): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "title": "ISO 3166-1:2020 Alpha-3 Country Code", - "description": "A three-letter country code from ISO 3166-1", - "examples": [code for code in sorted(alpha_3.keys())[:4]], - "x-license": "https://github.com/sourcemeta/std/blob/main/LICENSE", - "x-links": ["https://www.iso.org/iso-3166-country-codes.html"], - "anyOf": [ - { - "title": metadata["name"], - "x-alpha-2": metadata["alpha-2"], - "x-numeric": int(metadata["numeric"]), - **({"x-region": metadata["region"]} if metadata["region"] else {}), - **({"x-sub-region": metadata["sub-region"]} if metadata["sub-region"] else {}), - "const": code - } - for code, metadata in sorted(alpha_3.items()) - ] - } - - file_path = os.path.join(output_dir, "alpha-3.json") - with open(file_path, 'w') as file: - file.write(format_json_compact_arrays(schema)) - file.write('\n') - print(f"Generated {file_path} with {len(alpha_3)} codes") - - -def generate_numeric_schema(numeric, output_dir): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "title": "ISO 3166-1:2020 Numeric Country Code", - "description": "A three-digit numeric country code from ISO 3166-1", - "examples": [int(code) for code in sorted(numeric.keys())[:4]], - "x-license": "https://github.com/sourcemeta/std/blob/main/LICENSE", - "x-links": ["https://www.iso.org/iso-3166-country-codes.html"], - "anyOf": [ - { - "title": metadata["name"], - "x-alpha-2": metadata["alpha-2"], - "x-alpha-3": metadata["alpha-3"], - **({"x-region": metadata["region"]} if metadata["region"] else {}), - **({"x-sub-region": metadata["sub-region"]} if metadata["sub-region"] else {}), - "const": int(code) - } - for code, metadata in sorted(numeric.items()) - ] - } - - file_path = os.path.join(output_dir, "numeric.json") - with open(file_path, 'w') as file: - file.write(format_json_compact_arrays(schema)) - file.write('\n') - print(f"Generated {file_path} with {len(numeric)} codes") - - -def generate_alpha_all_schema(output_dir): - schema = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "title": "ISO 3166-1:2020 Alphabetic Country Code", - "description": "A two-letter or three-letter alphabetic country code from ISO 3166-1", - "examples": ["AF", "AFG", "US", "USA"], - "x-license": "https://github.com/sourcemeta/std/blob/main/LICENSE", - "x-links": ["https://www.iso.org/iso-3166-country-codes.html"], - "anyOf": [ - {"$ref": "alpha-2.json"}, - {"$ref": "alpha-3.json"} - ] - } - - file_path = os.path.join(output_dir, "alpha-all.json") - with open(file_path, 'w') as file: - file.write(format_json_compact_arrays(schema)) - file.write('\n') - print(f"Generated {file_path}") - - - - -def main(): - script_dir = os.path.dirname(os.path.abspath(__file__)) - project_root = os.path.abspath(os.path.join(script_dir, "..", "..", "..")) - - all_file = os.path.join(project_root, "vendor", "iso3166", "all", "all.json") - slim_2_file = os.path.join(project_root, "vendor", "iso3166", "slim-2", "slim-2.json") - slim_3_file = os.path.join(project_root, "vendor", "iso3166", "slim-3", "slim-3.json") - - if not os.path.exists(all_file): - print(f"Error: Data file not found: {all_file}", file=sys.stderr) - sys.exit(1) - if not os.path.exists(slim_2_file): - print(f"Error: Data file not found: {slim_2_file}", file=sys.stderr) - sys.exit(1) - if not os.path.exists(slim_3_file): - print(f"Error: Data file not found: {slim_3_file}", file=sys.stderr) - sys.exit(1) - - output_dir = os.path.join(project_root, "schemas", "iso", "country", "2020") - os.makedirs(output_dir, exist_ok=True) - - alpha_2, alpha_3, numeric = parse_countries(all_file, slim_2_file, slim_3_file) - - generate_alpha_2_schema(alpha_2, output_dir) - generate_alpha_3_schema(alpha_3, output_dir) - generate_numeric_schema(numeric, output_dir) - generate_alpha_all_schema(output_dir) - - -if __name__ == "__main__": - main() diff --git a/templates/iso/country/2020/alpha-2.jq b/templates/iso/country/2020/alpha-2.jq new file mode 100644 index 00000000..1fb43cff --- /dev/null +++ b/templates/iso/country/2020/alpha-2.jq @@ -0,0 +1,27 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "ISO 3166-1:2020 Alpha-2 Country Code", + "description": "A two-letter country code from ISO 3166-1", + "examples": ( + map(select(.["alpha-2"] != null and .["alpha-2"] != "")) + | sort_by(.["alpha-2"]) + | .[0:4] + | map(.["alpha-2"]) + ), + "x-license": "https://github.com/sourcemeta/std/blob/main/LICENSE", + "x-links": ["https://www.iso.org/iso-3166-country-codes.html"], + "anyOf": ( + map(select(.["alpha-2"] != null and .["alpha-2"] != "")) + | sort_by(.["alpha-2"]) + | map({ + "title": .name, + "x-alpha-3": .["alpha-3"], + "x-numeric": (.["country-code"] | tonumber) + } + + (if .region != null and .region != "" then {"x-region": .region} else {} end) + + (if .["sub-region"] != null and .["sub-region"] != "" then {"x-sub-region": .["sub-region"]} else {} end) + + { + "const": .["alpha-2"] + }) + ) +} diff --git a/templates/iso/country/2020/alpha-3.jq b/templates/iso/country/2020/alpha-3.jq new file mode 100644 index 00000000..10346f71 --- /dev/null +++ b/templates/iso/country/2020/alpha-3.jq @@ -0,0 +1,27 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "ISO 3166-1:2020 Alpha-3 Country Code", + "description": "A three-letter country code from ISO 3166-1", + "examples": ( + map(select(.["alpha-3"] != null and .["alpha-3"] != "")) + | sort_by(.["alpha-3"]) + | .[0:4] + | map(.["alpha-3"]) + ), + "x-license": "https://github.com/sourcemeta/std/blob/main/LICENSE", + "x-links": ["https://www.iso.org/iso-3166-country-codes.html"], + "anyOf": ( + map(select(.["alpha-3"] != null and .["alpha-3"] != "")) + | sort_by(.["alpha-3"]) + | map({ + "title": .name, + "x-alpha-2": .["alpha-2"], + "x-numeric": (.["country-code"] | tonumber) + } + + (if .region != null and .region != "" then {"x-region": .region} else {} end) + + (if .["sub-region"] != null and .["sub-region"] != "" then {"x-sub-region": .["sub-region"]} else {} end) + + { + "const": .["alpha-3"] + }) + ) +} diff --git a/templates/iso/country/2020/numeric.jq b/templates/iso/country/2020/numeric.jq new file mode 100644 index 00000000..e310e770 --- /dev/null +++ b/templates/iso/country/2020/numeric.jq @@ -0,0 +1,27 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "ISO 3166-1:2020 Numeric Country Code", + "description": "A three-digit numeric country code from ISO 3166-1", + "examples": ( + map(select(.["country-code"] != null and .["country-code"] != "")) + | sort_by(.["country-code"]) + | .[0:4] + | map(.["country-code"] | tonumber) + ), + "x-license": "https://github.com/sourcemeta/std/blob/main/LICENSE", + "x-links": ["https://www.iso.org/iso-3166-country-codes.html"], + "anyOf": ( + map(select(.["country-code"] != null and .["country-code"] != "")) + | sort_by(.["country-code"]) + | map({ + "title": .name, + "x-alpha-2": .["alpha-2"], + "x-alpha-3": .["alpha-3"] + } + + (if .region != null and .region != "" then {"x-region": .region} else {} end) + + (if .["sub-region"] != null and .["sub-region"] != "" then {"x-sub-region": .["sub-region"]} else {} end) + + { + "const": (.["country-code"] | tonumber) + }) + ) +}