# OC4IDS Quality Criteria, Checks and Metrics

Use this notebook to check data against the OC4IDS publication criteria, to perform quality checks and to calculate quality metrics. After running checks, use the status report notebook to report on the results.

## How to use this notebook

1. Run the cells in [Setup](#scrollTo=wh6V7iqi76GR)
2. [Choose a load](#scrollTo=U4apQasSWqjv) to check
3. Run all cells in [Run checks](#scrollTo=OPnM-ojl78gX)

## Setup

In [None]:
# @title ### Enter database credentials
# @markdown ODS users: Enter the password for the `postgres` user, from the ODS password database.
import getpass

# Database user that the connection string in the setup cell is built with.
user = 'postgres'

# Prompt for the password without echoing it.
print('Enter your credentials')
password = getpass.getpass('Password:')

In [None]:
# @title ### Setup notebook environment

!pip install --upgrade ipython-sql > pip.log

connection_string = 'postgresql://' + user + ':' + password + '@oc4ids-database-2.cuujgua4wses.us-east-1.rds.amazonaws.com/postgres'

# https://pypi.org/project/ipython-sql/
%load_ext sql
%sql $connection_string
%config SqlMagic.autopandas = True  # Return Pandas DataFrames instead of regular result sets
%config SqlMagic.displaycon = False  # Don't show connection string after execute
%config SqlMagic.feedback = False  # Don't print number of rows affected by DML

# https://colab.research.google.com/notebooks/data_table.ipynb
%load_ext google.colab.data_table
from google.colab.data_table import DataTable
DataTable.max_columns = 50 # Increase max columns so that dataframes with many columns are rendered as data tables
DataTable.include_index = False # Remove the index from data tables for easier copy-pasting to Google Docs

In [None]:
# @title ### Define functions

import requests

def add_exchange_rates(dates):

  """
  Fetch exchange rates (USD base currency) for dates and add them to the exchange_rates table in the database.

  :dates: A one-dimensional Pandas Dataframe containing dates in YYYY-MM-DD format.
  """

  for date in dates['date']:
    r = requests.get(f'https://openexchangerates.org/api/historical/{date}.json?app_id=a937e272181d4ed494e11fcd1fb83c0a&base=USD')

    if r.status_code == requests.codes.ok:
      rates = r.json()
      pairs = json.dumps(rates['rates'])

      query = f"""

      insert into
        exchange_rates
      select
        :date as date,
        key as currency,
        value::text::numeric as rate
      from
        json_each('{pairs}')
      where
        ('{date}', key) not in (select distinct date, currency from exchange_rates);

      """

      %sql {query}

## Choose a load

In [None]:
# @title ### Get a list of loads (collections of collections)

%%sql

-- One row per load with its collection ids and its minimum and maximum data_version, ordered by maximum data_version descending.
SELECT
    load_id,
    array_agg(id) AS collection_ids,
    min(data_version) AS min_data_version,
    max(data_version) AS max_data_version
FROM
    collection
WHERE
    load_id IS NOT NULL
GROUP BY
    load_id
ORDER BY
    max(data_version) DESC;

### Choose the `load_id` to check and the `load_id` to compare it to

In [None]:
# Load to check: its collections are looked up by this load_id in the cells below.
load_id = 'dev_1'

In [None]:
# Load used as the baseline by the metrics_new_projects check.
comparison_load_id = 'dev_2'

In [None]:
# @title ### Get collection ids

result = %sql select array_agg(id) as collection_ids from collection where load_id = :load_id;
collection_ids = tuple(result['collection_ids'][0])

## Run checks

In [None]:
# @title Set `run_id`

from datetime import datetime
# Timestamp identifying this run; the queries below bind it as the run_id parameter.
run_id = datetime.now()

In [None]:
# @title Populate `run_collection` table

%%sql

-- Record one row per collection of the chosen load against the current run_id.
INSERT INTO
    run_collection
SELECT
    :run_id AS run_id,
    id AS collection_id
FROM
    collection
WHERE
    load_id = :load_id;

### criteria_registered

In [None]:
%%sql

-- Fails a collection when the first 13 characters of any project_id are not listed in registered_prefixes.
-- The output maps each offending project_id to those 13 characters and is null for passing collections.
WITH unregistered AS (
    SELECT
        collection_id AS collection_id,
        json_object_agg(project_id, left(project_id, 13)) AS output
    FROM
        projects
    WHERE
        collection_id IN :collection_ids
        AND left(project_id, 13) NOT IN (SELECT prefix FROM registered_prefixes)
    GROUP BY
        collection_id
)
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'criteria_registered' AS check_id,
    collection.id AS collection_id,
    (output IS NULL) AS result,
    output
FROM
    collection
LEFT JOIN
    unregistered ON unregistered.collection_id = collection.id
WHERE
    collection.id IN :collection_ids;

### criteria_appropriate

In [None]:
%%sql

-- Stores, per collection, the additional fields reported in cove_output together with their count and examples.
-- Only fields without additional_field_descendance and with at least one example are kept. The result is always stored as null.
WITH reported_fields AS (
    SELECT
        collection_id,
        jsonb_object_agg(key, json_build_object('count', value -> 'count', 'examples', value -> 'examples')) AS output
    FROM
        collection_check
    CROSS JOIN
        jsonb_each(cove_output -> 'additional_fields')
    WHERE
        collection_id IN :collection_ids
        AND value -> 'additional_field_descendance' IS NULL
        AND jsonb_array_length(value -> 'examples') > 0
    GROUP BY
        collection_id
)
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'criteria_appropriate' AS check_id,
    collection.id,
    NULL AS result,
    output
FROM
    collection
LEFT JOIN
    reported_fields ON reported_fields.collection_id = collection.id
WHERE
    collection.id IN :collection_ids;

### criteria_active

In [None]:
%%sql

-- Passes a collection when the greatest updated value among its projects is no older than 12 months before today.
-- A collection whose projects have no updated value fails, because the comparison is then null.
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'criteria_active' AS check_id,
    collection_id,
    (max(data ->> 'updated')::date >= current_date - interval '12 month') IS TRUE AS result,
    NULL AS output
FROM
    projects
WHERE
    collection_id IN :collection_ids
GROUP BY
    collection_id;

### criteria_valid

In [None]:
%%sql

-- Passes a collection only when cove_output lists no validation errors for it.
-- The helper query has a row only for collections with at least one validation error, so a missing row means the collection is valid.
WITH collections_with_errors AS (
    SELECT
        collection_id,
        count(*) = 0 AS no_errors
    FROM
        collection_check
    CROSS JOIN
        jsonb_array_elements(cove_output -> 'validation_errors') AS validation_errors
    WHERE
        collection_id IN :collection_ids
    GROUP BY
        collection_id
)
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'criteria_valid' AS check_id,
    collection.id,
    (no_errors IS NULL) AS result,
    NULL AS output
FROM
    collection
LEFT JOIN
    collections_with_errors ON collections_with_errors.collection_id = collection.id
WHERE
    collection.id IN :collection_ids;

### criteria_conformant

In [None]:
%%sql

-- Passes a collection only when cove_output lists no structure warnings for it.
-- The warning count is computed to detect failures but the stored output is always null.
WITH collections_with_warnings AS (
    SELECT
        collection_id,
        jsonb_build_object('count', count(DISTINCT key)) AS output
    FROM
        collection_check
    CROSS JOIN
        jsonb_each(cove_output -> 'structure_warnings')
    WHERE
        collection_id IN :collection_ids
    GROUP BY
        collection_id
)
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'criteria_conformant' AS check_id,
    collection.id AS collection_id,
    (output IS NULL) AS result,
    NULL AS output
FROM
    collection
LEFT JOIN
    collections_with_warnings ON collections_with_warnings.collection_id = collection.id
WHERE
    collection.id IN :collection_ids;

### metrics_new_projects

In [None]:
%%sql

-- Counts, per collection, the projects whose project_id does not occur in any collection of the comparison load.
-- Collections with no such projects are stored with a null count.
WITH unseen_projects AS (
    SELECT
        collection_id,
        jsonb_build_object('count', count(project_id)) AS output
    FROM
        projects
    WHERE
        collection_id IN :collection_ids
        AND project_id NOT IN (
            SELECT
                project_id
            FROM
                projects
            WHERE
                collection_id IN (
                    SELECT
                        id
                    FROM
                        collection
                    WHERE
                        load_id = :comparison_load_id
                )
        )
    GROUP BY
        collection_id
)
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'metrics_new_projects' AS check_id,
    collection.id AS collection_id,
    NULL AS result,
    coalesce(output, jsonb_build_object('count', NULL)) AS output
FROM
    collection
LEFT JOIN
    unseen_projects ON unseen_projects.collection_id = collection.id
WHERE
    collection.id IN :collection_ids;

### metrics_last_updated

In [None]:
%%sql

-- Stores, per collection, the greatest updated value among its projects as a date.
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'metrics_last_updated' AS check_id,
    collection_id,
    NULL AS result,
    jsonb_build_object('date', max(data ->> 'updated')::date) AS output
FROM
    projects
WHERE
    collection_id IN :collection_ids
GROUP BY
    collection_id;

### metrics_earliest_start_date

In [None]:
%%sql

-- Stores, per collection, the smallest period startDate among its projects as a date.
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'metrics_earliest_start_date' AS check_id,
    collection_id,
    NULL AS result,
    jsonb_build_object('date', min(data -> 'period' ->> 'startDate')::date) AS output
FROM
    projects
WHERE
    collection_id IN :collection_ids
GROUP BY
    collection_id;

### metrics_latest_end_date

In [None]:
%%sql

-- Stores, per collection, the greatest period endDate among its projects as a date.
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'metrics_latest_end_date' AS check_id,
    collection_id,
    NULL AS result,
    jsonb_build_object('date', max(data -> 'period' ->> 'endDate')::date) AS output
FROM
    projects
WHERE
    collection_id IN :collection_ids
GROUP BY
    collection_id;

### metrics_additional_field_count

In [None]:
%%sql

-- Counts, per collection, the distinct keys under additional_fields in cove_output.
-- Collections without any additional field are stored with a null count.
WITH field_counts AS (
    SELECT DISTINCT
        collection_id,
        jsonb_build_object('count', count(DISTINCT key)) AS output
    FROM
        collection_check
    CROSS JOIN
        jsonb_each(cove_output -> 'additional_fields')
    WHERE
        collection_id IN :collection_ids
    GROUP BY
        collection_id
)
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'metrics_additional_field_count' AS check_id,
    collection.id AS collection_id,
    NULL AS result,
    coalesce(output, jsonb_build_object('count', NULL))
FROM
    collection
LEFT JOIN
    field_counts ON field_counts.collection_id = collection.id
WHERE
    collection.id IN :collection_ids;

### metrics_project_count

In [None]:
%%sql

-- Counts the projects in each collection. Collections without any project are stored with a null count.
WITH project_counts AS (
    SELECT
        collection_id,
        jsonb_build_object('count', count(*)) AS output
    FROM
        projects
    WHERE
        collection_id IN :collection_ids
    GROUP BY
        collection_id
)
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'metrics_project_count' AS check_id,
    collection.id AS collection_id,
    NULL AS result,
    coalesce(output, jsonb_build_object('count', NULL)) AS output
FROM
    collection
LEFT JOIN
    project_counts ON project_counts.collection_id = collection.id
WHERE
    collection.id IN :collection_ids;

### metrics_validation_error_count

In [None]:
%%sql

-- Counts, per collection, the distinct validation error messages in cove_output.
-- The first item of each validation_errors entry is a JSON document stored as a string.
-- Reading it with the text operator unescapes it properly, so messages that contain quotes or backslashes still parse.
-- Collections without validation errors are stored with a null count.
with counts as (
  select
    collection_id,
    jsonb_build_object('count', count(distinct (validation_errors ->> 0)::jsonb -> 'message')) as output
  from
    collection_check
  cross join
    jsonb_array_elements(cove_output -> 'validation_errors') as validation_errors
  where
    collection_id in :collection_ids
  group by
    collection_id
)
insert into
  check_results (run_id, check_id, collection_id, result, output)
select
  :run_id as run_id,
  'metrics_validation_error_count' as check_id,
  collection.id as collection_id,
  null as result,
  coalesce(output, jsonb_build_object('count', null))
from
  counts
right join
  collection on
  counts.collection_id = collection.id
where
  collection.id in :collection_ids;

### metrics_structure_warning_count

In [None]:
%%sql

-- Counts, per collection, the distinct keys under structure_warnings in cove_output.
-- Collections without structure warnings are stored with a null count.
WITH warning_counts AS (
    SELECT
        collection_id,
        jsonb_build_object('count', count(DISTINCT key)) AS output
    FROM
        collection_check
    CROSS JOIN
        jsonb_each(cove_output -> 'structure_warnings')
    WHERE
        collection_id IN :collection_ids
    GROUP BY
        collection_id
)
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'metrics_structure_warning_count' AS check_id,
    collection.id AS collection_id,
    NULL AS result,
    coalesce(output, jsonb_build_object('count', NULL))
FROM
    collection
LEFT JOIN
    warning_counts ON warning_counts.collection_id = collection.id
WHERE
    collection.id IN :collection_ids;

### semantics_sector_codelist

In [None]:
%%sql

-- Fails a collection when cove_output reports at least one projects/sector value under additional_open_codelist_values.
-- The reported values are stored under the all_projects key of the output.
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'semantics_sector_codelist' AS check_id,
    collection_id AS collection_id,
    (jsonb_array_length(cove_output -> 'additional_open_codelist_values' -> 'projects/sector' -> 'values') > 0) IS NOT TRUE AS result,
    jsonb_build_object('all_projects', cove_output -> 'additional_open_codelist_values' -> 'projects/sector' -> 'values') AS output
FROM
    collection_check
WHERE
    collection_id IN :collection_ids;

### semantics_public_authority_names

In [None]:
%%sql

-- Stores a random sample of up to 10 publicAuthority names per collection, keyed by project_id. The result is always null.
WITH candidate_names AS (
    SELECT
        collection_id,
        project_id,
        data -> 'publicAuthority' -> 'name' AS name,
        row_number() OVER (PARTITION BY collection_id ORDER BY random()) AS sample_rank
    FROM
        projects
    WHERE
        collection_id IN :collection_ids
        AND data -> 'publicAuthority' -> 'name' IS NOT NULL
), sampled_names AS (
    SELECT
        collection_id,
        json_object_agg(project_id, name) AS output
    FROM
        candidate_names
    WHERE
        sample_rank <= 10
    GROUP BY
        collection_id
)
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'semantics_public_authority_names' AS check_id,
    collection.id,
    NULL AS result,
    output
FROM
    collection
LEFT JOIN
    sampled_names ON sampled_names.collection_id = collection.id
WHERE
    collection.id IN :collection_ids;

### semantics_supplier_names

In [None]:
%%sql

-- Stores a random sample of up to 10 supplier names per collection, keyed by project_id. The result is always null.
-- Supplier names are read from the summary of every contracting process of every project.
WITH candidate_names AS (
    SELECT
        collection_id,
        project_id,
        suppliers -> 'name' AS name,
        row_number() OVER (PARTITION BY collection_id ORDER BY random()) AS sample_rank
    FROM
        projects
    CROSS JOIN
        jsonb_array_elements(data -> 'contractingProcesses') AS contracting_processes
    CROSS JOIN
        jsonb_array_elements(contracting_processes -> 'summary' -> 'suppliers') AS suppliers
    WHERE
        collection_id IN :collection_ids
        AND data -> 'contractingProcesses' IS NOT NULL
        AND contracting_processes -> 'summary' -> 'suppliers' IS NOT NULL
        AND suppliers -> 'name' IS NOT NULL
), sampled_names AS (
    SELECT
        collection_id,
        json_object_agg(project_id, name) AS output
    FROM
        candidate_names
    WHERE
        sample_rank <= 10
    GROUP BY
        collection_id
)
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'semantics_supplier_names' AS check_id,
    collection.id,
    NULL AS result,
    output
FROM
    collection
LEFT JOIN
    sampled_names ON sampled_names.collection_id = collection.id
WHERE
    collection.id IN :collection_ids;

### semantics_budgets

Get previously unseen dates:

In [None]:
%%sql dates <<

-- Distinct budget dates that have no row in exchange_rates yet, stored in the dates variable.
-- The date is the budget approvalDate, falling back to the period startDate.
-- Only projects with a period startDate and a non-USD budget that has both currency and amount are considered.
SELECT DISTINCT
    coalesce(data -> 'budget' ->> 'approvalDate', data -> 'period' ->> 'startDate')::date AS date
FROM
    projects
WHERE
    collection_id IN :collection_ids
    AND data -> 'period' -> 'startDate' IS NOT NULL
    AND data -> 'budget' -> 'amount' -> 'currency' IS NOT NULL
    AND data -> 'budget' -> 'amount' -> 'amount' IS NOT NULL
    AND data -> 'budget' -> 'amount' ->> 'currency' != 'USD'
    AND coalesce(data -> 'budget' ->> 'approvalDate', data -> 'period' ->> 'startDate')::date NOT IN (
        SELECT DISTINCT date FROM exchange_rates
    )
ORDER BY
    date ASC;

Add exchange rates to database:

In [None]:
# Fetch and store exchange rates for the budget dates returned by the query above.
add_exchange_rates(dates)

Run check:

In [None]:
%%sql

-- Fails a collection when any numeric budget amount, converted to USD, is zero or negative, or is 5,000,000,000 or more.
-- Amounts are converted with the exchange rate for the budget approvalDate, falling back to the period startDate.
-- Budgets without a matching row in exchange_rates are left out of the check.
-- The output maps each offending project_id to its amount in USD and is null for passing collections.
with numeric_budgets as(
  select
    collection_id,
    project_id,
    coalesce(data -> 'budget' ->> 'approvalDate', data -> 'period' ->> 'startDate')::date as date,
    (data -> 'budget' -> 'amount' -> 'amount')::numeric as amount,
    data -> 'budget' -> 'amount' ->> 'currency' as currency
  from
    projects
  where
    collection_id in :collection_ids
  and
    jsonb_typeof(data -> 'budget' -> 'amount' -> 'amount') = 'number'
), usd_budgets as(
  select
    collection_id,
    project_id,
    amount / exchange_rates.rate as amount_usd
  from
    numeric_budgets
  join
    exchange_rates
  on
    numeric_budgets.date = exchange_rates.date
  and
    numeric_budgets.currency = exchange_rates.currency
), failures as(
  select
    collection_id,
    json_object_agg(project_id, amount_usd) as output
  from
    usd_budgets
  where
    amount_usd <= 0
  or
    amount_usd >= 5000000000
  group by
    collection_id
)
-- Store the outcome like every other check does. The insert was previously commented out, so nothing was recorded.
insert into
  check_results (run_id, check_id, collection_id, result, output)
select
  :run_id as run_id,
  'semantics_budgets' as check_id,
  collection.id as collection_id,
  case
    when output is not null then false
    else true
  end as result,
  output
from
  failures
right join
  collection on
  failures.collection_id = collection.id
where
  collection.id in :collection_ids;

### semantics_contract_values

Get previously unseen dates:

In [None]:
%%sql dates <<

-- Distinct contract dates that have no row in exchange_rates yet, stored in the dates variable.
-- The date is the contractPeriod startDate of the contracting process summary, falling back to the tender datePublished.
-- Only contracting processes with a non-USD contractValue that has both currency and amount are considered.
SELECT DISTINCT
    coalesce(
        contracting_processes -> 'summary' -> 'contractPeriod' ->> 'startDate',
        contracting_processes -> 'summary' -> 'tender' ->> 'datePublished'
    )::date AS date
FROM
    projects
CROSS JOIN
    jsonb_array_elements(data -> 'contractingProcesses') AS contracting_processes
WHERE
    collection_id IN :collection_ids
    AND contracting_processes -> 'summary' -> 'contractValue' -> 'currency' IS NOT NULL
    AND contracting_processes -> 'summary' -> 'contractValue' -> 'amount' IS NOT NULL
    AND contracting_processes -> 'summary' -> 'contractValue' ->> 'currency' != 'USD'
    AND coalesce(
        contracting_processes -> 'summary' -> 'contractPeriod' ->> 'startDate',
        contracting_processes -> 'summary' -> 'tender' ->> 'datePublished'
    )::date NOT IN (
        SELECT DISTINCT date FROM exchange_rates
    )
ORDER BY
    date ASC;

Add exchange rates to database:

In [None]:
# Fetch and store exchange rates for the contract dates returned by the query above.
add_exchange_rates(dates)

Run check:

In [None]:
%%sql

-- Fails a collection when any numeric contract value, converted to USD, is zero or negative, or is 5,000,000,000 or more.
-- Values are converted with the exchange rate for the contractPeriod startDate, falling back to the tender datePublished.
-- Contract values without a matching row in exchange_rates are left out of the check.
WITH numeric_values AS (
    SELECT
        collection_id,
        project_id,
        coalesce(
            contracting_processes -> 'summary' -> 'contractPeriod' ->> 'startDate',
            contracting_processes -> 'summary' -> 'tender' ->> 'datePublished'
        )::date AS date,
        (contracting_processes -> 'summary' -> 'contractValue' -> 'amount')::numeric AS amount,
        contracting_processes -> 'summary' -> 'contractValue' ->> 'currency' AS currency
    FROM
        projects
    CROSS JOIN
        jsonb_array_elements(data -> 'contractingProcesses') AS contracting_processes
    WHERE
        collection_id IN :collection_ids
        AND jsonb_typeof(contracting_processes -> 'summary' -> 'contractValue' -> 'amount') = 'number'
), usd_values AS (
    SELECT
        collection_id,
        project_id,
        amount / exchange_rates.rate AS amount_usd
    FROM
        numeric_values
    JOIN
        exchange_rates
        ON numeric_values.date = exchange_rates.date
        AND numeric_values.currency = exchange_rates.currency
), implausible_values AS (
    SELECT
        collection_id,
        json_object_agg(project_id, amount_usd) AS output
    FROM
        usd_values
    WHERE
        amount_usd <= 0
        OR amount_usd >= 5000000000
    GROUP BY
        collection_id
)
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'semantics_contract_values' AS check_id,
    collection.id AS collection_id,
    (output IS NULL) AS result,
    output
FROM
    collection
LEFT JOIN
    implausible_values ON implausible_values.collection_id = collection.id
WHERE
    collection.id IN :collection_ids;

### semantics_funder_names

In [None]:
%%sql

-- Stores a random sample of up to 10 names of parties with the funder role per collection, keyed by project_id. The result is always null.
WITH candidate_names AS (
    SELECT
        collection_id,
        project_id,
        parties -> 'name' AS name,
        row_number() OVER (PARTITION BY collection_id ORDER BY random()) AS sample_rank
    FROM
        projects
    CROSS JOIN
        jsonb_array_elements(data -> 'parties') AS parties
    WHERE
        collection_id IN :collection_ids
        AND data -> 'parties' IS NOT NULL
        AND parties -> 'name' IS NOT NULL
        AND parties -> 'roles' ? 'funder'
), sampled_names AS (
    SELECT
        collection_id,
        json_object_agg(project_id, name) AS output
    FROM
        candidate_names
    WHERE
        sample_rank <= 10
    GROUP BY
        collection_id
)
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'semantics_funder_names' AS check_id,
    collection.id,
    NULL AS result,
    output
FROM
    collection
LEFT JOIN
    sampled_names ON sampled_names.collection_id = collection.id
WHERE
    collection.id IN :collection_ids;

### semantics_dates

In [None]:
%%sql

-- Fails a collection when any project date is on or before 1970-01-01 or on or after 2050-01-01.
-- The dates inspected are updated, period/startDate, period/endDate and completion/endDate, skipping missing and empty values.
-- The output maps each offending project_id to the offending value and is null for passing collections.
WITH project_dates AS (
    SELECT
        collection_id,
        project_id,
        'updated' AS path,
        data ->> 'updated' AS value
    FROM
        projects
    WHERE
        collection_id IN :collection_ids
        AND data -> 'updated' IS NOT NULL
        AND data ->> 'updated' != ''
    UNION
    SELECT
        collection_id,
        project_id,
        'period/startDate' AS path,
        data -> 'period' ->> 'startDate' AS value
    FROM
        projects
    WHERE
        collection_id IN :collection_ids
        AND data -> 'period' -> 'startDate' IS NOT NULL
        AND data -> 'period' ->> 'startDate' != ''
    UNION
    SELECT
        collection_id,
        project_id,
        'period/endDate' AS path,
        data -> 'period' ->> 'endDate' AS value
    FROM
        projects
    WHERE
        collection_id IN :collection_ids
        AND data -> 'period' -> 'endDate' IS NOT NULL
        AND data -> 'period' ->> 'endDate' != ''
    UNION
    SELECT
        collection_id,
        project_id,
        'completion/endDate' AS path,
        data -> 'completion' ->> 'endDate' AS value
    FROM
        projects
    WHERE
        collection_id IN :collection_ids
        AND data -> 'completion' IS NOT NULL
        AND data -> 'completion' ->> 'endDate' != ''
), implausible_dates AS (
    SELECT
        collection_id,
        json_object_agg(project_id, value) AS output
    FROM
        project_dates
    WHERE
        value::date <= '1970-01-01'::date
        OR value::date >= '2050-01-01'::date
    GROUP BY
        collection_id
)
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'semantics_dates' AS check_id,
    collection.id,
    (output IS NULL) AS result,
    output
FROM
    collection
LEFT JOIN
    implausible_dates ON implausible_dates.collection_id = collection.id
WHERE
    collection.id IN :collection_ids;

### semantics_role_coherence

In [None]:
%%sql

-- Fails a collection when a party referenced from an organization reference lacks the matching entry in its roles array.
-- Parties are matched to references by id. The output maps project_id to the missing role and is null for passing collections.
-- NOTE(review) a party with no roles member makes the role test null, so it is not reported. Confirm that this is intended.
WITH missing_roles AS (
    -- publicAuthority
    SELECT
        collection_id,
        project_id,
        'publicAuthority' AS role
    FROM
        projects
    JOIN
        jsonb_array_elements(data -> 'parties') AS parties
    ON
        parties -> 'id' = data -> 'publicAuthority' -> 'id'
    WHERE
        collection_id IN :collection_ids
        AND NOT parties -> 'roles' ? 'publicAuthority'
    UNION ALL
    -- budget/sourceParty
    SELECT
        collection_id,
        project_id,
        'sourceParty' AS role
    FROM
        projects
    JOIN
        jsonb_array_elements(data -> 'parties') AS parties
    ON
        parties -> 'id' = data -> 'budget' -> 'sourceParty' -> 'id'
    WHERE
        collection_id IN :collection_ids
        AND NOT parties -> 'roles' ? 'sourceParty'
    UNION ALL
    -- contractingProcesses/summary/tender/tenderers
    SELECT
        collection_id,
        project_id,
        'tenderer' AS role
    FROM
        projects
    JOIN
        jsonb_array_elements(data -> 'parties') AS parties
    CROSS JOIN
        jsonb_array_elements(data -> 'contractingProcesses') AS contracting_processes
    CROSS JOIN
        jsonb_array_elements(contracting_processes -> 'summary' -> 'tender' -> 'tenderers') AS tenderers
    ON
        parties -> 'id' = tenderers -> 'id'
    WHERE
        collection_id IN :collection_ids
        AND NOT parties -> 'roles' ? 'tenderer'
    UNION ALL
    -- contractingProcesses/summary/tender/procuringEntity
    SELECT
        collection_id,
        project_id,
        'procuringEntity' AS role
    FROM
        projects
    JOIN
        jsonb_array_elements(data -> 'parties') AS parties
    CROSS JOIN
        jsonb_array_elements(data -> 'contractingProcesses') AS contracting_processes
    ON
        parties -> 'id' = contracting_processes -> 'summary' -> 'tender' -> 'procuringEntity' -> 'id'
    WHERE
        collection_id IN :collection_ids
        AND NOT parties -> 'roles' ? 'procuringEntity'
    UNION ALL
    -- contractingProcesses/summary/tender/administrativeEntity
    SELECT
        collection_id,
        project_id,
        'administrativeEntity' AS role
    FROM
        projects
    JOIN
        jsonb_array_elements(data -> 'parties') AS parties
    CROSS JOIN
        jsonb_array_elements(data -> 'contractingProcesses') AS contracting_processes
    ON
        parties -> 'id' = contracting_processes -> 'summary' -> 'tender' -> 'administrativeEntity' -> 'id'
    WHERE
        collection_id IN :collection_ids
        AND NOT parties -> 'roles' ? 'administrativeEntity'
    UNION ALL
    -- contractingProcesses/summary/suppliers
    SELECT
        collection_id,
        project_id,
        'supplier' AS role
    FROM
        projects
    JOIN
        jsonb_array_elements(data -> 'parties') AS parties
    CROSS JOIN
        jsonb_array_elements(data -> 'contractingProcesses') AS contracting_processes
    CROSS JOIN
        jsonb_array_elements(contracting_processes -> 'summary' -> 'suppliers') AS suppliers
    ON
        parties -> 'id' = suppliers -> 'id'
    WHERE
        collection_id IN :collection_ids
        AND NOT parties -> 'roles' ? 'supplier'
), incoherent_roles AS (
    SELECT
        collection_id,
        json_object_agg(project_id, role) AS output
    FROM
        missing_roles
    GROUP BY
        collection_id
)
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'semantics_role_coherence' AS check_id,
    collection.id AS collection_id,
    (output IS NULL) AS result,
    output
FROM
    collection
LEFT JOIN
    incoherent_roles ON incoherent_roles.collection_id = collection.id
WHERE
    collection.id IN :collection_ids;

### semantics_coordinates

In [None]:
%%sql

-- Fails a collection when a Point or LineString location has a coordinate pair outside the accepted ranges.
-- The first number of each pair must be between -90 and 90 and the second between -180 and 180.
-- NOTE(review) this reads each pair as latitude then longitude, whereas GeoJSON (RFC 7946) orders positions as longitude then latitude. Confirm which order the data uses.
-- The output maps each offending project_id to the coordinates of the location and is null for passing collections.
WITH invalid_coordinates AS (
    SELECT
        collection_id,
        project_id,
        locations -> 'geometry' -> 'coordinates' AS coordinates
    FROM
        projects
    CROSS JOIN
        jsonb_array_elements(data -> 'locations') AS locations
    WHERE
        collection_id IN :collection_ids
        AND locations -> 'geometry' ->> 'type' = 'Point'
        AND (
            (locations -> 'geometry' -> 'coordinates' ->> 0)::numeric NOT BETWEEN -90 AND 90
            OR (locations -> 'geometry' -> 'coordinates' ->> 1)::numeric NOT BETWEEN -180 AND 180
        )
    UNION ALL
    SELECT
        collection_id,
        project_id,
        locations -> 'geometry' -> 'coordinates' AS coordinates
    FROM
        projects
    CROSS JOIN
        jsonb_array_elements(data -> 'locations') AS locations
    CROSS JOIN
        jsonb_array_elements(locations -> 'geometry' -> 'coordinates') AS line_point
    WHERE
        collection_id IN :collection_ids
        AND locations -> 'geometry' ->> 'type' = 'LineString'
        AND (
            (line_point ->> 0)::numeric NOT BETWEEN -90 AND 90
            OR (line_point ->> 1)::numeric NOT BETWEEN -180 AND 180
        )
), out_of_range AS (
    SELECT
        collection_id,
        json_object_agg(project_id, coordinates) AS output
    FROM
        invalid_coordinates
    GROUP BY
        collection_id
)
INSERT INTO
    check_results (run_id, check_id, collection_id, result, output)
SELECT
    :run_id AS run_id,
    'semantics_coordinates' AS check_id,
    collection.id AS collection_id,
    (output IS NULL) AS result,
    output
FROM
    collection
LEFT JOIN
    out_of_range ON out_of_range.collection_id = collection.id
WHERE
    collection.id IN :collection_ids;