Skip to content

Some useful SQL Scripts

Pascal Dihé edited this page Oct 4, 2017 · 47 revisions

Scripts for Data Import and Management

https://github.com/switchonproject/cids-custom-switchon-server/tree/dev/src/model/scripts

General SQL Scripts for Maintenance and Inspection

Get Resource by collection

Get the id and name of resources that belong to a specific collection

SELECT resource.id, resource.name
    FROM 
	resource
    WHERE collection = (SELECT DISTINCT tag.id      
            FROM tag      
            WHERE tag.name ILIKE 'NTSG - AE_Land3'           
                AND tag.taggroup IN 
                (SELECT id FROM taggroup WHERE name ILIKE 'collection' ) limit 1)
  GROUP BY
	resource.id, resource.name
	ORDER BY resource.id ASC

Get Tag id by name

Get the id of a tag by tagname and taggroup name

    SELECT
        DISTINCT tag.id      
    FROM
        tag      
    WHERE
        tag.name ILIKE 'NTSG - AE_Land3'           
        AND tag.taggroup IN(
            SELECT
                id                   
            FROM
                taggroup                   
            WHERE
                name ILIKE 'collection'         
        ) limit 1

Find Collections

Find Collections (tags) with more than 5 resources

    SELECT
        tag.name                              
    FROM
        tag                           
    LEFT JOIN
        taggroup              
            ON tag.taggroup = taggroup.id                                  
    LEFT JOIN
        resource              
            ON resource.collection = tag.id,
        cs_class                                      
    WHERE
        cs_class.name = 'tag'                                              
        AND  taggroup.name = 'collection'                                              
        AND         resource."type" = (
            SELECT
                tag.id                                                                  
            FROM
                tag                                                                               
            LEFT JOIN
                taggroup                      
                    ON tag.taggroup = taggroup.id                                                                              
            WHERE
                taggroup.name = 'resource type'                                                                                      
                AND                   tag.name = 'external data'                                             
        )                              
    GROUP BY
        tag.name,
        tag.id,
        cs_class.id                               
    HAVING
        (
            COUNT(resource.id) > 5
        )                              
    ORDER BY
        tag.name ASC;

Create new Representation

Create a new representation and associate it with a resource.

WITH rep as 
(INSERT INTO "public".representation 
("type", spatialresolution, "name", description, applicationprofile, tags, 
"function", contentlocation, temporalresolution, protocol, content, 
spatialscale, contenttype, uuid, uploadmessage, uploadstatus) 
VALUES (213, NULL, 'corine:clc00_c111 Tileserver', 
'CLC:clc00_c211 Tileserver', 1359, 11950, 72, 
'http://tl-243.xtr.deltares.nl/tileserver/corine:clc00_c111/{z}/{x}/{y}.png', 
NULL, 205, NULL, NULL, 59, 'corine:clc00_c111', NULL, NULL) 
RETURNING id) INSERT INTO "public".jt_resource_representation (representationid, resource_reference) 
SELECT id, 11867 from rep;

Copy CSW View to static data table

csw_copy_data_table_from_view_for_pycsw.sql

Find resources by bbox without search geometry

SELECT count(distinct r.id) FROM resource r 
INNER JOIN geom g ON r.spatialcoverage = g.id 
AND g.geo_field && st_GeomFromEWKT('SRID=4326;POLYGON ((-9.194288369255645 54.72093488558994, -9.194288369255645 35.72093488558994, 13.805711630744355 35.72093488558994, 13.805711630744355 54.72093488558994, -9.194288369255645 54.72093488558994))') 
AND st_intersects(st_GeomFromEWKT('SRID=4326;POLYGON ((-9.194288369255645 54.72093488558994, -9.194288369255645 35.72093488558994, 13.805711630744355 35.72093488558994, 13.805711630744355 54.72093488558994, -9.194288369255645 54.72093488558994))'), geo_field) 
WHERE r.id IS NOT NUL

Find resources by bbox with search geometry

Spatial search using the geom_search table

SELECT  count  (distinct resource.id) FROM resource
INNER JOIN geom_search ON resource.id = geom_search.resource
AND geom_search.geo_field && st_GeomFromEWKT('SRID=4326;POLYGON ((-9.194288369255645 54.72093488558994, -9.194288369255645 35.72093488558994, 13.805711630744355 35.72093488558994, 13.805711630744355 54.72093488558994, -9.194288369255645 54.72093488558994))') 
AND st_intersects(st_GeomFromEWKT('SRID=4326;POLYGON ((-9.194288369255645 54.72093488558994, -9.194288369255645 35.72093488558994, 13.805711630744355 35.72093488558994, 13.805711630744355 54.72093488558994, -9.194288369255645 54.72093488558994))'), geom_search.geo_field) 
WHERE resource.id IS NOT NULL

Find unrelated geometries in the geom table

Find geometries, that are not directly connected to resources.

SELECT count(geom.id)
FROM geom
WHERE geom.id NOT IN
    (SELECT spatialcoverage
     FROM resource WHERE spatialcoverage IS NOT NULL);

Find unrelated geometries in the geom_search table

Find search geometries, that are not directly connected to resources.

SELECT count(geom_search.id)
FROM geom_search
WHERE resource NOT IN (
  SELECT id
  FROM resource);

Remove unrelated geometries in the geom_search table

Remove search geometries, that are not directly connected to resources.

DELETE
FROM geom_search
WHERE geom_search.resource NOT IN
    (SELECT id
     FROM resource);

Remove orphaned geom references from the geom_search table

Remove geom references from search geometries table, that are no longer associated to the respective resource

UPDATE geom_search
SET "geom" = NULL
WHERE id IN
    ( SELECT id
     FROM geom_search
     WHERE geom IN
         (SELECT id
          FROM geom
          WHERE geom.id NOT IN
              (SELECT spatialcoverage
               FROM resource
               WHERE spatialcoverage IS NOT NULL)));

Insert imported geometries

Insert geometries imported from a .shp file from a temporary import table into the common geom_search table and simplify them with ST_Envelope. Replace '4711' with the respective resource id.

INSERT INTO public.geom_search(resource, geo_field)
SELECT 4711,
       ST_Envelope(geom)
FROM import_tables.geosearch_import;

Fill geom_search with valid resources geometries

Find Multipoint / Multipolygons and small bboxes and add them to the geometry search table. Simplify geometries to speed up geo-search.

TRUNCATE "public".geom_search;

INSERT INTO public.geom_search(resource, geo_field, geom)
SELECT DISTINCT resource.id,
                geom.geo_field,
                geom.id
FROM resource
JOIN geom ON resource.spatialcoverage = geom.id
WHERE
    (SELECT ST_GeometryType(geo_field) IN ('ST_MultiPoint',
                                           'ST_MultiPolygon',
                                           'ST_Point'))
  OR (
        (SELECT ST_GeometryType(geo_field) = 'ST_Polygon')
      AND
        (SELECT ST_Area(geo_field) < 100));


UPDATE geom_search
SET geo_field = ST_Simplify(geo_field,0.1)
WHERE ST_GeometryType(geo_field) = 'ST_MultiPolygon';

Find MultiPoint Resources

SELECT name, description,
  (SELECT ST_GeometryType(geo_field)) AS mp
FROM resource
JOIN geom ON resource.spatialcoverage = geom.id
WHERE
    (SELECT ST_GeometryType(geo_field) = 'ST_MultiPoint')
ORDER BY resource.name;

Find used tags of a specific tag group

SELECT DISTINCT tag.name
FROM tag
INNER JOIN jt_resource_tag ON jt_resource_tag.tagid = tag.id
WHERE taggroup IN
    (SELECT id
     FROM taggroup
     WHERE name ILIKE '%cuahsi%')
ORDER BY tag.name

Find used topic categories

SELECT DISTINCT tag.name
FROM tag
INNER JOIN resource ON resource.topiccategory = tag.id
WHERE taggroup IN
    (SELECT id
     FROM taggroup
     WHERE name ILIKE 'topic category')
ORDER BY tag.name

Find duplicate tags of a specific tag group

SELECT tag.name,
    COUNT(tag.name) AS NumOccurrences
FROM tag
LEFT JOIN taggroup ON tag.taggroup = taggroup.id
WHERE taggroup.name = 'keywords - open'
GROUP BY 
    tag.name
HAVING ( COUNT(tag.name) > 1 )
ORDER BY tag.name;

Rename Tags

UPDATE jt_resource_tag
SET tagid =
  (SELECT id
   FROM tag
   WHERE taggroup IN
       (SELECT id
        FROM taggroup
        WHERE name ILIKE '%cuahsi%')
     AND tag.name = 'Land cover' LIMIT 1)
WHERE tagid =
    (SELECT id
     FROM tag
     WHERE taggroup IN
         (SELECT id
          FROM taggroup
          WHERE name ILIKE '%cuahsi%')
       AND tag.name = 'Land surface classification' LIMIT 1);

DELETE
FROM tag
WHERE taggroup =
    (SELECT id
     FROM taggroup
     WHERE name ILIKE '%cuahsi%')
  AND tag.name = 'Land surface';

Add Keywords to Resources

--INSERT INTO jt_resource_tag (resource_reference, tagid)
SELECT resource.id,
       tag.id
FROM resource,
     tag
WHERE resource.name ILIKE 'AMSRE_36V_AM_FT_%'
  AND tag.name ILIKE 'Temperature, soil'
  AND tag.taggroup = 169;

Find duplicate organisations

SELECT DISTINCT n1.* FROM contact n1
inner join contact n2 on n2.organisation=n1.organisation
where n1.id <> n2.id
ORDER BY n1.organisation

Import CUAHSI Tabluar Ontology

Import the CUAHSI Tabluar Ontology into new CUAHSI keyword list and skip duplicate keywords:

See https://github.com/switchonproject/cids-custom-switchon-server/blob/dev/src/model/scripts/import_import_cuashi_ontology.sql

List all keywords

Get a list of all keywords in the Meta-Data Repository

select name from tag 
where taggroup in 
(select id from taggroup where name ilike '%keyword%') 
group by tag.name order by tag.name

geom_search entries

Show the last 10 entries in the geom search table including the type of the geometry

SELECT id,
       resource,
       geom,
       ST_GeometryType(geo_field)
FROM geom_search
ORDER BY resource DESC LIMIT 10

conflicting geom_search entries

Show the last 10 conflicting GeometryCollection entries in the geom search table that cannot be used with spatial search

SELECT id,
       resource,
       geom,
       ST_AsText(geo_field)
FROM geom_search
WHERE ST_GeometryType(geo_field) = 'ST_GeometryCollection'
ORDER BY resource DESC LIMIT 10

Sanitize conflicting geom_search entries

Sanitize conflicting GeometryCollection entries in the geom search table by extracting only polygons

UPDATE geom_search
SET geo_field = ST_CollectionExtract(geo_field, 3)
WHERE id IN
    (SELECT id
     FROM geom_search
     WHERE ST_GeometryType(geo_field) = 'ST_GeometryCollection')

geosearch_import entries

Show the last 10 entries in the geosearch_import table including the WKT geometries

SELECT ogc_fid,
       fid,
       ST_AsText(geom)
FROM import_tables.geosearch_import
ORDER BY ogc_fid LIMIT 10;

geom_search duplicates

Find real duplicate entries in the geom_search table

SELECT id,
       resource,
       geom,
       ST_ASTEXT(geo_field)
FROM geom_search
WHERE resource IN
    (SELECT DISTINCT resource
     FROM geom_search
     GROUP BY resource HAVING COUNT(resource) = 2)
ORDER BY resource;

geom_search to geom

Update the spatial coverage of a specific resource with the minimum bounding box of their geometries from the geom_search table.

WITH geom_coverage AS
  (INSERT INTO "public".geom (geo_field) SELECT ST_Envelope(ST_ConvexHull(ST_Collect(geo_field))) AS geo_field
   FROM public.geom_search
   WHERE resource = 8710 RETURNING id)
UPDATE "public".resource
SET spatialcoverage =
  (SELECT id
   FROM geom_coverage)
WHERE id = 8710

duplicate WMS/TMS representations

Delete duplicate WMS/TMS representations

    DELETE 
    FROM
        representation 
    where
        id IN(
            SELECT
                min(representation.id)     
            FROM
                representation                                                       
            JOIN
                jt_resource_representation                                                                                                                                              
                    ON jt_resource_representation.representationid = representation.id               -- 
                    AND jt_resource_representation.resource_reference = 8559                  
            WHERE
                representation.type = 213                                                                                              
                AND representation.function = 72                                                                                      
                AND representation.protocol IN (
                    186, 205                                                                                    
                )                                                                                     
                AND representation.contenttype IN (
                    51,59                                                                                             
                )            
            group by
                resource_reference   
            having
                count(resource_reference ) > 1
        )

clean jt_resource_representation

Clean jt_resource_representation by removing all references to no-existent resources and representations

DELETE 
FROM
    "public".jt_resource_representation 
WHERE
    resource_reference NOT IN (
        select
            id 
        from
            resource) 
DELETE
FROM
    "public".jt_resource_representation 
WHERE
    representationid NOT IN (
        select
            id 
        from
            representation) 

Find SWITCH-ON Resources

Find all resources uploaded to switch-on or zenodo servers (deltares.nl) inlcuding the DOI, if available

    
    select
        resource.id,
        resource.name,
        string_agg(metadata.uuid,
        ';') as DOI,
        contact.name as contact,
        contact.email,
        contact.organisation,
        tag.name as accessconditions,
        'http://www.water-switch-on.eu/sip-webclient/byod/#/resource/' || resource.id as byod_link,
        string_agg(representation.contentlocation,
        ';') as dataset_urls     
    from
        resource                    
    join
        jt_resource_representation                                                                                                                      
            on resource.id = jt_resource_representation.resource_reference                                      
    join
        representation                                                                                                                      
            on representation.id = jt_resource_representation.representationid                                                                                                         
            and representation.type = 212                                        
            and function = 71                                          
    left join
        contact                                                                                                    
            on contact.id = resource.contact                                                                                  
            and contact.name != 'SWITCH-ON (Meta-Data Provider)'     
    left join
        jt_metadata_resource             
            on resource.id = jt_metadata_resource.resource_reference     
    left join
        metadata                                                                                           
            on metadata.id = jt_metadata_resource.metadataid                                                                            
            and metadata."type" = 1542     
    join
        tag                           
            on tag.id = resource.accessconditions               
    where
        representation.contentlocation ilike '%deltares.nl%'                                                                
        or representation.contentlocation ilike '%zenodo%'        
    group by
        resource.id,
        resource.name,
        contact.name,
        contact.email,
        contact.organisation,
        tag.id               
    order by
        resource.id asc

Categorise SWITCH-ON Resources

Assign all resources uploaded to switch-on or zenodo servers (deltares.nl) to category 'SWITCH-ON Open Data' (149)

    update
        resource                
    set
        collection = 1453,
        type = 1544
    where
        id in(
            select
                distinct resource.id                                            
            from
                resource                                                  
            join
                jt_resource_representation                                                                                          
                    on resource.id = jt_resource_representation.resource_reference                                                  
            join
                representation                                                                                          
                    on representation.id = jt_resource_representation.representationid                                                                             
                    and representation.type = 212                                                 
            where
                representation.contentlocation ilike '%deltares.nl%'                                                    
                or representation.contentlocation ilike '%zenodo%'                                            
            group by
                resource.id                                              
            order by
                resource.id asc                           
        )

Generate sitemap.txt

Generate clean resource URLs for sitemap.txt

    select
        'http://www.water-switch-on.eu/sip-webclient/byod/resource/' || resource.id as url       
    from
        resource 
    order by
        id asc

Get ODR-Tool resources

Get a list of resources added with help of the ODR Tool

    select
        distinct resource.id as resource_id,
        resource.name as resource_name,
        metadata.creationdate as registration_data,
        'http://www.water-switch-on.eu/sip-webclient/byod/resource/' || resource.id as byod_link,
        'http://data.water-switch-on.eu/switchon_server_rest/SWITCHON.RESOURCE/' || resource.id as metadata_link                      
    from
        resource                                        
    join
        jt_metadata_resource                                                           
            on resource.id = jt_metadata_resource.resource_reference                          
    join
        metadata                                                                                                                                               
            on metadata.id = jt_metadata_resource.metadataid                                             
            and metadata."type" = 177                                            
            and metadata."name" = 'OpenData Registration Meta-Data'               
    order by
        metadata.creationdate desc

Count search geometries

Count all search geometries

    select
        sum(ST_NumGeometries (geom_search.geo_field)) 
    from
        geom_search