From a86563cfb1843cd186184dec4028b8d442925323 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 7 Apr 2016 21:47:51 +0200 Subject: [PATCH] make tables for external data (Tiger and aux) configurable Introduces two new settings CONST_Use_US_Tiger_Data and CONST_Use_Aux_Location_data, which are disabled by default. When false the corresponding tables are ignored in queries and updates. Aux and tiger tables are no longer created by default. This has to be done by the corresponding import scripts. The former aux table creation can be found in sql/aux_tables.sql for reference. --- lib/Geocode.php | 115 ++++++++++++++++++++++------------------- lib/PlaceLookup.php | 4 +- lib/ReverseGeocode.php | 2 +- settings/settings.php | 10 ++++ sql/aux_tables.sql | 6 +++ sql/functions.sql | 10 +++- sql/tables.sql | 9 ---- utils/setup.php | 23 +++++++-- website/details.php | 15 ++++-- website/hierarchy.php | 10 ++-- 10 files changed, 125 insertions(+), 79 deletions(-) create mode 100644 sql/aux_tables.sql diff --git a/lib/Geocode.php b/lib/Geocode.php index f2ca5e500..611ca6de6 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -431,56 +431,62 @@ function getDetails($aPlaceIDs) if (30 >= $this->iMinAddressRank && 30 <= $this->iMaxAddressRank) { - //query also location_property_tiger and location_property_aux - //Tiger search only if a housenumber was searched and if it was found (i.e. aPlaceIDs[placeID] = housenumber != -1) (realized through a join) - //only Tiger housenumbers need to be interpolated, because they are saved as lines with start- and endnumber, the common osm housenumbers are usually saved as points - $sHousenumbers = ""; - $i = 0; - $length = count($aPlaceIDs); - foreach($aPlaceIDs as $placeID => $housenumber) - { - $i++; - $sHousenumbers .= "(".$placeID.", ".$housenumber.")"; - if($i<$length) - $sHousenumbers .= ", "; + if (CONST_Use_US_Tiger_Data) + { + //query also location_property_tiger and location_property_aux + //Tiger search only if a housenumber was searched and if it was found (i.e. aPlaceIDs[placeID] = housenumber != -1) (realized through a join) + //only Tiger housenumbers need to be interpolated, because they are saved as lines with start- and endnumber, the common osm housenumbers are usually saved as points + $sHousenumbers = ""; + $i = 0; + $length = count($aPlaceIDs); + foreach($aPlaceIDs as $placeID => $housenumber) + { + $i++; + $sHousenumbers .= "(".$placeID.", ".$housenumber.")"; + if($i<$length) + $sHousenumbers .= ", "; + } + + $sSQL .= "union "; + $sSQL .= "select 'T' as osm_type, place_id as osm_id, 'place' as class, 'house' as type, null as admin_level, 30 as rank_search, 30 as rank_address, min(place_id) as place_id, min(parent_place_id) as parent_place_id, 'us' as country_code"; + $sSQL .= ", get_address_by_language(place_id, housenumber_for_place, $sLanguagePrefArraySQL) as langaddress "; + $sSQL .= ", null as placename"; + $sSQL .= ", null as ref"; + if ($this->bIncludeExtraTags) $sSQL .= ", null as extra"; + if ($this->bIncludeNameDetails) $sSQL .= ", null as names"; + $sSQL .= ", avg(st_x(centroid)) as lon, avg(st_y(centroid)) as lat,"; + $sSQL .= $sImportanceSQL."-1.15 as importance "; + $sSQL .= ", (select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(blub.parent_place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance "; + $sSQL .= ", null as extra_place "; + $sSQL .= " from (select place_id"; + //interpolate the Tiger housenumbers here + $sSQL .= ", ST_LineInterpolatePoint(linegeo, (housenumber_for_place-startnumber::float)/(endnumber-startnumber)::float) as centroid, parent_place_id, housenumber_for_place "; + $sSQL .= "from (location_property_tiger "; + $sSQL .= " join (values ".$sHousenumbers.") as housenumbers(place_id, housenumber_for_place) using(place_id)) "; + $sSQL .= " where housenumber_for_place>=0 and 30 between $this->iMinAddressRank and $this->iMaxAddressRank) as blub"; //postgres wants an alias here + $sSQL .= " group by place_id, housenumber_for_place"; //is this group by really needed?, place_id + housenumber (in combination) are unique + if (!$this->bDeDupe) $sSQL .= ", place_id "; } - $sSQL .= "union "; - $sSQL .= "select 'T' as osm_type, place_id as osm_id, 'place' as class, 'house' as type, null as admin_level, 30 as rank_search, 30 as rank_address, min(place_id) as place_id, min(parent_place_id) as parent_place_id, 'us' as country_code"; - $sSQL .= ", get_address_by_language(place_id, housenumber_for_place, $sLanguagePrefArraySQL) as langaddress "; - $sSQL .= ", null as placename"; - $sSQL .= ", null as ref"; - if ($this->bIncludeExtraTags) $sSQL .= ", null as extra"; - if ($this->bIncludeNameDetails) $sSQL .= ", null as names"; - $sSQL .= ", avg(st_x(centroid)) as lon, avg(st_y(centroid)) as lat,"; - $sSQL .= $sImportanceSQL."-1.15 as importance "; - $sSQL .= ", (select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(blub.parent_place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance "; - $sSQL .= ", null as extra_place "; - $sSQL .= " from (select place_id"; - //interpolate the Tiger housenumbers here - $sSQL .= ", ST_LineInterpolatePoint(linegeo, (housenumber_for_place-startnumber::float)/(endnumber-startnumber)::float) as centroid, parent_place_id, housenumber_for_place "; - $sSQL .= "from (location_property_tiger "; - $sSQL .= " join (values ".$sHousenumbers.") as housenumbers(place_id, housenumber_for_place) using(place_id)) "; - $sSQL .= " where housenumber_for_place>=0 and 30 between $this->iMinAddressRank and $this->iMaxAddressRank) as blub"; //postgres wants an alias here - $sSQL .= " group by place_id, housenumber_for_place"; //is this group by really needed?, place_id + housenumber (in combination) are unique - if (!$this->bDeDupe) $sSQL .= ", place_id "; - - $sSQL .= " union "; - $sSQL .= "select 'L' as osm_type, place_id as osm_id, 'place' as class, 'house' as type, null as admin_level, 0 as rank_search, 0 as rank_address, min(place_id) as place_id, min(parent_place_id) as parent_place_id, 'us' as country_code, "; - $sSQL .= "get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) as langaddress, "; - $sSQL .= "null as placename, "; - $sSQL .= "null as ref, "; - if ($this->bIncludeExtraTags) $sSQL .= "null as extra, "; - if ($this->bIncludeNameDetails) $sSQL .= "null as names, "; - $sSQL .= "avg(ST_X(centroid)) as lon, avg(ST_Y(centroid)) as lat, "; - $sSQL .= $sImportanceSQL."-1.10 as importance, "; - $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(location_property_aux.parent_place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; - $sSQL .= "null as extra_place "; - $sSQL .= "from location_property_aux where place_id in ($sPlaceIDs) "; - $sSQL .= "and 30 between $this->iMinAddressRank and $this->iMaxAddressRank "; - $sSQL .= "group by place_id"; - if (!$this->bDeDupe) $sSQL .= ", place_id"; - $sSQL .= ", get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) "; + if (CONST_Use_Aux_Location_data) + { + $sSQL .= " union "; + $sSQL .= "select 'L' as osm_type, place_id as osm_id, 'place' as class, 'house' as type, null as admin_level, 0 as rank_search, 0 as rank_address, min(place_id) as place_id, min(parent_place_id) as parent_place_id, 'us' as country_code, "; + $sSQL .= "get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) as langaddress, "; + $sSQL .= "null as placename, "; + $sSQL .= "null as ref, "; + if ($this->bIncludeExtraTags) $sSQL .= "null as extra, "; + if ($this->bIncludeNameDetails) $sSQL .= "null as names, "; + $sSQL .= "avg(ST_X(centroid)) as lon, avg(ST_Y(centroid)) as lat, "; + $sSQL .= $sImportanceSQL."-1.10 as importance, "; + $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(location_property_aux.parent_place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; + $sSQL .= "null as extra_place "; + $sSQL .= "from location_property_aux where place_id in ($sPlaceIDs) "; + $sSQL .= "and 30 between $this->iMinAddressRank and $this->iMaxAddressRank "; + $sSQL .= "group by place_id"; + if (!$this->bDeDupe) $sSQL .= ", place_id"; + $sSQL .= ", get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) "; + } } $sSQL .= " order by importance desc"; @@ -1431,7 +1437,7 @@ function lookup() $aPlaceIDs = $this->oDB->getCol($sSQL); // If nothing found try the aux fallback table - if (!sizeof($aPlaceIDs)) + if (CONST_Use_Aux_Location_data && !sizeof($aPlaceIDs)) { $sSQL = "select place_id from location_property_aux where parent_place_id in (".$sPlaceIDs.") and housenumber = '".pg_escape_string($aSearch['sHouseNumber'])."'"; if (sizeof($this->aExcludePlaceIDs)) @@ -1444,7 +1450,7 @@ function lookup() } //if nothing was found in placex or location_property_aux, then search in Tiger data for this housenumber(location_property_tiger) $searchedHousenumber = intval($aSearch['sHouseNumber']); - if (!sizeof($aPlaceIDs)) + if (CONST_Use_US_Tiger_Data && !sizeof($aPlaceIDs)) { //new query for lines, not housenumbers anymore if($searchedHousenumber%2 == 0){ @@ -1616,9 +1622,12 @@ function lookup() $sSQL .= "and (placex.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank "; if (14 >= $this->iMinAddressRank && 14 <= $this->iMaxAddressRank) $sSQL .= " OR (extratags->'place') = 'city'"; if ($this->aAddressRankList) $sSQL .= " OR placex.rank_address in (".join(',',$this->aAddressRankList).")"; - $sSQL .= ") UNION select place_id from location_property_tiger where place_id in (".join(',',array_keys($aResultPlaceIDs)).") "; - $sSQL .= "and (30 between $this->iMinAddressRank and $this->iMaxAddressRank "; - if ($this->aAddressRankList) $sSQL .= " OR 30 in (".join(',',$this->aAddressRankList).")"; + if (CONST_Use_US_Tiger_Data) + { + $sSQL .= ") UNION select place_id from location_property_tiger where place_id in (".join(',',array_keys($aResultPlaceIDs)).") "; + $sSQL .= "and (30 between $this->iMinAddressRank and $this->iMaxAddressRank "; + if ($this->aAddressRankList) $sSQL .= " OR 30 in (".join(',',$this->aAddressRankList).")"; + } $sSQL .= ")"; if (CONST_Debug) var_dump($sSQL); $aFilteredPlaceIDs = $this->oDB->getCol($sSQL); diff --git a/lib/PlaceLookup.php b/lib/PlaceLookup.php index 94e414dc7..c1fec6496 100644 --- a/lib/PlaceLookup.php +++ b/lib/PlaceLookup.php @@ -122,7 +122,7 @@ function lookup() $sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted", $this->aLangPrefOrder))."]"; - if ($this->sType == 'tiger') + if (CONST_Use_US_Tiger_Data && $this->sType == 'tiger') { $sSQL = "select place_id,partition, 'T' as osm_type, place_id as osm_id, 'place' as class, 'house' as type, null as admin_level, housenumber, null as street, null as isin, postcode,"; $sSQL .= " 'us' as country_code, parent_place_id, null as linked_place_id, 30 as rank_address, 30 as rank_search,"; @@ -166,7 +166,7 @@ function lookup() if ($this->bAddressDetails) { - if($this->sType == 'tiger') // to get addressdetails for tiger data, the housenumber is needed + if(CONST_Use_US_Tiger_Data && $this->sType == 'tiger') // to get addressdetails for tiger data, the housenumber is needed $aAddress = $this->getAddressNames($aPlace['housenumber']); else $aAddress = $this->getAddressNames(); diff --git a/lib/ReverseGeocode.php b/lib/ReverseGeocode.php index 3dff161e4..75a9b71c3 100644 --- a/lib/ReverseGeocode.php +++ b/lib/ReverseGeocode.php @@ -158,7 +158,7 @@ function lookup() } // Only street found? If it's in the US we can check TIGER data for nearest housenumber - if ($bIsInUnitedStates && $iMaxRank_orig >= 28 && $iPlaceID && ($aPlace['rank_search'] == 26 || $aPlace['rank_search'] == 27 )) + if (CONST_Use_US_Tiger_Data && $bIsInUnitedStates && $iMaxRank_orig >= 28 && $iPlaceID && ($aPlace['rank_search'] == 26 || $aPlace['rank_search'] == 27 )) { $fSearchDiam = 0.001; $sSQL = 'SELECT place_id,parent_place_id,30 as rank_search, ST_line_locate_point(linegeo,'.$sPointSQL.') as fraction'; diff --git a/settings/settings.php b/settings/settings.php index e1dd625fd..266ff356b 100644 --- a/settings/settings.php +++ b/settings/settings.php @@ -13,6 +13,16 @@ @define('CONST_Limit_Reindexing', true); // Set to false to avoid importing extra postcodes for the US. @define('CONST_Use_Extra_US_Postcodes', true); + // Set to true after importing Tiger house number data for the US. + // Note: The tables must already exist or queries will throw errors. + // After changing this setting run ./utils/setup --create-functions + // again. + @define('CONST_Use_US_Tiger_Data', false); + // Set to true after importing other external house number data. + // Note: the aux tables must already exist or queries will throw errors. + // After changing this setting run ./utils/setup --create-functions + // again. + @define('CONST_Use_Aux_Location_data', false); // Proxy settings @define('CONST_HTTP_Proxy', false); diff --git a/sql/aux_tables.sql b/sql/aux_tables.sql new file mode 100644 index 000000000..81054731d --- /dev/null +++ b/sql/aux_tables.sql @@ -0,0 +1,6 @@ +CREATE TABLE location_property_aux () INHERITS (location_property); +CREATE INDEX idx_location_property_aux_place_id ON location_property_aux USING BTREE (place_id); +CREATE INDEX idx_location_property_aux_parent_place_id ON location_property_aux USING BTREE (parent_place_id); +CREATE INDEX idx_location_property_aux_housenumber_parent_place_id ON location_property_aux USING BTREE (parent_place_id, housenumber); +GRANT SELECT ON location_property_aux TO "{www-user}"; + diff --git a/sql/functions.sql b/sql/functions.sql index 5b068dfe0..4256490eb 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -1024,7 +1024,7 @@ BEGIN --DEBUG: RAISE WARNING 'placex_insert:END: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type; - RETURN NEW; -- @DIFFUPDATES@ The following is not needed until doing diff updates, and slows the main index process down + RETURN NEW; -- %DIFFUPDATES% The following is not needed until doing diff updates, and slows the main index process down IF NEW.rank_address > 0 THEN IF (ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_IsValid(NEW.geometry)) THEN @@ -1650,6 +1650,7 @@ BEGIN END IF; END IF; + -- %NOTIGERDATA% IF 0 THEN -- for the USA we have an additional address table. Merge in zip codes from there too IF NEW.rank_search = 26 AND NEW.calculated_country_code = 'us' THEN FOR location IN SELECT distinct postcode from location_property_tiger where parent_place_id = NEW.place_id LOOP @@ -1662,6 +1663,7 @@ BEGIN nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); END LOOP; END IF; + -- %NOTIGERDATA% END IF; -- RAISE WARNING 'ISIN: %', isin_tokens; @@ -2257,18 +2259,22 @@ DECLARE hadcountry BOOLEAN; BEGIN --first query tiger data + -- %NOTIGERDATA% IF 0 THEN select parent_place_id,'us', 30, postcode, null, 'place', 'house' from location_property_tiger WHERE place_id = in_place_id AND in_housenumber>=startnumber AND in_housenumber <= endnumber INTO for_place_id,searchcountrycode, searchrankaddress, searchpostcode, searchhousename, searchclass, searchtype; IF for_place_id IS NOT NULL THEN searchhousenumber = in_housenumber::text; END IF; - + -- %NOTIGERDATA% END IF; + + -- %NOAUXDATA% IF 0 THEN IF for_place_id IS NULL THEN select parent_place_id,'us', housenumber, 30, postcode, null, 'place', 'house' from location_property_aux WHERE place_id = in_place_id INTO for_place_id,searchcountrycode, searchhousenumber, searchrankaddress, searchpostcode, searchhousename, searchclass, searchtype; END IF; + -- %NOAUXDATA% END IF; IF for_place_id IS NULL THEN select parent_place_id, calculated_country_code, housenumber, rank_search, postcode, name, class, type from placex diff --git a/sql/tables.sql b/sql/tables.sql index 0434e13af..49338c4a3 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -100,15 +100,6 @@ CREATE TABLE location_property ( ); SELECT AddGeometryColumn('location_property', 'centroid', 4326, 'POINT', 2); -CREATE TABLE location_property_aux () INHERITS (location_property); -CREATE INDEX idx_location_property_aux_place_id ON location_property_aux USING BTREE (place_id); -CREATE INDEX idx_location_property_aux_parent_place_id ON location_property_aux USING BTREE (parent_place_id); -CREATE INDEX idx_location_property_aux_housenumber_parent_place_id ON location_property_aux USING BTREE (parent_place_id, housenumber); -GRANT SELECT ON location_property_aux TO "{www-user}"; - -CREATE TABLE location_property_tiger (linegeo GEOMETRY, place_id BIGINT, partition INTEGER, parent_place_id BIGINT, startnumber INTEGER, endnumber INTEGER, interpolationtype TEXT, postcode TEXT); -GRANT SELECT ON location_property_tiger TO "{www-user}"; - drop table IF EXISTS search_name; CREATE TABLE search_name ( place_id BIGINT, diff --git a/utils/setup.php b/utils/setup.php index 0026a8135..cfe7f5601 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -219,9 +219,26 @@ if (!file_exists(CONST_InstallPath.'/module/nominatim.so')) fail("nominatim module not built"); $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql'); $sTemplate = str_replace('{modulepath}', CONST_InstallPath.'/module', $sTemplate); - if ($aCMDResult['enable-diff-updates']) $sTemplate = str_replace('RETURN NEW; -- @DIFFUPDATES@', '--', $sTemplate); - if ($aCMDResult['enable-debug-statements']) $sTemplate = str_replace('--DEBUG:', '', $sTemplate); - if (CONST_Limit_Reindexing) $sTemplate = str_replace('--LIMIT INDEXING:', '', $sTemplate); + if ($aCMDResult['enable-diff-updates']) + { + $sTemplate = str_replace('RETURN NEW; -- %DIFFUPDATES%', '--', $sTemplate); + } + if ($aCMDResult['enable-debug-statements']) + { + $sTemplate = str_replace('--DEBUG:', '', $sTemplate); + } + if (CONST_Limit_Reindexing) + { + $sTemplate = str_replace('--LIMIT INDEXING:', '', $sTemplate); + } + if (!CONST_Use_US_Tiger_Data) + { + $sTemplate = str_replace('-- %NOTIGERDATA% ', '', $sTemplate); + } + if (!CONST_Use_Aux_Location_data) + { + $sTemplate = str_replace('-- %NOAUXDATA% ', '', $sTemplate); + } pgsqlRunScript($sTemplate); } diff --git a/website/details.php b/website/details.php index c7a567552..5edef6f50 100755 --- a/website/details.php +++ b/website/details.php @@ -56,10 +56,17 @@ $iPlaceID = (int)$_GET['place_id']; - $iParentPlaceID = $oDB->getOne('select parent_place_id from location_property_tiger where place_id = '.$iPlaceID); - if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; - $iParentPlaceID = $oDB->getOne('select parent_place_id from location_property_aux where place_id = '.$iPlaceID); - if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; + if (CONST_Use_US_Tiger_Data) + { + $iParentPlaceID = $oDB->getOne('select parent_place_id from location_property_tiger where place_id = '.$iPlaceID); + if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; + } + + if (CONST_Use_Aux_Location_data) + { + $iParentPlaceID = $oDB->getOne('select parent_place_id from location_property_aux where place_id = '.$iPlaceID); + if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; + } $hLog = logStart($oDB, 'details', $_SERVER['QUERY_STRING'], $aLangPrefOrder); diff --git a/website/hierarchy.php b/website/hierarchy.php index 5e3d7299d..6a281aa87 100755 --- a/website/hierarchy.php +++ b/website/hierarchy.php @@ -47,13 +47,13 @@ $iPlaceID = (int)$_GET['place_id']; - $sAuxHouseNumber = false; - $iParentPlaceID = $oDB->getOne('select parent_place_id from location_property_tiger where place_id = '.$iPlaceID); - if ($iParentPlaceID) + if (CONST_Use_US_Tiger_Data) { - $iPlaceID = $iParentPlaceID; + $iParentPlaceID = $oDB->getOne('select parent_place_id from location_property_tiger where place_id = '.$iPlaceID); + if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; } - else + + if (CONST_Use_Aux_Location_data) { $iParentPlaceID = $oDB->getOne('select parent_place_id from location_property_aux where place_id = '.$iPlaceID); if ($iParentPlaceID) $iPlaceID = $iParentPlaceID;