Skip to content
Permalink
Browse files

POI writer: normalize names option (#1123)

  • Loading branch information...
KamikX authored and devemux86 committed Jun 30, 2019
1 parent 12b966f commit ca9f1d1ca5f6cddfdae7655a6e88dd07cd3d6a44
@@ -50,6 +50,7 @@ The `--poi-writer`, or short `--pw` task indicates that the POI writer plugin sh
|`tag-conf-file`|Path to an XML configuration file that contains mappings from OSM tags to category names and a hierarchy of those categories.|path to an XML file|(blank) internal default poi mapping is used|
|`names`|Add only named entities.|true/false|true|
|`ways`|Also parse ways.|true/false|true|
|`normalize`|Add a `normalized_name` tag holding the lowercased name with accents removed (for accent-insensitive search). *Works only if `all-tags` is true.*|true/false|false|
|`geo-tags`|Add geo tags.|true/false|false|
|`filter-categories`|Drop empty categories.|true/false|true|

@@ -1,6 +1,7 @@
/*
* Copyright 2015-2017 devemux86
* Copyright 2017-2018 Gustl22
* Copyright 2019 Kamil Donoval
*
* This program is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free Software
@@ -21,41 +22,17 @@
import org.locationtech.jts.geom.Polygon;
import org.mapsforge.core.model.BoundingBox;
import org.mapsforge.core.model.LatLong;
import org.mapsforge.poi.storage.DbConstants;
import org.mapsforge.poi.storage.PoiCategory;
import org.mapsforge.poi.storage.PoiCategoryFilter;
import org.mapsforge.poi.storage.PoiCategoryManager;
import org.mapsforge.poi.storage.UnknownPoiCategoryException;
import org.mapsforge.poi.storage.WhitelistPoiCategoryFilter;
import org.mapsforge.poi.storage.*;
import org.mapsforge.poi.writer.logging.LoggerWrapper;
import org.mapsforge.poi.writer.logging.ProgressManager;
import org.mapsforge.poi.writer.model.PoiWriterConfiguration;
import org.openstreetmap.osmosis.core.container.v0_6.EntityContainer;
import org.openstreetmap.osmosis.core.domain.v0_6.Entity;
import org.openstreetmap.osmosis.core.domain.v0_6.Node;
import org.openstreetmap.osmosis.core.domain.v0_6.Relation;
import org.openstreetmap.osmosis.core.domain.v0_6.Tag;
import org.openstreetmap.osmosis.core.domain.v0_6.Way;
import org.openstreetmap.osmosis.core.domain.v0_6.WayNode;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import org.openstreetmap.osmosis.core.domain.v0_6.*;

import java.sql.*;
import java.text.Normalizer;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.TreeMap;
import java.util.*;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -88,6 +65,7 @@ public static PoiWriter newInstance(PoiWriterConfiguration configuration, Progre
private static final GeometryFactory GEOMETRY_FACTORY = new GeometryFactory();

// Localized name keys: the literal "name", a colon, then a language code of 1-3 letters
// optionally followed by '-' or '_' separated subtags of 1-8 alphanumerics (e.g. "name:de").
private static final Pattern NAME_LANGUAGE_PATTERN = Pattern.compile("(name)(:)([a-zA-Z]{1,3}(?:[-_][a-zA-Z0-9]{1,8})*)");
// One or more characters from the Unicode "Combining Diacritical Marks" block; used by
// normalize(String) to delete the accent marks left behind by NFD decomposition.
private static final Pattern NAME_NORMALIZE_PATTERN = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

final PoiWriterConfiguration configuration;
private final ProgressManager progressManager;
@@ -301,6 +279,17 @@ String getTagValue(Collection<Tag> tags, String key) {
return null;
}

/**
 * Strips accents from a string.
 * <p>
 * The input is first decomposed to Unicode form NFD, so that an accented character
 * becomes its base character followed by separate combining marks. Every run of
 * characters matched by {@code NAME_NORMALIZE_PATTERN} is then deleted.
 *
 * @param str the text that may contain accented characters
 * @return the decomposed text with the matched combining marks removed
 */
private String normalize(String str) {
    final Matcher diacritics = NAME_NORMALIZE_PATTERN.matcher(Normalizer.normalize(str, Normalizer.Form.NFD));
    return diacritics.replaceAll("");
}

/**
* Post-process.
*/
@@ -493,6 +482,12 @@ private void processEntity(Entity entity, double latitude, double longitude) {
if (tagMap.isEmpty()) {
for (Tag t : entity.getTags()) {
tagMap.put(t.getKey().toLowerCase(Locale.ENGLISH), t.getValue());

// If normalize is enabled and key == name
if (configuration.isNormalize() && t.getKey().toLowerCase(Locale.ENGLISH).equals("name")) {
String normalizedValue = normalize(t.getValue().toLowerCase(Locale.ROOT));
tagMap.put("normalized_name", normalizedValue);
}
}
}
categories.add(pc);
@@ -1,6 +1,7 @@
/*
* Copyright 2015-2017 devemux86
* Copyright 2017-2018 Gustl22
* Copyright 2019 Kamil Donoval
*
* This program is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free Software
@@ -33,6 +34,7 @@
private int fileSpecificationVersion;
private boolean filterCategories;
private boolean names;
private boolean normalize;
private File outputFile;
private String preferredLanguage;
private boolean progressLogs;
@@ -148,6 +150,13 @@ public boolean isNames() {
return names;
}

/**
 * Reports whether the {@code normalize} option is switched on.
 *
 * @return the current value of the {@code normalize} flag
 */
public boolean isNormalize() {
return normalize;
}

/**
* @return the progressLogs
*/
@@ -241,6 +250,14 @@ public void setNames(boolean names) {
this.names = names;
}

/**
 * Switches the {@code normalize} option on or off.
 *
 * @param normalize the new value of the {@code normalize} flag
 */
public void setNormalize(boolean normalize) {
this.normalize = normalize;
}


/**
* @param outputFile the output file to set
*/
@@ -3,6 +3,7 @@
* Copyright 2010, 2011 Karsten Groll
* Copyright 2015-2017 devemux86
* Copyright 2017-2018 Gustl22
* Copyright 2019 Kamil Donoval
*
* This program is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free Software
@@ -38,6 +39,7 @@
private static final String PARAM_FILTER_CATEGORIES = "filter-categories";
private static final String PARAM_GEO_TAGS = "geo-tags";
private static final String PARAM_NAMES = "names";
private static final String PARAM_NORMALIZE = "normalize";
private static final String PARAM_OUTFILE = "file";
private static final String PARAM_PREFERRED_LANGUAGE = "preferred-language";
private static final String PARAM_PROGRESS_LOGS = "progress-logs";
@@ -53,6 +55,7 @@ protected TaskManager createTaskManagerImpl(TaskConfiguration taskConfig) {
configuration.setFilterCategories(getBooleanArgument(taskConfig, PARAM_FILTER_CATEGORIES, true));
configuration.setGeoTags(getBooleanArgument(taskConfig, PARAM_GEO_TAGS, false));
configuration.setNames(getBooleanArgument(taskConfig, PARAM_NAMES, true));
configuration.setNormalize(getBooleanArgument(taskConfig, PARAM_NORMALIZE, false));
configuration.addOutputFile(getStringArgument(taskConfig, PARAM_OUTFILE, Constants.DEFAULT_PARAM_OUTFILE));
configuration.setPreferredLanguage(getStringArgument(taskConfig, PARAM_PREFERRED_LANGUAGE, null));
configuration.setProgressLogs(getBooleanArgument(taskConfig, PARAM_PROGRESS_LOGS, true));

0 comments on commit ca9f1d1

Please sign in to comment.
You can’t perform that action at this time.