From b97c966bf404aed99d9205abe4df470731648e23 Mon Sep 17 00:00:00 2001 From: Dave Johnson Date: Wed, 3 Jan 2018 08:54:54 -0500 Subject: [PATCH] Support for MB tiles data provider, including tests --- .../org/mrgeo/hdfs/utils/HadoopFileUtils.java | 28 +- .../hdfs/vector/HdfsVectorDataProvider.java | 2 +- .../vector/HdfsVectorDataProviderFactory.java | 2 +- .../main/java/org/mrgeo/utils/FileUtils.java | 9 +- ...rgeo.data.vector.VectorDataProviderFactory | 31 - .../MbVectorTilesDataProvider.java | 80 +- .../MbVectorTilesDataProviderFactory.java | 132 ++++ .../MbVectorTilesInputFormat.java | 37 +- .../MbVectorTilesRecordReader.java | 4 +- ...rgeo.data.vector.VectorDataProviderFactory | 31 + .../MbVectorTilesInputFormatTest.java | 152 +++- .../MbVectorTilesRecordReaderTest.java | 15 - .../AmbulatoryPt.dbf | Bin 0 -> 3034 bytes .../AmbulatoryPt.geojson | 15 + .../AmbulatoryPt.mbtiles | Bin 0 -> 15360 bytes .../AmbulatoryPt.prj | 1 + .../AmbulatoryPt.sbn | Bin 0 -> 212 bytes .../AmbulatoryPt.sbx | Bin 0 -> 124 bytes .../AmbulatoryPt.shp | Bin 0 -> 324 bytes .../AmbulatoryPt.shp.xml | 711 ++++++++++++++++++ .../AmbulatoryPt.shx | Bin 0 -> 164 bytes 21 files changed, 1118 insertions(+), 132 deletions(-) create mode 100644 mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesDataProviderFactory.java create mode 100644 mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/resources/META-INF/services/org.mrgeo.data.vector.VectorDataProviderFactory create mode 100644 mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.dbf create mode 100644 mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.geojson create mode 100644 
mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.mbtiles create mode 100644 mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.prj create mode 100644 mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.sbn create mode 100644 mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.sbx create mode 100644 mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.shp create mode 100644 mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.shp.xml create mode 100644 mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.shx diff --git a/mrgeo-core/src/main/java/org/mrgeo/hdfs/utils/HadoopFileUtils.java b/mrgeo-core/src/main/java/org/mrgeo/hdfs/utils/HadoopFileUtils.java index 38dcff809..f0dacc240 100644 --- a/mrgeo-core/src/main/java/org/mrgeo/hdfs/utils/HadoopFileUtils.java +++ b/mrgeo-core/src/main/java/org/mrgeo/hdfs/utils/HadoopFileUtils.java @@ -113,8 +113,8 @@ public static void copyFileToHdfs(String fromFile, String toFile, { Path toPath = new Path(toFile); Path fromPath = new Path(fromFile); - FileSystem srcFS = getFileSystem(toPath); - FileSystem dstFS = getFileSystem(fromPath); + FileSystem srcFS = getFileSystem(fromPath); + FileSystem dstFS = getFileSystem(toPath); Configuration conf = HadoopUtils.createConfiguration(); InputStream in = null; @@ -134,6 +134,30 @@ public static void copyFileToHdfs(String fromFile, String toFile, } } +public static void 
copyFileFromHdfs(String fromFile, File toLocalFile) throws IOException +{ + Path fromPath = new Path(fromFile); + FileSystem srcFS = getFileSystem(fromPath); + + Configuration conf = HadoopUtils.createConfiguration(); + InputStream in = null; + OutputStream out = null; + try + { + in = srcFS.open(fromPath); + out = new FileOutputStream(toLocalFile); + + IOUtils.copyBytes(in, out, conf, true); + toLocalFile.deleteOnExit(); + } + catch (IOException e) + { + IOUtils.closeStream(out); + IOUtils.closeStream(in); + throw e; + } +} + public static void copyToHdfs(Path fromDir, Path toDir, String fileName) throws IOException { diff --git a/mrgeo-core/src/main/java/org/mrgeo/hdfs/vector/HdfsVectorDataProvider.java b/mrgeo-core/src/main/java/org/mrgeo/hdfs/vector/HdfsVectorDataProvider.java index e7cb6466c..feaecf063 100644 --- a/mrgeo-core/src/main/java/org/mrgeo/hdfs/vector/HdfsVectorDataProvider.java +++ b/mrgeo-core/src/main/java/org/mrgeo/hdfs/vector/HdfsVectorDataProvider.java @@ -207,7 +207,7 @@ private static Path resolveName(Configuration conf, String input, @SuppressWarnings("squid:S1166") // Exception caught and handled @SuppressFBWarnings(value = "PATH_TRAVERSAL_IN", justification = "method only makes complete URI out of the name") -private static Path resolveNameToPath(Configuration conf, String input, +public static Path resolveNameToPath(Configuration conf, String input, ProviderProperties providerProperties, boolean mustExist) throws IOException { if (input.indexOf('/') >= 0) diff --git a/mrgeo-core/src/main/java/org/mrgeo/hdfs/vector/HdfsVectorDataProviderFactory.java b/mrgeo-core/src/main/java/org/mrgeo/hdfs/vector/HdfsVectorDataProviderFactory.java index 922ad60cc..281b03a3e 100644 --- a/mrgeo-core/src/main/java/org/mrgeo/hdfs/vector/HdfsVectorDataProviderFactory.java +++ b/mrgeo-core/src/main/java/org/mrgeo/hdfs/vector/HdfsVectorDataProviderFactory.java @@ -127,7 +127,7 @@ public void delete(String name, } } -private Path getBasePath(Configuration conf) 
+public static Path getBasePath(Configuration conf) { return HdfsVectorDataProvider.getBasePath(conf); } diff --git a/mrgeo-core/src/main/java/org/mrgeo/utils/FileUtils.java b/mrgeo-core/src/main/java/org/mrgeo/utils/FileUtils.java index acb55e201..1c4f42019 100644 --- a/mrgeo-core/src/main/java/org/mrgeo/utils/FileUtils.java +++ b/mrgeo-core/src/main/java/org/mrgeo/utils/FileUtils.java @@ -32,12 +32,9 @@ public class FileUtils public static File createUniqueTmpDir() throws IOException { - File baseDir = new File(System.getProperty("java.io.tmpdir")); - - String username = "mrgeo-" + System.getProperty("user.name"); - String baseName = "-" + System.currentTimeMillis(); - - File tempDir = new File(baseDir, username + "/" + baseName); + File tmpUserDir = createTmpUserDir(); + String baseName = "" + System.currentTimeMillis(); + File tempDir = new File(tmpUserDir, baseName); return createDisposibleDirectory(tempDir); } diff --git a/mrgeo-dataprovider/mrgeo-dataprovider-geowave/src/main/resources/META-INF/services/org.mrgeo.data.vector.VectorDataProviderFactory b/mrgeo-dataprovider/mrgeo-dataprovider-geowave/src/main/resources/META-INF/services/org.mrgeo.data.vector.VectorDataProviderFactory index a123674da..f847d5a17 100644 --- a/mrgeo-dataprovider/mrgeo-dataprovider-geowave/src/main/resources/META-INF/services/org.mrgeo.data.vector.VectorDataProviderFactory +++ b/mrgeo-dataprovider/mrgeo-dataprovider-geowave/src/main/resources/META-INF/services/org.mrgeo.data.vector.VectorDataProviderFactory @@ -13,35 +13,4 @@ # See the License for the specific language governing permissions and limitations under the License. # -# -# Copyright 2009-2016 DigitalGlobe, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and limitations under the License. -# -# - -# -# Copyright 2009-2015 DigitalGlobe, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and limitations under the License. 
-# - org.mrgeo.data.vector.geowave.GeoWaveVectorDataProviderFactory diff --git a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesDataProvider.java b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesDataProvider.java index 8ad2857ef..1f491cfe8 100644 --- a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesDataProvider.java +++ b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesDataProvider.java @@ -19,15 +19,22 @@ import com.almworks.sqlite4java.SQLiteException; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.RecordWriter; import org.mrgeo.data.ProviderProperties; import org.mrgeo.data.vector.*; import org.mrgeo.geometry.Geometry; +import org.mrgeo.hdfs.utils.HadoopFileUtils; +import org.mrgeo.hdfs.vector.HdfsVectorDataProvider; +import org.mrgeo.utils.FileUtils; import org.mrgeo.utils.tms.Bounds; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.Map; @@ -36,14 +43,18 @@ public class MbVectorTilesDataProvider extends VectorDataProvider { static Logger log = LoggerFactory.getLogger(MbVectorTilesDataProvider.class); - protected static boolean canOpen( + private static Map localCopies = new HashMap(); + + private Configuration conf; + + protected static boolean canOpen(Configuration conf, String input, ProviderProperties providerProperties) throws IOException { - MbVectorTilesSettings dbSettings = parseResourceName(input); + MbVectorTilesSettings 
dbSettings = parseResourceName(input, conf, providerProperties); SQLiteConnection conn = null; try { - conn = getDbConnection(dbSettings); + conn = getDbConnection(dbSettings, conf); return true; } catch(IOException e) { @@ -58,23 +69,40 @@ protected static boolean canOpen( } @SuppressFBWarnings(value = "PATH_TRAVERSAL_IN", justification = "File must be specified by the user") - static SQLiteConnection getDbConnection(MbVectorTilesSettings dbSettings) throws IOException + static SQLiteConnection getDbConnection(MbVectorTilesSettings dbSettings, + Configuration conf) throws IOException { - // TODO: Download the file to a local directory if it is remote. See - // HadoopFileUtils for how we handle SequenceFile and MapFile. We should - // do something similar for these files. Keep in mind that we will also - // need to be able to copy files from HDFS in addition to S3 because the - // SQLite DB has to be on the file system. String filename = dbSettings.getFilename(); - java.io.File dbFile = new java.io.File(filename); - if (!dbFile.exists()) { - throw new IOException("The MB tiles file must be in the file system: " + filename); + Path filepath = new Path(filename); + FileSystem fs = HadoopFileUtils.getFileSystem(conf, filepath); + + File dbFile = null; + if (fs instanceof LocalFileSystem) { + dbFile = new File(filepath.toUri().getPath()); + } + else { + String localName = localCopies.get(filename); + if (localName == null) { + dbFile = new File(FileUtils.createUniqueTmpDir(), new File(filename).getName()); + Path localFilePath = new Path("file://" + dbFile.getAbsolutePath()); + log.info("Attempting to copy MB tiles file " + filename + + " to the local machine at " + dbFile.getAbsolutePath()); + fs.copyToLocalFile(false, filepath, localFilePath, true); + dbFile.deleteOnExit(); + localCopies.put(filename, dbFile.getAbsolutePath()); + } + else { + log.info("Using a copy of " + filename + + " already transferred to the local machine at " + localName); + dbFile = new 
File(localName); + } } + try { return new SQLiteConnection(dbFile).open(false); } catch(SQLiteException e) { - throw new IOException("Unable to open MB tiles file: " + filename, e); + throw new IOException("Unable to open MB tiles file: " + dbFile.getAbsolutePath(), e); } } @@ -83,30 +111,38 @@ public MbVectorTilesDataProvider(Configuration conf, String inputPrefix, ProviderProperties providerProperties) { super(inputPrefix, input, providerProperties); + this.conf = conf; } @Override public VectorMetadataReader getMetadataReader() { - return new MbVectorTilesMetadataReader(this); + // Not yet implemented. The metadata for mb tiles vector features + // is potentially different for every feature. So it doesn't make + // sense to provide metadata here. + return null; } @Override public VectorMetadataWriter getMetadataWriter() { - // Not yet implemented + // Not yet implemented. The metadata for mb tiles vector features + // is potentially different for every feature. So it doesn't make + // sense to write metadata here. 
return null; } @Override public VectorReader getVectorReader() throws IOException { + // Not yet implemented return null; } @Override public VectorReader getVectorReader(VectorReaderContext context) throws IOException { + // Not yet implemented return null; } @@ -120,7 +156,7 @@ public VectorWriter getVectorWriter() throws IOException @Override public RecordReader getRecordReader() throws IOException { - MbVectorTilesSettings results = parseResourceName(getResourceName()); + MbVectorTilesSettings results = parseResourceName(getResourceName(), conf, getProviderProperties()); return new MbVectorTilesRecordReader(results); } @@ -134,7 +170,7 @@ public RecordWriter getRecordWriter() @Override public VectorInputFormatProvider getVectorInputFormatProvider(VectorInputFormatContext context) throws IOException { - MbVectorTilesSettings results = parseResourceName(getResourceName()); + MbVectorTilesSettings results = parseResourceName(getResourceName(), conf, getProviderProperties()); return new MbVectorTilesInputFormatProvider(context, this, results); } @@ -159,7 +195,7 @@ public void move(String toResource) throws IOException MbVectorTilesSettings parseResourceName() throws IOException { - return parseResourceName(getResourceName()); + return parseResourceName(getResourceName(), conf, getProviderProperties()); } /** @@ -170,13 +206,17 @@ MbVectorTilesSettings parseResourceName() throws IOException * * @param input */ - private static MbVectorTilesSettings parseResourceName(String input) throws IOException + private static MbVectorTilesSettings parseResourceName(String input, + Configuration conf, + ProviderProperties providerProperties) throws IOException { Map settings = new HashMap(); parseDataSourceSettings(input, settings); String filename; if (settings.containsKey("filename")) { - filename = settings.get("filename"); + filename = HdfsVectorDataProvider.resolveNameToPath(conf, + settings.get("filename"), + providerProperties, false).toString(); } else { throw new 
IOException("Missing expected filename setting"); diff --git a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesDataProviderFactory.java b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesDataProviderFactory.java new file mode 100644 index 000000000..34e507d4b --- /dev/null +++ b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesDataProviderFactory.java @@ -0,0 +1,132 @@ +/* + * Copyright 2009-2017. DigitalGlobe, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. 
+ */ + +package org.mrgeo.data.vector.mbvectortiles; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.mrgeo.data.DataProviderException; +import org.mrgeo.data.ProviderProperties; +import org.mrgeo.data.vector.VectorDataProvider; +import org.mrgeo.data.vector.VectorDataProviderFactory; +import org.mrgeo.hdfs.utils.HadoopFileUtils; +import org.mrgeo.hdfs.vector.HdfsVectorDataProviderFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +public class MbVectorTilesDataProviderFactory implements VectorDataProviderFactory +{ + static Logger log = LoggerFactory.getLogger(MbVectorTilesDataProviderFactory.class); + + @Override + public boolean isValid(Configuration conf) + { + return true; + } + + @Override + @SuppressWarnings("squid:S2696") // Exception caught and handled + public void initialize(Configuration config) throws DataProviderException + { + } + + @Override + public String getPrefix() + { + return "mbvt"; + } + + @Override + public Map getConfiguration() + { + return null; + } + + @Override + public void setConfiguration(Map settings) + { + } + + @Override + public VectorDataProvider createVectorDataProvider(final String prefix, + final String input, + final Configuration conf, + final ProviderProperties providerProperties) + { + return new MbVectorTilesDataProvider(conf, prefix, input, providerProperties); + } + + @Override + public String[] listVectors(final Configuration conf, + final ProviderProperties providerProperties) throws IOException + { + Path usePath = HdfsVectorDataProviderFactory.getBasePath(conf); + FileSystem fs = HadoopFileUtils.getFileSystem(conf, usePath); + FileStatus[] fileStatuses = fs.listStatus(usePath); + if (fileStatuses != null) + { + List results = new ArrayList<>(fileStatuses.length); + 
for (FileStatus status : fileStatuses) + { + if (canOpen(status.getPath().toString(), conf, providerProperties)) + { + results.add(status.getPath().getName()); + } + } + String[] retVal = new String[results.size()]; + return results.toArray(retVal); + } + return new String[0]; + } + + @Override + public boolean canOpen(final String input, + final Configuration conf, + final ProviderProperties providerProperties) throws IOException + { + return MbVectorTilesDataProvider.canOpen(conf, input, providerProperties); + } + + @Override + public boolean canWrite(final String input, + final Configuration conf, + final ProviderProperties providerProperties) throws IOException + { + throw new IOException("MB vector tiles provider does not support writing vectors"); + } + + @Override + public boolean exists(final String name, + final Configuration conf, + final ProviderProperties providerProperties) throws IOException + { + return canOpen(name, conf, providerProperties); + } + + @Override + public void delete(final String name, + final Configuration conf, + final ProviderProperties providerProperties) throws IOException + { + throw new IOException("MB vector tiles provider does not support deleting vectors"); + } +} diff --git a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesInputFormat.java b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesInputFormat.java index 3aa69b916..7f6483a98 100644 --- a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesInputFormat.java +++ b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesInputFormat.java @@ -18,12 +18,9 @@ import com.almworks.sqlite4java.SQLiteConnection; import com.almworks.sqlite4java.SQLiteException; import com.almworks.sqlite4java.SQLiteStatement; -import 
org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.*; import org.mrgeo.data.vector.FeatureIdWritable; -import org.mrgeo.data.vector.VectorInputFormat; import org.mrgeo.geometry.Geometry; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -32,10 +29,11 @@ import java.util.ArrayList; import java.util.List; -public class MbVectorTilesInputFormat extends VectorInputFormat +public class MbVectorTilesInputFormat extends InputFormat { private static Logger log = LoggerFactory.getLogger(MbVectorTilesInputFormat.class); private MbVectorTilesSettings dbSettings; + private int zoomLevel = -1; public MbVectorTilesInputFormat(MbVectorTilesSettings dbSettings) { @@ -45,13 +43,13 @@ public MbVectorTilesInputFormat(MbVectorTilesSettings dbSettings) @Override public List getSplits(JobContext context) throws IOException, InterruptedException { - // TODO: Download the sqlite file if it's not local - int zoomLevel = dbSettings.getZoom(); + zoomLevel = dbSettings.getZoom(); if (zoomLevel < 0) { // Get the max zoom from the tile data SQLiteConnection conn = null; try { - conn = MbVectorTilesDataProvider.getDbConnection(dbSettings); + conn = MbVectorTilesDataProvider.getDbConnection(dbSettings, + context.getConfiguration()); String query = "SELECT MAX(zoom_level) FROM tiles"; SQLiteStatement stmt = null; try { @@ -59,6 +57,9 @@ public List getSplits(JobContext context) throws IOException, Interr if (stmt.step()) { zoomLevel = stmt.columnInt(0); } + else { + throw new IOException("Unable to get the max zoom level of " + dbSettings.getFilename()); + } } finally { if (stmt != null) { @@ -75,9 +76,12 @@ public List getSplits(JobContext context) throws IOException, Interr } } } - long recordCount = getRecordCount(zoomLevel); + long recordCount = 
getRecordCount(context.getConfiguration()); long recordsPerPartition = dbSettings.getTilesPerPartition(); - long numPartitions = recordCount / recordsPerPartition + 1; + long numPartitions = recordCount / recordsPerPartition; + if (numPartitions * recordsPerPartition < recordCount) { + numPartitions += 1; + } List splits = new ArrayList(); for (int i=0; i < numPartitions; i++) { MbVectorTilesInputSplit split = new MbVectorTilesInputSplit(i * recordsPerPartition, recordsPerPartition, zoomLevel); @@ -87,18 +91,21 @@ public List getSplits(JobContext context) throws IOException, Interr } @Override - public RecordReader createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { - return super.createRecordReader(split, context); + public RecordReader createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException + { + MbVectorTilesRecordReader reader = new MbVectorTilesRecordReader(dbSettings); + reader.initialize(split, context); + return reader; } // @SuppressFBWarnings(value = {"SQL_INJECTION_JDBC", "SQL_PREPARED_STATEMENT_GENERATED_FROM_NONCONSTANT_STRING"}, justification = "User supplied queries are a requirement") - protected long getRecordCount(int zoomLevel) throws IOException + protected long getRecordCount(Configuration conf) throws IOException { String countQuery = "SELECT COUNT(*) FROM tiles WHERE zoom_level=?"; // Run the count query and grab the result. 
SQLiteConnection conn = null; try { - conn = MbVectorTilesDataProvider.getDbConnection(dbSettings); + conn = MbVectorTilesDataProvider.getDbConnection(dbSettings, conf); SQLiteStatement stmt = null; try { stmt = conn.prepare(countQuery, false); diff --git a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesRecordReader.java b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesRecordReader.java index ed224bcf3..7787e2e65 100644 --- a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesRecordReader.java +++ b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesRecordReader.java @@ -71,7 +71,8 @@ public void initialize(InputSplit split, TaskAttemptContext context) throws IOEx currIndex = (offset < 0) ? 0 : offset - 1; try { - conn = MbVectorTilesDataProvider.getDbConnection(dbSettings); + conn = MbVectorTilesDataProvider.getDbConnection(dbSettings, + context.getConfiguration()); // If the offset is < 0, then there is only one partition, so no need // for a limit query. String query ="SELECT tile_column, tile_row, tile_data FROM tiles WHERE zoom_level=? 
order by zoom_level, tile_column, tile_row"; @@ -372,6 +373,7 @@ else if (v.hasUintValue()) { tags.put(tagKey, value); } } + isKey = !isKey; } } if (geom != null) { diff --git a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/resources/META-INF/services/org.mrgeo.data.vector.VectorDataProviderFactory b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/resources/META-INF/services/org.mrgeo.data.vector.VectorDataProviderFactory new file mode 100644 index 000000000..97a39cdd8 --- /dev/null +++ b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/main/resources/META-INF/services/org.mrgeo.data.vector.VectorDataProviderFactory @@ -0,0 +1,31 @@ +# +# Copyright 2009-2017. DigitalGlobe, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. +# + +# +# Copyright 2009-2017. DigitalGlobe, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. 
+# + +org.mrgeo.data.vector.mbvectortiles.MbVectorTilesDataProviderFactory diff --git a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/test/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesInputFormatTest.java b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/test/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesInputFormatTest.java index bb1af7df9..9b811c315 100644 --- a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/test/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesInputFormatTest.java +++ b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/test/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesInputFormatTest.java @@ -15,81 +15,153 @@ package org.mrgeo.data.vector.mbvectortiles; -import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.*; import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; +import org.mrgeo.core.MrGeoConstants; +import org.mrgeo.core.MrGeoProperties; +import org.mrgeo.data.ProviderProperties; +import org.mrgeo.data.vector.FeatureIdWritable; +import org.mrgeo.geometry.Geometry; +import org.mrgeo.hdfs.utils.HadoopFileUtils; +import org.mrgeo.test.TestUtils; +import org.mrgeo.utils.HadoopUtils; +import java.io.File; +import java.io.IOException; import java.util.List; public class MbVectorTilesInputFormatTest { - private MbVectorTilesInputFormat getInputFormatForWaterDefaultZoom() + private static double POINT_EPSILON = 1e-7; + private static String input; + private static Path hdfsInput; + private Configuration conf; + private TaskAttemptContext context; + private ProviderProperties providerProperties; + + @BeforeClass + public static void init() throws IOException { - String[] layers = { "water" }; - MbVectorTilesSettings dbSettings = new MbVectorTilesSettings( - "/home/dave.johnson/Downloads/2017-07-03_new-zealand_wellington.mbtiles", 
- layers - ); - MbVectorTilesInputFormat inputFormat = new MbVectorTilesInputFormat(dbSettings); - return inputFormat; + String inputDir = TestUtils.composeInputDir(MbVectorTilesInputFormatTest.class); + input = "file://" + inputDir; + hdfsInput = TestUtils.composeInputHdfs(MbVectorTilesInputFormatTest.class, true); } - private MbVectorTilesInputFormat getInputFormatForWaterZoom14(int tilesPerPartition) + @Before + public void setup() throws IOException { - String[] layers = { "water" }; - MbVectorTilesSettings dbSettings = new MbVectorTilesSettings( - "/home/dave.johnson/Downloads/2017-07-03_new-zealand_wellington.mbtiles", - layers, - 14, - tilesPerPartition, - null - - ); - MbVectorTilesInputFormat inputFormat = new MbVectorTilesInputFormat(dbSettings); - return inputFormat; + MrGeoProperties.resetProperties(); + Job j = Job.getInstance(new Configuration()); + conf = j.getConfiguration(); + context = HadoopUtils.createTaskAttemptContext(conf, new TaskAttemptID()); + providerProperties = null; } @Test - public void getRecordCountZoom0() throws Exception + public void getSplitsZoom0() throws Exception { - MbVectorTilesInputFormat inputFormat = getInputFormatForWaterDefaultZoom(); - Assert.assertEquals(1L, inputFormat.getRecordCount(0)); + Path dbPath = new Path(input, "AmbulatoryPt.mbtiles"); + MbVectorTilesSettings dbSettings = new MbVectorTilesSettings(dbPath.toString(), new String[] { "ambulatory"}, 0, 1, null); + MbVectorTilesInputFormat ifmt = new MbVectorTilesInputFormat(dbSettings); + List splits = ifmt.getSplits(context); + Assert.assertNotNull(splits); + Assert.assertEquals(1, splits.size()); + Assert.assertEquals(1L, ifmt.getRecordCount(conf)); } @Test - public void getRecordCountZoom2() throws Exception + public void getSplitsZoom11_1() throws Exception { - MbVectorTilesInputFormat inputFormat = getInputFormatForWaterDefaultZoom(); - Assert.assertEquals(2L, inputFormat.getRecordCount(6)); + Path dbPath = new Path(input, "AmbulatoryPt.mbtiles"); + 
MbVectorTilesSettings dbSettings = new MbVectorTilesSettings(dbPath.toString(), new String[] { "ambulatory"}, 11, 1, null); + MbVectorTilesInputFormat ifmt = new MbVectorTilesInputFormat(dbSettings); + List splits = ifmt.getSplits(context); + Assert.assertNotNull(splits); + Assert.assertEquals(3, splits.size()); + Assert.assertEquals(3L, ifmt.getRecordCount(conf)); + int count = 0; + for (InputSplit split: splits) { + RecordReader reader = ifmt.createRecordReader(split, context); + Assert.assertNotNull(reader); + while (reader.nextKeyValue()) count++; + } + System.out.println("count: " + count); } @Test - public void getRecordCountZoom14() throws Exception + public void getSplitsZoom11_2() throws Exception { - MbVectorTilesInputFormat inputFormat = getInputFormatForWaterDefaultZoom(); - Assert.assertEquals(4720L, inputFormat.getRecordCount(14)); + Path dbPath = new Path(input, "AmbulatoryPt.mbtiles"); + Path hdfsPath = new Path(hdfsInput, "AmbulatoryPt.mbtiles"); + try { + // Also test that we can load from the vector.base + MrGeoProperties.getInstance().setProperty(MrGeoConstants.MRGEO_HDFS_VECTOR, hdfsInput.toString()); + HadoopFileUtils.copyFileToHdfs(dbPath.toString(), hdfsPath.toString()); + MbVectorTilesDataProvider dp = new MbVectorTilesDataProvider(conf, "mbvt", "filename=AmbulatoryPt.mbtiles;zoom=11;partition_size=2", providerProperties); + MbVectorTilesSettings dbSettings = dp.parseResourceName(); + MbVectorTilesInputFormat ifmt = new MbVectorTilesInputFormat(dbSettings); + List splits = ifmt.getSplits(context); + Assert.assertNotNull(splits); + Assert.assertEquals(2, splits.size()); + Assert.assertEquals(3L, ifmt.getRecordCount(conf)); + } + finally { + HadoopFileUtils.delete(hdfsPath); + } } @Test - public void getRecordCountZoom15() throws Exception + public void getSplitsZoom11_3() throws Exception { - MbVectorTilesInputFormat inputFormat = getInputFormatForWaterDefaultZoom(); - Assert.assertEquals(0L, inputFormat.getRecordCount(15)); + Path dbPath = 
new Path(input, "AmbulatoryPt.mbtiles"); + MbVectorTilesSettings dbSettings = new MbVectorTilesSettings(dbPath.toString(), new String[] { "ambulatory"}, 11, 3, null); + MbVectorTilesInputFormat ifmt = new MbVectorTilesInputFormat(dbSettings); + List splits = ifmt.getSplits(context); + Assert.assertNotNull(splits); + Assert.assertEquals(1, splits.size()); + Assert.assertEquals(3L, ifmt.getRecordCount(conf)); } @Test public void getSplitsZoom14_1() throws Exception { - MbVectorTilesInputFormat inputFormat = getInputFormatForWaterZoom14(100); - List splits = inputFormat.getSplits(null); - Assert.assertEquals(48, splits.size()); + Path dbPath = new Path(input, "AmbulatoryPt.mbtiles"); + MbVectorTilesSettings dbSettings = new MbVectorTilesSettings(dbPath.toString(), new String[] { "ambulatory"}, 14, 1, null); + MbVectorTilesInputFormat ifmt = new MbVectorTilesInputFormat(dbSettings); + List splits = ifmt.getSplits(context); + Assert.assertNotNull(splits); + Assert.assertEquals(4, splits.size()); + Assert.assertEquals(4L, ifmt.getRecordCount(conf)); + int count = 0; + for (InputSplit split: splits) { + RecordReader reader = ifmt.createRecordReader(split, context); + Assert.assertNotNull(reader); + while (reader.nextKeyValue()) count++; + } + Assert.assertEquals(8, count); } @Test - public void getSplitsZoom14_2() throws Exception + public void getSplitsZoom14_3() throws Exception { - MbVectorTilesInputFormat inputFormat = getInputFormatForWaterZoom14(1000); - List splits = inputFormat.getSplits(null); - Assert.assertEquals(5, splits.size()); + Path dbPath = new Path(input, "AmbulatoryPt.mbtiles"); + MbVectorTilesSettings dbSettings = new MbVectorTilesSettings(dbPath.toString(), new String[] { "ambulatory"}, 14, 3, null); + MbVectorTilesInputFormat ifmt = new MbVectorTilesInputFormat(dbSettings); + List splits = ifmt.getSplits(context); + Assert.assertNotNull(splits); + Assert.assertEquals(2, splits.size()); + Assert.assertEquals(4L, ifmt.getRecordCount(conf)); + int 
count = 0; + for (InputSplit split: splits) { + RecordReader reader = ifmt.createRecordReader(split, context); + Assert.assertNotNull(reader); + while (reader.nextKeyValue()) count++; + } + Assert.assertEquals(8, count); } } \ No newline at end of file diff --git a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/test/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesRecordReaderTest.java b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/test/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesRecordReaderTest.java index b2b92f14a..ef958ab69 100644 --- a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/test/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesRecordReaderTest.java +++ b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/src/test/java/org/mrgeo/data/vector/mbvectortiles/MbVectorTilesRecordReaderTest.java @@ -33,21 +33,6 @@ public class MbVectorTilesRecordReaderTest private static double POINT_EPSILON = 1e-7; private static String input; - private MbVectorTilesInputFormat getInputFormatForWaterZoom14(int tilesPerPartition) - { - String[] layers = { "water" }; - MbVectorTilesSettings dbSettings = new MbVectorTilesSettings( - "/home/dave.johnson/Downloads/2017-07-03_new-zealand_wellington.mbtiles", - layers, - 6, - tilesPerPartition, - null - - ); - MbVectorTilesInputFormat inputFormat = new MbVectorTilesInputFormat(dbSettings); - return inputFormat; - } - @BeforeClass public static void init() { diff --git a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.dbf b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.dbf new file mode 100644 index 0000000000000000000000000000000000000000..4772ef84e0a43a1aff7c08473efb385deb3da14b GIT binary patch literal 3034 zcmeH}L2uJA7>0x3fRK)o4oZ#~cZZC-K_MpU*qIWHwvPv&^V% 
z{Zp_0o8JFg{bq`KdiM9KO%YM81nee3_z^iOXPM()QDiX**2XcAiOJ1@(;rj3HN ztf=_XK(?=UJSU|)69!=jDHWnQmIop@f#g6To4ba=VuZ1CBTZI-0u*bHgI4^>D9lA6 zn_NB|;hG9XYu9ZTLyCKboNsN4t(&IuEX7M<d_=U$)nr!FYTHc&gFy0xO0TBYc8_ zu^5jHHPG~*_JqZb5WOs8geMRLq8KbTiR%xW;&ET@_8p5~xXS?I2!(J%rAD;HaKCd% zF~V2Q(qI}HFe4OAtd`ca;u^n+yUd>vBhvTXV6n5XzpyDbj|e-0X>7bjsO5!TrJBTo p7Lbx!Uv*5%;?&tq7^4Z`Xkc+?$Fj5D$duJI5moQHDXp zBDqhBSgsI@Va{VJreP9D|eVc(KE(yDKNJYDL^)RRSQQEdl$I%%9W9Ae{(wJ`R?Zq(CPW~T@uGQ>Ps;W?CMe_YIB4OJ zNTE2=iisgmjEynT@?3|CH1x)PlJ^i-QD8&{FttV!*ur?8oVG%BQx!W6g73rU;bU+a zoCn9lZZHit1I#fuhJX@*wXv!=z3k90q+(IBX#$TU6H6k5>?EE&NZBn4SwZK}{( zwoH+uTp)LGyhsLO)2xB?8fqliP)9L9#zjVH5MjLuvZOekIDsbtq3J5HVMHW|z?IrK zIoVR}8O{v4wH?Ej;mELaq+8oKIodiq0H!0#bY{?;X^sd1Hk`rXflff8h~q?~Im_cX zgP?ZSGSrDPrwY?3V1oKu&!_@J!mMvny?vWgq?s@f%QnJO@QkE zcx)#GzlUGKPv8#tI@}D`!l&Rva0$E?-U=^+d2kBs11|)#f^o1FYz#YKJ7GMS$3Oc| z!ZMcE7y|zi0%SY}i^XVY`&g_RKST#uq!|v;z7}f@l=L?g1+7k+qEo9sL|f?8>nZ74 z-67gir$$FXtKo7;9oj>5HmP%*lJ3${&}1wV*QcqZdo_k=Ph79Mf>yUSR9Us$B;mD zEKIR39F4U1jMHCJFKyq6G;}@gemb?`d3TrggYibz4GVS@>W+V4*6`@Lq0x(o4VFrw6wB1di91M z^U8dmW~)nWUd-!Wt<$ef_qCbYtUDUF?+jUe}5{KTB)VIm8IJi$zpN2k&#hR z?10FJw@qa04WVMew}sDvl&J|Y0RWcH$6`mj;6-)s=8l72w>)|$7hHxVfCiGzAMIk# z-1|!l1bN_}NEBjY2tXl5zu1eX9QVRPiBBIlpkgcZ-vELMST&3`WPmC->Qa{z6^akK zn9<%4igum5JRW*jCEFrz2+LIzl7lXFv`aqIVx@;Zgzwo^p^%)QkQ@vlqh0cq7v8PY zgxV=DAcf?3h2&t!9PN_-y8Bp$G4%cAhsTEWABKK|fJILNuYZ-2Rx6bZPXj^l2>6fP6v!XQdMNvsh^;T7PY!?@otE)Y~zI*B8 zHZyK|E9ML(klwPzOtYfZxTFbhkwrtvf+yFLALHX3DO&!^>G`Kjub z%L+B z#{lCa3*PEBHyxb4Ogo~n&*vCpkqenxz}WP3#f}&Evu}p>ru363q4Vsl;M*+Iqj5SNc+*J|FT+x%wxPfcgX3QPX;~i+;Oid5apf`&4@Tu>NE45RAjT z0k40iOBHL#+kP-QMe7~W0KRMe`}3Sq^Sa4;JHm=G3-Xp`74Owr91i9+MftlriV8!s z!grfrY!LHV{1O&7C$CgXJ_pim(8~-63}lrZDb6a&EX)teD#^<_n71QhPef68aBe|S z_`%}*tZfB(3l|6Ggk=>JQ;Lgo3HnzoJ-=_A@9UL1ZG+#c(;-I~yUat^X0JE4)D^S% zrsmc)mi0B&)fX?;*Eb)mzx?g&Ov7s_OEdPnC=XlTW3@BYM7#8C`=t zUn2kX>z#?yj(4*>1zHVWm#)gxckee}81K~j(QzPq-I^tn?$(jND$=Cj7M{Q3^@U1` 
z<+FztX&*VS{i_$|J`Qms7M*+fEa*h)wbKs!Z#cgq-t3S+;Oz?XEu3QC9i-nd7?g*OyI=r8rvOR_Aa}O~l`yQtC53 zhq@qW8X-h8_CxFTJpqkBJuD%>`agM1U?G=rg zqSLr+N$bm$mGlACb4R|!JZhZer0Dw86kVTu_N@eCbas6!qmt&SLd>e!RZ9KW2l|gC zV)Xx+>iO>NMiPEAwlLbMU`lWkeZ~W&;eB9t`QQC%B7y7 zvX35QXxyp15AxwZNmC)ae1&eb%RZBHrb!n%yTSU|u>M2c5cU9PXQIOY1K?7x%4pfqJbp*}b4v)4-~*e5#{P z0;|?a`CYHnc*>_bCk{*?*VTP)P~(2lqpvOMnOolietS*MRb*XWR@u+m{2=DBC!)`` z%)95;P5>(x(YVLpJf2zq3m)`iwWX(1ErprZvj3mmfy%N}Bc}(~r zJH{RvI@bw}ZQ6KL!}ZPfHi~}x*_&2Z!qsZ%YqN2BvtyF^Dtq+Ye@3)g`lcmjUO9Vb zZa_;J>rghkrEbZrIScaLma)wiEccxDR83lbBjld_fVKL1bMX^SSOu8tls)ZL^B&k| ziVpZMO>M@lz2sox)Ym2a*`}KRAnDw!i7J@S3Q{s(9Ei~Pu{{wT@6$-y3M}NS_9L+v zp7w17(>|&tiiKb7jfuMA41Hs7EA`SutA*QaQ|-5!A9()gNN-1RNZ-+?b;*gHn_qUO zd0c;?k@b_?B(LQDwL1s$U3KGfA0~xpVfVfez6p-)d?j*!;-ld@`{Y|}b+QR>S3yO0 zQg^|3_TL!2T51oBpEP@BP%yo$Ibc&TY2C3*lfaJ9-r7Scr#F_K?A+klRCki}rp~}{ z&%Ac3PWwrUr@?{!dg9E+e-`5bTpT5xd1FEG`eH4O>#q#|pB?L>r{sz{~pmEuaB?B859M`MA z%Fh4h$(9+Ft8I<3Ofu7Bac#Hh{pU~A4pvso2*2y{DzNm%_Z4|hkV! z&XjK!_?P?5tNi*hHT>0sb{+fUqO;fRCH-@E-IE!aedyRMdLL_+{DWlmq17cWz7w8d zIG=MaG|X9zb*bdnE*W1}SMPJ%5|b%=yP=+e zn?Gn*JdjLD`H>WzRpa&Km#>@f;VP5Ki*QQ^4ldH8hQl5kVnT=}7ZUKX@L7t#$x6-F z{G2n5wQY+&n}1ZD>9GH!*Y~-k$k1~-tF%&LLfc>N&new~t$ZClBQN_gC&j{T%heq3 zw1qZ#y%z7@K0FgR-+R#(Tjq{M+7t@^f%a=a%(t*ySds7BR&d(>^>$r;c!-DfOApPw)e z373o0j=P^o2y$3A^%$wXj9#&5k!NhqsiSuj9ylka885#p(5!~z?glmI9G=~i@AvD6 zcQFHdKiFuNAL@H(UNU#@wR1^tjNdFWA=eerkgJ8!)v z2go+ug$6c6%`VSuSDl+JGv^dmYc4k2Q5x@WmT%8Cm`X5%{jRj(8I%i)jq#ZE_`Pe# z#p~ag_ul1V;tCti4t&nWvaAB;{>*UoS8)tZ=GD*Z-A^(YJJ)xtn9^*p)$P(%D0|6+ z)Dsh%{#J-Z>R+3K^yB<71H*2Yn`|+C2jNdXZ z&@-dfEpT(FH%Tpyu&uAmc*`1CW+HNIn|Ab_N8FDA-q9833O^nQ_g=-y8aq62nIlP(?bZTY3}wCLT950g%u|DN#elNnfa3^YV* zZxvBP8|;QFYj4;dh(Cnd1O9S*;P+!-;m{Z;eUCer0TuveeXP^`>dahRef%_FNFW&41`V>agiVy;u{L`q|vAhgytyYkKoulAT;L@96FUwL%fCq z*wO9m5PzOXBux?~utfqk;+DWef;>QKj^GkEAPQ z0Vw^IEbQ3G2r7+=;wGnP!xVj(vKyxChbf0)%5j)-GW}zDRDtVMS|i1C>*vBT*RL(0W7{)%te_JUaWv8 zL6ogX86lX@Lp(%Wv6Lf@OH#5!*-}0@;x7Z&Y?vZ&k46Ss6XGL|2emnu%@KfmMM)sx 
z5ut{PHI&33aX>EtC}D8?>q?>Z8%ns-inP(&Ql> z8FHT!L+-OjE7adD5TVkkP9PcjpVZ9YEd*tuXn$QOzDyS9GJQIi!{@PuGQKUBV;e0_ zm@ef>5(FHcbUK?0?ga>dzOd2z6%pd7C^?%iVNd6CqPW8WBS1qNyp>@q<;!A0(a>KC zYX4*57WDoPI1Dl7#t;}o;J+3DT`bO6PjBeTA9}o(Mnz~7FgS{dvErK3@O?_UBSHs1 G+W!DcQBKPM literal 0 HcmV?d00001 diff --git a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.prj b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.prj new file mode 100644 index 000000000..6788a84e8 --- /dev/null +++ b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.prj @@ -0,0 +1 @@ +PROJCS["NAD_1983_StatePlane_Maryland_FIPS_1900",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Lambert_Conformal_Conic"],PARAMETER["False_Easting",400000],PARAMETER["False_Northing",0],PARAMETER["Central_Meridian",-77],PARAMETER["Standard_Parallel_1",38.3],PARAMETER["Standard_Parallel_2",39.45],PARAMETER["Latitude_Of_Origin",37.66666666666666],UNIT["Meter",1]] \ No newline at end of file diff --git a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.sbn b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.sbn new file mode 100644 index 0000000000000000000000000000000000000000..87b1ec8c1d0f6080b9bb449ddfc90c90cb8ed4d3 GIT binary patch literal 212 zcmZQzQ0Myp|6c(ECddK`aX3n-&-FYf?cm7ZF~L|b@ra|u!nxr*LPm~^mnu{^Z6z_a zVHIHnit-@IFag;hF=ilU2VxMPd7PznUn9Cml@?*VCchyRZMfixR~ aAEQ5zW)%z)3I@_FoW5LsK$;m!GXelAUl>XN literal 0 HcmV?d00001 diff --git 
a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.sbx b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.sbx new file mode 100644 index 0000000000000000000000000000000000000000..1aa4cc247741cb0e86fc011ddb98554d360cca53 GIT binary patch literal 124 zcmZQzQ0Myp|6c(ECa?nvaX3n-&-FYf?cm7ZF~L|b@ra|u!nxr*LPm~^mnu{^Z6z_a VVHGg~it+%l6A-fiu@?}_006Gi4oLt2 literal 0 HcmV?d00001 diff --git a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.shp b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.shp new file mode 100644 index 0000000000000000000000000000000000000000..dc88f7490059706e50d829e6e7fcf42a53aa9f55 GIT binary patch literal 324 zcmZQzQ0HR64i>#&W?*0h$~j1%^PHL6H-4JLO=)Q7jh4aFbqFu5YtUm@PE5{@3ps%%B`z<%U_ W$$byMq4CgM!tqq_StHjY42}S#E;@7o literal 0 HcmV?d00001 diff --git a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.shp.xml b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.shp.xml new file mode 100644 index 000000000..f1d776b88 --- /dev/null +++ b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.shp.xml @@ -0,0 +1,711 @@ + + + + + + + + DC GIS + 3/12/03 + Ambulatory Surgical Centers + vector digital data + \\Octo-awllx69a6l\OCTOGIS\Data\DOH\AmbulatoryPt.shp + + + DC GIS + 5/31/03 + 5/31/03 + DC Geographic Information System + vector digital data + + + AmbulatoryPt + + + + Ambulatory Surgical Centers. 
The dataset contains locations and attributes of Ambulatory Surgical + Centers, created as part of the DC Geographic Information System (DC GIS) for the D.C. Office of the Chief + Technology Officer (OCTO) and participating D.C. government agencies. + + A database provided by the DC Department of Health's (DOH) website identified Ambulatory Surgical Centers and DC + GIS staff geo-processed the data. + + All DC GIS data is stored and exported in Maryland State Plane coordinates NAD 83 meters. + + METADATA CONTENT IS IN PROCESS OF VALIDATION AND SUBJECT TO CHANGE. + + This data is used for the planning and management of Washington, D.C. by local government + agencies. + + en + + + + + 3/12/03 + + + publication date + + + Complete + As needed + + + + -77.049036 + -76.958920 + 38.981919 + 38.868359 + + + + + + + + 395751.322075 + 403559.335007 + 133394.024991 + 145999.017596 + + + + + none + Health care + Private facilities + Surgery + Prevention + Treatment + + + Washington, D.C. + District of Columbia + DC + DC GIS + DCGIS + + + Contact OCTO GIS. Any data obtained outside of OCTO GIS are unauthorized copies. + Data cannot be redistributed in any manner without written authorization. OCTO makes no claims as to the + completeness, accuracy or content of any data contained hereon, and makes no representation of any kind, + including, but not limited to, the warranty of the accuracy or fitness for a particular use, nor are any such + warranties to be implied or inferred with respect to the information or data furnished herein. + + + + + D.C. Office of the Chief Technology Officer + GIS Data Coordinator + + + mailing and physical address +
441 4th St NW, Suite 930 South
+ Washington + DC + 20001 + USA +
+ (202) 727-5660 + dcgis@dc.gov + 8:30 am - 5 pm +
+
+ DC Department of Health (DOH) website + Microsoft Windows 2000 Version 5.0 (Build 2195) Service Pack 4; ESRI ArcCatalog 8.3.0.800 + + Shapefile +
+ + + Data believed to be complete and accurate, subject to change. Digital version compared with original + source data for accuracy and completeness. Validated by source and/or responsible agency. + + + In process of validation by source and/or responsible agency. + Data believed to be complete and accurate, subject to change. Digital version compared with original + source data for accuracy and completeness. Validated by source and/or responsible agency. + + + + In process of validation by source and/or responsible agency. + + + N/A + + + + + + + DC Department of Health + 2003 + DOH website + Site visited March 2003. + http://dchealth.dc.gov/ + + + website + + + + 2003 + + + publication date + + DOH website + locations and attributes + + + + + Alexandria Mapping Company + 2002 + Metro Washington DC + 34th + + Alexandria, VA + ADC + + http://www.adcmap.com + + + 24000 + atlas + + + + 8/1/2002 + + + publication date + + ADC - Washington DC Metro + location + + + + Data Collection: + + Through the application's functionality and usability requirements, OCTO GIS staff compose a list of data + layers needed for the application. From this, staff determine the appropriate source and location for each + layer, and assess the effort needed to get it. + + Some layers, already part of the DC Atlas, are incorporated into the data store. For DC agency data, agency + liaisons ask DC personnel about data availability and acquisition. For third-party data, OCTO-GIS evaluates + options and either finds alternatives, purchases, or licenses the data. In some cases, staff create data from + existing sources and then add required attribution. + + 5/31/03 + + + + D.C. Office of the Chief Technology Officer + GIS Data Coordinator + + (202) 727-5660 + dcgis@dc.gov + 8:30 am - 5 pm + + + + + Data Preparation: + + After the collection, the data team prepares the data for qa/qc and inclusion in the data store. This step + involves: 1.) configuring the datasets into a common form; 2.) 
geocoding tabular data using a common address + base; 3.) generating or converting paper documents into digital format; 4.) populating required fields; and + 5.) resolving data duplication and conflicts. + + For geocoding, staff use a derivative of the Office of Tax and Revenue (OTR) owner-point database and the + District Department of Transportation's (DDOT) street inventory system (SIS) street centerline file as the + address base. Through automated procedures on the owner-point derivative file, personnel clean and standardize + the irregular premise address information into street number, name, type, and quadrant fields. Also, staff use + the merged SIS file containing the five centerline classifications (street, alley, drive, service road, and + ramp). + + First, staff match tabular data to the standardized owner-points. Then, the rest are matched to the street + centerline file. After this, staff compose a list of the unmatched records needing further research. Since not + all of the records geocode, staff note the final matching figures (number of hits, rejects, and the total). + + For web links, team members check the validity of existing links and find matching web sites that directly + represent the feature. Staff use several web search engines to find appropriate web links. DC government sites + are primarily used, when available. + + To eliminate data redundancy, staff identify possible conflicting layers and duplicate features. In some + cases, staff pick the best source. In others, the attributes in the multiple layers are consolidated. + Sometimes, staff map the conflicting layers and investigate feature clusters as areas of potential + duplication. + + Where applicable, team members move points to the center of the corresponding buildings. Staff then select the + building footprints and assign the appropriate GIS_ID value that links the points with the appropriate + building polygon(s). + + 5/31/03 + + + + D.C. 
Office of the Chief Technology Officer + GIS Data Coordinator + + (202) 727-5660 + dcgis@dc.gov + 8:30 am - 5 pm + + + + + Data QA/QC + + After data preparation, the data team performs qa/qc. Various processes check the following parameters: 1.) + spatial and attribute accuracy; 2.) data usability; 3.) data dictionary adherence; 4.) specialized metrics, + such as connectivity and directionality; and 5.) data load success. These checks are as follows: + + Visual - staff perform a structured visual check of the data, using on-screen, plotting, and customized + routines, and, at times, comparing it to the source documents. + + Edge-matching - For gridded datasets, staff review the spatial capture, connectivity, and attribution of data + across tile boundaries, where applicable. + + Automated - Staff compare the data to documented standards and source files through automated procedures and + reports. + + Specialized - Staff review more specific data parameters, such as directionality, connectivity, and topology. + + Edit Verification - Staff ensure that previous errors are fixed to specifications. + + Field Verification - Occasionally, through field work, team members compare the GIS data with the real world. + + Data Loading - Staff evaluate the process and success of loading raw data into a structured + database. + + Since most of the data use addresses as the spatial reference, staff compare the data against base layers + (buildings, roads, street centerlines, owner-points, parks, and orthophotography) and move features where + needed. + + In some cases, OCTO GIS personnel produce maps and reports for source agency review. Through an iterative + process, staff modify and enhance the layer to agency specifications. For some datasets, the agency endorses + the final product for data load and publication. + + 5/31/03 + + + + D.C. 
Office of the Chief Technology Officer + GIS Data Coordinator + + (202) 727-5660 + dcgis@dc.gov + 8:30 am - 5 pm + + + + + + Final Data Preparation: + + For final preparation, staff ensure all of the needed items are present (unneeded items are deleted), the + metadata is complete and current, the shapefile is in the proper projection, and a spatial index exists. If + not already done, a GIS_ID (unique id for features in the layer) is computed with automated tools. + + 5/31/03 + + + + D.C. Office of the Chief Technology Officer + GIS Data Coordinator + + (202) 727-5660 + dcgis@dc.gov + 8:30 am - 5 pm + + + + + + Data Load: + + After final preparation, OCTO GIS staff load the data into the application's test database, where SDE handles + the spatial data and ORACLE manages the attribute information. + + First, staff add primary key and name fields to the layer. Second, personnel register the layer into the + appropriate tables and establish the unique feature-ids (the link between the spatial object and its + attributes) specific to that layer. Then, staff populate feature, search, address, web url, contact, data + source, and other tables, where applicable. For point layers, team members calculate each feature's geographic + extent and populate the features table. Lastly, each layer is intersected with the base geography (2002 + Advisory Neighborhood Commission areas, 2002 wards, zip codes, neighborhoods, and neighborhood clusters) to + populate the "what's-within" tables for the web reports. Views are refreshed when needed. + + 5/31/03 + + + + D.C. Office of the Chief Technology Officer + GIS Data Coordinator + + (202) 727-5660 + dcgis@dc.gov + 8:30 am - 5 pm + + + + + + Data Load QA/QC + + OCTO GIS performs spatial checks comparing the test SDE database with source files to check for completeness, + spatial integrity, and proper attribution. First, staff compare the test database contents with documented + standards. 
Then, team members check the geography and attribution, as well as compare dataset parameters + between both layers. + + The data team performs attribute checks on the ORACLE database to ensure the existence and validity of views. + Again, staff compare the database contents with documented standards. For critical views, personnel check the + view structures and data against source files and documentation. + + 5/31/03 + + + + D.C. Office of the Chief Technology Officer + GIS Data Coordinator + + (202) 727-5660 + dcgis@dc.gov + 8:30 am - 5 pm + + + + + Metadata imported. + C:\DOCUME~1\roew\LOCALS~1\Temp\xml29.tmp + 20030529 + + + + Dataset copied. + C:\OCTOGIS\Data\DOH\AmbulatoryPt + 20030725 + + + + + + Vector + + + Entity point + 8 + + + Simple + Point + FALSE + 8 + TRUE + FALSE + + + + + + + + coordinate pair + + 0.000032 + 0.000032 + + meters + + + State Plane Coordinate System 1983 + + 1900 + + 38.300000 + 39.450000 + -77.000000 + 37.666667 + 400000.000000 + 0.000000 + + + + + + North American Datum of 1983 + Geodetic Reference System 80 + 6378137.000000 + 298.257222 + + + GCS_North_American_1983 + NAD_1983_StatePlane_Maryland_FIPS_1900 + + + + + 1.000000 + Explicit elevation coordinate included with horizontal coordinates + + + + + + + AmbulatoryPt + Feature Class + 8 + + + FID + Internal feature number. + ESRI + + Sequential unique whole numbers that are automatically generated. + + FID + OID + 4 + 0 + 0 + + + Shape + Feature geometry. + ESRI + + Coordinates defining the features. + + Shape + Geometry + 0 + 0 + 0 + + + NAME + NAME + String + 35 + Facility name + + + ADDRESS + ADDRESS + String + 33 + Physical address + + + webURL + webURL + String + 254 + DC Dept of Health - Ambulatory Surgery Centers list + + + GIS_ID + GIS_ID + String + 16 + Sequential OCTO GIS identifier that links points and polygons + + + SSL + SSL + String + 16 + Single square lot + + + + + + + + D.C. 
Office of the Chief Technology Officer + GIS Data Coordinator + + + mailing and physical address +
441 4th St NW, Suite 930 South
+ Washington + DC + 20001 + USA +
+ (202) 727-5660 + dcgis@dc.gov + 8:30 am - 5 pm +
+
+ Downloadable Data + OCTO makes no claims as to the completeness, accuracy or content of any data contained hereon, and makes + no representation of any kind, including, but not limited to, the warranty of the accuracy or fitness for a + particular use, nor are any such warranties to be implied or inferred, with respect to the information or data + furnished herein. Subject to terms and conditions of the Memorandum of Agreement. Memorandum of Agreement must + have authorized signature. + + + + + 0.028 + 0.028 + + + + + + + + + + Most DC GIS datasets can be downloaded from "http://dcgis.dc.gov" on the data center page. Orders can + also be made by contacting OCTO GIS at "dcgis@dc.gov", especially for datasets not available for download. + + +
+ + 20040106 + + + + D.C. Office of the Chief Technology Officer + GIS Data Coordinator + + + mailing and physical address +
441 4th St NW, Suite 930 South
+ Washington + DC + 20001 + USA +
+ (202) 727-5660 + dcgis@dc.gov + 8:30 am - 5 pm +
+
+ FGDC Content Standards for Digital Geospatial Metadata + FGDC-STD-001-1998 + local time + + http://www.esri.com/metadata/esriprof80.html + ESRI Metadata Profile + + en + + http://www.esri.com/metadata/esriprof80.html + ESRI Metadata Profile + + 01/07/04 +
+ + 20030725 + 15213700 + FALSE + 20040106 + 14301700 + 20040106 + 14301700 + {25AB6F12-3363-422E-A5A4-1A85B1952BF7} + + + Microsoft Windows 2000 Version 5.0 (Build 2195) Service Pack 4; ESRI ArcCatalog 8.3.0.800 + + + + + + AmbulatoryPt + + + + + + + + + + + 395751.322075 + 403559.335007 + 145999.017596 + 133394.024991 + 1 + + + + + -77.049036 + -76.95892 + 38.981919 + 38.868359 + 1 + + + + + + ISO 19115 Geographic Information - Metadata + DIS_ESRI1.0 + + + + + + + dataset + + + + + 002 + file://\\OCTO-AWLLX69A6L\C$\OCTOGIS\DataRequest\Data\Hospital\AmbulatoryPt.shp + Local Area Network + + 0.028 + + + Shapefile + + + + + + + NAD_1983_StatePlane_Maryland_FIPS_1900 + + + + + + + + + + + + + 8 + + + + 20040106 +
diff --git a/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.shx b/mrgeo-dataprovider/mrgeo-dataprovider-mbvectortiles/testFiles/org.mrgeo.data.vector.mbvectortiles/MbVectorTilesInputFormatTest/AmbulatoryPt.shx new file mode 100644 index 0000000000000000000000000000000000000000..5173bdc5a8d51275734946264d0bb24f8d03e70a GIT binary patch literal 164 zcmZQzQ0HR64uW1VGcYg$l~ eqlg*-c_4cnptK*9j)BryP`Uz2w?XMCAQ}Kzqz+R6 literal 0 HcmV?d00001