Skip to content

Commit

Permalink
Merge pull request #2908 from entur/otp2_ds4_extract_otp_datastore
Browse files Browse the repository at this point in the history
Otp2 ds4 extract otp datastore
  • Loading branch information
abyrd committed Jan 10, 2020
2 parents 43980d2 + 89efabf commit cc1a9d3
Show file tree
Hide file tree
Showing 71 changed files with 4,187 additions and 1,326 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package org.opentripplanner.datastore;

import java.io.Closeable;
import java.util.Collection;

/**
 * A composite data source contains a collection of other {@link DataSource}s.
 * <p>
 * Example: file directories and zip files with GTFS or NeTEx data.
 */
public interface CompositeDataSource extends DataSource, Closeable {

    /**
     * Open the composite data source and read its content. For a random access data source
     * (local file system) this does not read each entry, only the metadata for each of them.
     * For a streamed data source (cloud storage) the entire content is fetched, which might
     * use a lot of memory.
     *
     * @return the data sources contained in this composite data source.
     */
    Collection<DataSource> content();

    /**
     * Look up a single entry by its name.
     * <p>
     * Example:
     * <p>
     * {@code DataSource routesSrc = gtfsSource.entry("routes.txt")}
     *
     * @param name the name of the entry to retrieve.
     * @return the entry with the given name, or {@code null} if not found.
     */
    DataSource entry(String name);

    /**
     * Delete content and container in store.
     * <p>
     * This default implementation deletes nothing; it always throws. Implementations that can
     * delete are expected to override it.
     *
     * @throws UnsupportedOperationException always, when not overridden.
     */
    default void delete() {
        final String typeName = getClass().getSimpleName();
        final String message = "This datasource type " + typeName
                + " do not support DELETE. Can not delete: " + path();
        throw new UnsupportedOperationException(message);
    }
}
131 changes: 131 additions & 0 deletions src/main/java/org/opentripplanner/datastore/DataSource.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package org.opentripplanner.datastore;

import org.apache.commons.io.IOUtils;
import org.opentripplanner.common.LoggingUtil;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.SimpleDateFormat;


/**
 * A data source is a generalized type representing a file, database blob or other unit that
 * OTP reads from or writes to.
 * <p>
 * The data source instance contains metadata like {@code name}, {@code description},
 * {@code type} and so on. To access (read from or write to) a data source, the methods
 * {@link #asInputStream()} and {@link #asOutputStream()} open a connection to the underlying
 * data source and make it available for reading/writing.
 * <p>
 * Only metadata is retrieved before a stream is opened, making sure a minimum of data is
 * transferred before it is actually needed.
 * <p>
 * The data source metadata should be fetched once. The data is NOT updated even if the source
 * itself changes. If this happens it might cause the streaming to fail.
 * <p>
 * Concurrent modifications to underlying data sources are not accounted for, and there is no
 * need to support that in the implementation of this class. This means that we assume all
 * input and output files in OTP are stable (not changed in any way) during the period OTP
 * needs to access these files.
 */
public interface DataSource {

    /**
     * @return the short name identifying the source within its scope (within a {@link
     * OtpDataStore} or {@link CompositeDataSource}), including the file extension.
     * <p>
     * Examples:
     * <p>
     * {@code build-config.json, gtfs.zip and stops.txt}
     */
    String name();

    /**
     * @return the full path (or description) to be used when describing this data source. This
     * method is mainly used for humans to identify the source in logs and error handling.
     */
    String path();

    /**
     * The file type this data source is identified as.
     */
    FileType type();

    /**
     * @return size in bytes, if unknown returns {@code -1}
     */
    default long size() { return -1; }

    /**
     * @return last modified timestamp in ms, if unknown returns {@code -1}
     */
    default long lastModified() { return -1; }

    /**
     * @return true if it exists in the data store; hence calling {@link #asInputStream()} is
     * safe.
     */
    default boolean exists() { return true; }

    /**
     * @return {@code true} if it is possible to write to the data source. Also returns
     * {@code true} if it is not easy to check. No guarantee is given and
     * {@link #asOutputStream()} may fail. This method can be used to avoid consuming a lot of
     * resources before writing to a data source, if this method returns {@code false}.
     */
    default boolean isWritable() { return true; }

    /**
     * Connect to this data source and make it available as an input stream. The caller is
     * responsible for closing the connection.
     * <p>
     * Note! This method might get called several times, and each time a new stream should be
     * created.
     *
     * @throws UnsupportedOperationException in this default implementation, which does not
     *                                       support reading.
     */
    default InputStream asInputStream() {
        throw new UnsupportedOperationException(
                "This datasource type " + getClass().getSimpleName()
                + " do not support READING. Can not read from: " + path()
        );
    }

    /**
     * Return the content as a byte array. The implementation may choose to implement this in a
     * more efficient way - not reading the input stream. Do not change the data returned.
     * <p>
     * Calling this method is the same as reading everything off the {@link #asInputStream()}.
     * The stream opened by this default implementation is closed before the method returns.
     *
     * @throws RuntimeException wrapping the {@link IOException} if reading or closing the
     *                          stream fails.
     */
    default byte[] asBytes() {
        // IOUtils.toByteArray does not close the stream, so it is closed here to avoid
        // leaking the underlying connection/file handle.
        try (InputStream in = asInputStream()) {
            return IOUtils.toByteArray(in);
        }
        catch (IOException e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /**
     * Connect to this data source and make it available as an output stream for writing.
     *
     * @throws UnsupportedOperationException in this default implementation, which does not
     *                                       support writing.
     */
    default OutputStream asOutputStream() {
        throw new UnsupportedOperationException(
                "This datasource type " + getClass().getSimpleName()
                + " do not support WRITING. Can not write to: " + path()
        );
    }

    /**
     * Return an info string like this:
     * <p>
     * {@code [icon] [filename] [path] [date time] [file size]}
     * <p>
     * The {@code [path]} part is the {@link #path()} with the trailing {@link #name()} and
     * one separator character removed. The date/time and file size are only included when
     * {@link #lastModified()} and {@link #size()} are greater than zero.
     */
    default String detailedInfo() {
        String path = path();
        String name = name();

        // Strip "<separator><name>" off the end of the path. Clamp at zero so a path that is
        // not longer than the name (e.g. path == name) yields an empty directory instead of
        // a StringIndexOutOfBoundsException.
        int dirLength = Math.max(0, path.length() - name.length() - 1);
        String dir = path.substring(0, dirLength);

        String info = String.format("%s %s %s", type().icon(), name, dir);
        if (lastModified() > 0) {
            // SimpleDateFormat is not thread-safe; a fresh instance is created on each call.
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            info += " " + sdf.format(lastModified());
        }
        if (size() > 0) {
            info += " " + LoggingUtil.fileSizeToString(size());
        }
        return info;
    }
}
65 changes: 65 additions & 0 deletions src/main/java/org/opentripplanner/datastore/FileType.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package org.opentripplanner.datastore;

import java.util.EnumSet;

/**
 * The kinds of files that can show up in a router / graph build directory. Files that are not
 * graph builder inputs are also represented, so that a warning can be given when unknown file
 * types are present. This helps point out when config files have been misnamed
 * (builder-config vs. build-config).
 */
public enum FileType {
    CONFIG("⚙️", "Config file"),
    OSM("🌍", "OpenStreetMap data"),
    DEM("🏔", "Elevation data"),
    GTFS("🚌", "GTFS data"),
    NETEX("🚌", "NeTEx data"),
    GRAPH("🦠", "OTP Graph file"),
    REPORT("📈", "Issue report"),
    OTP_STATUS("⏳", "OTP build status"),
    UNKNOWN("❓", "Unknown file");

    private final String icon;
    private final String text;

    FileType(String emoji, String description) {
        this.icon = emoji;
        this.text = description;
    }

    /**
     * @return the emoji (icon) for this type.
     */
    public String icon() {
        return this.icon;
    }

    /**
     * @return the short human-readable description of this type.
     */
    public String text() {
        return this.text;
    }

    /**
     * Tell whether this type is an INPUT data file: GTFS, NeTEx, OpenStreetMap or elevation
     * data. Config files and graphs are not considered input data files.
     * <p>
     * At least one input data file must be present to build a graph.
     *
     * @return {@code true} for {@code GTFS}, {@code NETEX}, {@code OSM} and {@code DEM}.
     */
    public boolean isInputDataSource() {
        switch (this) {
            case GTFS:
            case NETEX:
            case OSM:
            case DEM:
                return true;
            default:
                return false;
        }
    }

    /**
     * Tell whether this type is an OUTPUT data file/directory: the graph files, the
     * build report and the otp-status file. Config files are not considered output data files.
     *
     * @return {@code true} for {@code GRAPH}, {@code REPORT} and {@code OTP_STATUS}.
     */
    public boolean isOutputDataSource() {
        switch (this) {
            case GRAPH:
            case REPORT:
            case OTP_STATUS:
                return true;
            default:
                return false;
        }
    }

    /**
     * @return true if GTFS or NETEX file type.
     */
    public boolean isTransit() {
        return this == GTFS || this == NETEX;
    }
}

0 comments on commit cc1a9d3

Please sign in to comment.