Skip to content

Commit

Permalink
pygw starter code (#1526)
Browse files Browse the repository at this point in the history
Signed-off-by: Charles Yu <charles.yu108@gmail.com>
  • Loading branch information
charlesyu108 authored and mawhitby committed Mar 6, 2019
1 parent a40297d commit 72efc46
Show file tree
Hide file tree
Showing 14 changed files with 511 additions and 18 deletions.
25 changes: 24 additions & 1 deletion python/pom.xml
Expand Up @@ -5,8 +5,8 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.locationtech.geowave</groupId>
<parent>
<artifactId>geowave-parent</artifactId>
<groupId>org.locationtech.geowave</groupId>
<artifactId>geowave-parent</artifactId>
<relativePath>../</relativePath>
<version>1.0.0-SNAPSHOT</version>
</parent>
Expand All @@ -28,5 +28,28 @@
<artifactId>geowave-datastore-rocksdb</artifactId>
<version>1.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.locationtech.geowave</groupId>
<artifactId>geowave-python</artifactId>
<version>1.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.locationtech.geowave</groupId>
<artifactId>geowave-example</artifactId>
<version>1.0.0-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.locationtech.geowave</groupId>
<artifactId>geowave-extension-parent</artifactId>
<version>1.0.0-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.locationtech.geowave</groupId>
<artifactId>geowave-datastore-redis</artifactId>
<version>1.0.0-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
</dependencies>
</project>
@@ -1,27 +1,23 @@
package org.locationtech.geowave.python;

import org.locationtech.geowave.core.store.api.DataStore;
import org.locationtech.geowave.core.store.api.DataStoreFactory;
import org.locationtech.geowave.datastore.rocksdb.config.RocksDBOptions;
import org.locationtech.geowave.examples.ingest.SimpleIngest;
import py4j.GatewayServer;

public class ApiGateway {
// Some starter code
public DataStore getRocksDB(String namespace, String dir) {
RocksDBOptions ops = new RocksDBOptions(namespace);

ops.setDirectory(dir);
/**
* Declaring public fields which act as "submodules"
*/

return DataStoreFactory.createDataStore(ops);
}
public Debug debug = new Debug();
public DataStoreInterfacer storeInterfacer = new DataStoreInterfacer();
// Expose the simpleIngest example code
public SimpleIngest simpleIngest = new SimpleIngest();

public int getSize(DataStore d) {
return d.getIndices().length;
}
public static void main(String[] args) {
GatewayServer server = new GatewayServer(new ApiGateway());
GatewayServer.turnLoggingOn();
server.start();
}

public static void main(String[] args) {
GatewayServer server = new GatewayServer(new ApiGateway());

server.start();
}
}
@@ -0,0 +1,46 @@
package org.locationtech.geowave.python;

import org.locationtech.geowave.core.index.persist.Persistable;
import org.locationtech.geowave.core.store.CloseableIterator;
import org.locationtech.geowave.core.store.adapter.exceptions.MismatchedIndexToAdapterMapping;
import org.locationtech.geowave.core.store.api.*;

import java.net.URL;

/**
* Most of this is not necessary for the time-being.
* Currently, this exists to bridge unachievable method sigs in python, primarily that addType takes var-args for Indices
* TODO: This is only a partial implementation
*/

public class DataStoreInterfacer {

public <T> void ingest(DataStore ds, URL url, Index... index) throws MismatchedIndexToAdapterMapping {
ds.ingest(url, index);
}

public <T> void ingest(DataStore ds, URL url, IngestOptions<T> options, Index... index)
throws MismatchedIndexToAdapterMapping {
ds.ingest(url, options, index);
}

public <T> CloseableIterator<T> query(DataStore ds, final Query<T> query) {
return ds.query(query);
}

public <P extends Persistable, R, T> R aggregate(DataStore ds, final AggregationQuery<P, R, T> query) {
return ds.aggregate(query);
}

public Index[] getIndices(DataStore ds) {
Index[] indices = ds.getIndices();
System.out.println(indices);
System.out.println(indices.length);
return indices;
}

public <T> void addType(DataStore ds, DataTypeAdapter<T> dataTypeAdapter, Index initialIndex){
ds.addType(dataTypeAdapter, initialIndex);
}

}
51 changes: 51 additions & 0 deletions python/src/main/java/org/locationtech/geowave/python/Debug.java
@@ -0,0 +1,51 @@
package org.locationtech.geowave.python;

import java.lang.reflect.Field;
import java.lang.reflect.Method;

/**
* A class for debugging tools.
*/
public class Debug {

/**
* Prints information about the object on both python (returns a String) and java side.
* @param obj
*/
public String printObject(Object obj) {
System.out.println(obj);
return obj.toString();
}

/**
* Prints (verbose) information about the object on both python (returns a String) and java side.
* @param obj
* @param verbose
*/
public String printObject(Object obj, boolean verbose) {
if (!verbose) {
return printObject(obj);
}

String methods = "";

for (Method method : obj.getClass().getMethods() ) {
methods = methods + (method.getName() + " ;");
}

String fields = "";

for (Field field : obj.getClass().getFields() ) {
fields = fields + (field.getName() + "; ");
}

String info = "Object: " + obj.toString() + "\n" +
"Class: " + obj.getClass().toString() + "\n" +
"isNull: " + (obj == null) + "\n" +
"Methods: " + methods + "\n" +
"Fields: " + fields + "\n";

System.out.println(info);
return info;
}
}
2 changes: 2 additions & 0 deletions python/src/main/python/.gitignore
@@ -0,0 +1,2 @@
*__pycache__*
venv
90 changes: 90 additions & 0 deletions python/src/main/python/README.md
@@ -0,0 +1,90 @@
# Pygw Prototype Dev Guide
Intro dev guide for the pygw framework as it exists now in its current form.

## Python Environment Prereqs:
- Python3
- A virtualenv with `requirements.txt` installed

**install steps**
To set up a new virtualenv run `virtualenv -p python3 venv`
Activate the virtualenv with `source venv/bin/activate`
Install requirements with `pip install -r requirements.txt`

## Using pygw
You must have an instance of Geowave Py4J Java Gateway Server running.
Do this by running `org.locationtech.geowave.python.ApiGateway.java` in Eclipse/ IntelliJ

Now, you can import pygw models into your python environment with:
`from pygw import *`

## Running through the pygw Simple Feature Type Ingest Example
You can step through a simple ingest pipeline (the same one as `org.locationtech.geowave.examples.ingest.SimpleIngest`)
by following these steps:
```
from pygw.sft_example import *
# Create a point feature type
point = Point()
# Create a builder for this feature type
builder = PointBuilder(point)
# Create an adapter for point type
adapter = PointFeatureDataAdapter(point)
# Create a Spatial Index
index = SpatialIndex()
# Create a RocksDB geowave datastore with gw_namespace of "geowave.hello" and that lives under "./world" directory
# NOTE: "./world" is relative to whatever directory your java gateway is running out of.
ds = RocksDbDs("geowave.hello", "./world")
# Registering the point adapter with the spatial index to your datastore
ds.add_type(adapter, index)
# Creating a writer to ingest data
writer = ds.create_writer(adapter.get_type_name())
# Create fake data (Returns a list of java objects that python can iterate over)
j_data = config.GATEWAY.entry_point.simpleIngest.getGriddedFeatures(builder._java_ref, 1000)
# Ingest this data into the datastore using the writer
for data in j_data:
writer._java_ref.write(data)
# Close the writer
writer.close()
```
## Dev Notes:

### Submodule descriptions
#### __init__.py
The main entry point for pygw is the top-level pygw __init__.py
In that __init__ file, the important child libraries are linked and imported into context.
Also, configurations are setup here.

#### config.py
A singleton object of type GlobalConfigurations called `config` is declared here that encapsulates all module-wide definitions.

In general, to use config definitions in a module, you would do `from .config import config`. Then, you can reference any
declared definitions in the GlobalConfigurations object with something like `config.GATEWAY`.

NOTE: the GlobalConfigurations has an `init()` method. This is INTENTIONALLY not an `__init__` method. The definitions are only populated when `pygw/__init__.py` has imported everything and calls `config.init()`

#### base_models.py
Base Wrapper models for PyGw. Only put objects here that expose major reusable APIs like important Geowave Java interfaces, for example.

##### base_models.PyGWJavaWrapper
This is the base class that EVERY pygw object that encapsulates a java reference must subclass.
Its `_java_ref` field references the java reference in the JVM that is bound to the PyGw python object.

#### debug.py
This exposes a handy function called `print_obj` that you can use to help you debug wonky situations with raw java objects. It will print information about the object in question on both the python side and on the java server side. There's a `verbose` flag that will give you more information about the object in question.

#### stores.py and indices.py
These contain implementation-specific constructors of extended base_model objects, ex. "RocksDbDs" and "RedisDs"

### misc notes:
- "j_"-prefixed notation : I prefix all my raw java reference variables with a "j_" to distinguish them from python variables
- To use call Java Methods that take in PyGw objects (like an Index, for example) you can't directly invoke the method on the PyGW object. You have to call it on the `._java_ref` member of that object.
-
9 changes: 9 additions & 0 deletions python/src/main/python/pygw/__init__.py
@@ -0,0 +1,9 @@
from .config import config
from .base_models import *
from .stores import *
from .indices import *
from .debug import *

# Init connection to Java Gateway (`config.gateway`)
# This should be called only once.
config.init()
76 changes: 76 additions & 0 deletions python/src/main/python/pygw/base_models.py
@@ -0,0 +1,76 @@
class PyGwJavaWrapper:
"""[INTERNAL] Base Class for all PyGw Objects that wrap py4j objects"""

def __init__(self, gateway, java_ref):
self._gateway = gateway
self._java_ref = java_ref

def __repr__(self):
return "PyGW {} Object with JavaRef@{}".format(self.__class__, self._java_ref)

def __eq__(self, other):
if not isinstance(other, PyGwJavaWrapper):
return False
return self._java_ref == other._java_ref

def is_instance_of(self, java_class):
return isinstance(self._gateway, self._java_ref, java_class)

class DataStore(PyGwJavaWrapper):
"""Wrapper to expose all of DataStore API"""

def __init__(self, gateway, java_ref):
super().__init__(gateway, java_ref)
self.interfacer = gateway.entry_point.storeInterfacer

def get_indices(self):
j_indices = self.interfacer.getIndices(self._java_ref)
return [Index(self._gateway, j_index) for j_index in j_indices]

def add_type(self, type_adapter, index):
"""NOTE: This is slightly different from java api. Currently does not support var-arg initial indices"""
j_adapter = type_adapter._java_ref
j_index = index._java_ref
self.interfacer.addType(self._java_ref, j_adapter, j_index)

def create_writer(self, type_adapter_name):
j_writer = self._java_ref.createWriter(type_adapter_name)
return Writer(self._gateway, j_writer)

class DataTypeAdapter(PyGwJavaWrapper):
"""Wrapper to expose all of DataTypeAdapter API"""
# TODO: Implement API
def get_type_name(self):
return self._java_ref.getTypeName()

class Index(PyGwJavaWrapper):
"""Wrapper to expose all of Index API"""
def get_name(self):
return self._java_ref.getName()

def get_index_strategy(self):
j_obj = self._java_ref.getIndexStrategy()
return j_obj.getClass().toString()

def get_index_model(self):
j_obj = self._java_ref.getIndexModel()
return j_obj.getClass().toString()

class Writer(PyGwJavaWrapper):
"""Wrapper to expose all of Writer API"""
def __init__(self, gateway, java_ref):
super().__init__(gateway, java_ref)
self.is_open = True

def write(self, data):
if not self.is_open:
raise RuntimeError("Writer is already closed!")
self._java_ref.write(data)

def close(self):
if self.is_open:
self._java_ref.close()
self.is_open = False
# Might want to introduce a method/flag here that bulk writes in java.
# Ex. give it a list of a data and it calls a java endpoint to do all the writing and closing.
# Current pipeline will make N py4j calls for an N-element ingest
33 changes: 33 additions & 0 deletions python/src/main/python/pygw/config.py
@@ -0,0 +1,33 @@
from py4j.java_gateway import JavaGateway, GatewayParameters, java_import

class GlobalConfigurations:

def init(self):
# Set-up Main Gateway Connection to JVM
self.GATEWAY = JavaGateway(gateway_parameters=GatewayParameters(auto_field=True))

### Import Java Modules and Define Names here for easier access: ###

# Geowave Core Store
java_import(self.GATEWAY.jvm, "org.locationtech.geowave.core.store.api")
self.MODULE__core_store = self.GATEWAY.jvm.org.locationtech.geowave.core.store.api

# Geowave RocksDb Config
java_import(self.GATEWAY.jvm, "org.locationtech.geowave.datastore.rocksdb.config")
self.MODULE__rocksdb_config = self.GATEWAY.jvm.org.locationtech.geowave.datastore.rocksdb.config

# Geowave Redis Config
java_import(self.GATEWAY.jvm, "org.locationtech.geowave.datastore.redis.config")
self.MODULE__redis_config = self.GATEWAY.jvm.org.locationtech.geowave.datastore.redis.config

# Geowave Geotime Ingest
java_import(self.GATEWAY.jvm, "org.locationtech.geowave.core.geotime.ingest")
self.MODULE__geotime_ingest = self.GATEWAY.jvm.org.locationtech.geowave.core.geotime.ingest

# Geotools Feature Simple
java_import(config.GATEWAY.jvm, "org.geotools.feature.simple")
self.MODULE__feature_simple = self.GATEWAY.jvm.org.geotools.feature.simple

# Note - Module-wide Singleton!
global config
config = GlobalConfigurations()

0 comments on commit 72efc46

Please sign in to comment.