diff --git a/builtin/sampled_dataset.cc b/builtin/sampled_dataset.cc index b9f4dc627..014ae5a34 100644 --- a/builtin/sampled_dataset.cc +++ b/builtin/sampled_dataset.cc @@ -292,6 +292,10 @@ SampledDataset(MldbServer * owner, { auto sampleConfig = config.params.convert(); + if (sampleConfig.dataset == nullptr) { + throw HttpReturnException(400, "You need to define the dataset key"); + } + SqlExpressionMldbScope context(owner); bondTableExpression = sampleConfig.dataset->bind(context); diff --git a/mongodb/doc/MongoDataset.md b/mongodb/doc/MongoDataset.md index b4eb9cfbc..de4963470 100644 --- a/mongodb/doc/MongoDataset.md +++ b/mongodb/doc/MongoDataset.md @@ -19,3 +19,36 @@ and MongoDB by allowing MLDB SQL queries to run over a MongoDB collection. ## Configuration ![](%%config dataset mongodb.dataset) + +## Example + +For this example, we will use a MongoDB database populated with data provided by +the book MongoDB In Action. The zipped json file is available at +[http://mng.bz/dOpd](http://mng.bz/dOpd). + +Here we create a dataset named "mongodb_zips_bridge". + +```python +mldb.put('/v1/datasets/mongodb_zips_bridge', { + 'type' : 'mongodb.dataset', + 'params' : { + 'connectionScheme': 'mongodb://khan.mldb.ai:11712/zips', + 'collection': 'zips' + } +}) +``` + +We can directly query it. 
+ +```python +mldb.query("SELECT * NAMED zip FROM mongodb_zips_bridge ORDER BY pop DESC LIMIT 5") +``` + +| _id | city | loc.x | loc.y | pop | state | zip | +|-----|------|-------|-------|-----|-------|-----| +| _rowName | +| 60623 | 57d2f5eb21af5ee9c4e22302 | CHICAGO | 87.715700 | 41.849015 | 112047 | IL | 60623 | +| 11226 | 57d2f5eb21af5ee9c4e24f28 | BROOKLYN | 73.956985 | 40.646694 | 111396 | NY | 11226 | +| 10021 | 57d2f5eb21af5ee9c4e24e7f | NEW YORK | 73.958805 | 40.768476 | 106564 | NY | 10021 | +| 10025 | 57d2f5eb21af5ee9c4e24e4f | NEW YORK | 73.968312 | 40.797466 | 100027 | NY | 10025 | +| 90201 | 57d2f5eb21af5ee9c4e21258 | BELL GARDENS | 118.172050 | 33.969177 | 99568 | CA | 90201 | diff --git a/mongodb/doc/MongoImport.md b/mongodb/doc/MongoImport.md index 54ba8dcc4..b9b188706 100644 --- a/mongodb/doc/MongoImport.md +++ b/mongodb/doc/MongoImport.md @@ -18,3 +18,45 @@ dataset. ## Configuration ![](%%config procedure mongodb.import) + +## Example + +For this example, we will use a MongoDB database populated with data provided by +the book MongoDB In Action. The zipped json file is available at +[http://mng.bz/dOpd](http://mng.bz/dOpd). + +Here we import the zips collection into an MLDB dataset called mongodb_zips. + +```python +mldb.post('/v1/procedures', { + 'type' : 'mongodb.import', + 'params' : { + 'connectionScheme': 'mongodb://khan.mldb.ai:11712/zips', + 'collection': 'zips', + 'outputDataset' : { + 'id' : 'mongodb_zips', + 'type' : 'sparse.mutable' + } + } +}) +``` + +We can now query the imported data as we would any other MLDB Dataset. 
+ +```python +mldb.query("SELECT * FROM mongodb_zips LIMIT 5") +``` +| _id | city | loc.x | loc.y | pop | state | zip | +|-----|------|-------|-------|-----|-------|-----| +| _rowName | +| 57d2f5eb21af5ee9c4e27f08 | 57d2f5eb21af5ee9c4e27f08 | BONDURANT | 110.335287 | 43.223798 | 116 | WY | 82922 | +| 57d2f5eb21af5ee9c4e27f07 | 57d2f5eb21af5ee9c4e27f07 | KAYCEE | 106.563230 | 43.723625 | 876 | WY | 82639 | +| 57d2f5eb21af5ee9c4e27f05 | 57d2f5eb21af5ee9c4e27f05 | CLEARMONT | 106.458071 | 44.661010 | 350 | WY | 82835 | +| 57d2f5eb21af5ee9c4e27f03 | 57d2f5eb21af5ee9c4e27f03 | ARVADA | 106.109191 | 44.689876 | 107 | WY | 82831 | +| 57d2f5eb21af5ee9c4e27f01 | 57d2f5eb21af5ee9c4e27f01 | COKEVILLE | 110.916419 | 42.057983 | 905 | WY | 83114 | + +Here we did not provide any named parameter so oid() was used. This is why +_rowName and _id have the same values. + +Another element to note is how the loc object was imported. The sub object was +disassembled and imported as loc.x and loc.y into MLDB. diff --git a/mongodb/doc/MongoQueryFunction.md b/mongodb/doc/MongoQueryFunction.md index 44e7d4c2d..7fb485ef9 100644 --- a/mongodb/doc/MongoQueryFunction.md +++ b/mongodb/doc/MongoQueryFunction.md @@ -20,3 +20,59 @@ similar to the ## Configuration ![](%%config function mongodb.query) + +## Example + +For this example, we will use a MongoDB database populated with data provided by +the book MongoDB In Action. The zipped json file is available at +[http://mng.bz/dOpd](http://mng.bz/dOpd). + +Here we create the query function on the MongoDB zips database zips collection. 
+ +```python +mldb.put("/v1/functions/mongo_query", { + "type": "mongodb.query", + "params": { + "connectionScheme": 'mongodb://khan.mldb.ai:11712/zips', + "collection": 'zips' + } +}) +``` + +A direct call to the function looks like + +```python +import json +mldb.get('/v1/functions/mongo_query/application', + input={'query' : json.dumps({'zip' : {'$eq' : '60623'}})} +).json() +``` + +With the output + +```python +{ + 'output': { + '_id': u'57d2f5eb21af5ee9c4e22302', + 'city': 'CHICAGO', + 'loc': [['x', [87.7157, '2016-09-09T17:48:27Z']], + ['y', [41.849015, '2016-09-09T17:48:27Z']]], + 'pop': 112047, + 'state': 'IL', + 'zip': '60623' + } +} +``` + +Here is an example of the function being used within a query. + +```python +mldb.query(""" + SELECT mongo_query({query: '{"loc.x" : {"$eq" : 73.968312}}'}) AS * +""") +``` + +| _id | city | loc.x | loc.y | pop | state | zip | +|-----|------|-------|-------|-----|-------|-----| +| _rowName | +| result | 57d2f5eb21af5ee9c4e24e4f | NEW YORK | 73.968312 | 40.797466 | 100027 | NY | 10025 | diff --git a/mongodb/doc/MongoRecord.md b/mongodb/doc/MongoRecord.md index 2fc401234..db43bf1dc 100644 --- a/mongodb/doc/MongoRecord.md +++ b/mongodb/doc/MongoRecord.md @@ -23,3 +23,31 @@ Rows are stored in collections with the following format: ## Configuration ![](%%config dataset mongodb.record) + +## Example + +Here we create the dataset named "mldb_to_mongodb" which will write to MongoDB +database "zips" collection "mldb_coll". + +```python +mldb.put("/v1/datasets/mldb_to_mongodb", { + "type": "mongodb.record", + "params": { + "connectionScheme": 'mongodb://khan.mldb.ai:11712/zips', + "collection": 'mldb_coll' + } +}) + +``` + +Then we record a row with 2 columns. 
+ +```python +print mldb.post('/v1/datasets/mldb_to_mongodb/rows', { + 'rowName' : 'row1', + 'columns' : [ + ['colA', 'valA', 0], + ['colB', 'valB', 0] + ] +}) +``` diff --git a/testing/MLDB-1242_sampled_dataset.py b/testing/MLDB-1242_sampled_dataset.py index 49ec9b3bf..1e3de4231 100644 --- a/testing/MLDB-1242_sampled_dataset.py +++ b/testing/MLDB-1242_sampled_dataset.py @@ -134,6 +134,17 @@ def test_functions_with_invalid_params(self): with self.assertRaises(mldb_wrapper.ResponseException) as re: mldb.get("/v1/query", q="select * from sample(toy, {fraction: 2})") + def test_cant_create_wo_ds(self): + # MLDB-1977 + msg = "You need to define the dataset key" + with self.assertRaisesRegexp(mldb_wrapper.ResponseException, msg) as re: + mldb.put('/v1/datasets/sampled', { + 'type' : 'sampled', + 'params' : { + 'fraction' : 0.99 + } + }) + if __name__ == '__main__': mldb.run_tests()