diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..cb10dad --- /dev/null +++ b/.travis.yml @@ -0,0 +1,12 @@ +language: node_js + +node_js: + - '0.12' + - '0.10' +sudo: false # Enable docker-based containers +cache: + directories: # Cache dependencies + - node_modules + +script: + - npm test diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..4647313 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,17 @@ +# Change Log +All notable changes to this project will be documented in this file. +This project adheres to [Semantic Versioning](http://semver.org/). + +## [0.1.1] - 2015-05-06 +### Added +* Flattening all object based properties +* ensuring that each feature contains each field + +## [0.1.0] - 2015-04-21 +### Changed +* This project now uses `standard` as its code formatting +* Keeping a legit changelog +* Added tape testing with sinon stubs in the controller tests + +[0.1.1]: https://github.com/Esri/koop/releases/compare/v0.1.0...v0.1.1 +[0.1.0]: https://github.com/Esri/koop/releases/tag/v0.1.0 diff --git a/README.md b/README.md index e0fc9e2..ae421b9 100644 --- a/README.md +++ b/README.md @@ -1,43 +1,50 @@ ## Socrata Provider for [Koop](https://github.com/Esri/koop) ------------ This provider makes it possible to access [Socrata's JSON API](http://dev.socrata.com/docs/formats/json.html) as either GeoJSON or an Esri FeatureService. This is particular useful for making maps and doing analysis on the web. -## Installation +## Install To install/use this provider you first need a working installation of [Koop](https://github.com/Esri/koop). Then from within the koop directory you'll need to run the following: - ``` - npm install https://github.com/chelm/koop-socrata/tarball/master - ``` +``` +npm install https://github.com/koopjs/koop-socrata/tarball/master +``` ## Register Socrata Hosts -Once this provider's been installed you need to "register" a particular instance of Socrate with your Koop instance. 
To do this you make `POST` request to the `/socrata` endpoint like so: +Once this provider's been installed you need to "register" a particular instance of Socrata with your Koop instance. To do this you make a `POST` request to the `/socrata` endpoint like so: - ``` - curl --data "host=https://data.nola.gov&id=nola" localhost:1337/socrata - ``` +``` +curl --data "host=https://data.nola.gov&id=nola" localhost:1337/socrata +``` +*For Windows users, download cURL from http://curl.haxx.se/download.html or use a tool of your choice to generate the POST request* -What you'll need for that request to work is an ID and a the URL of the Socrata instance. The ID is what you'll use to reference datasets that come from Socrata in Koop. +What you'll need for that request to work is an ID and the URL of the Socrata instance. The ID is what you'll use to reference datasets that come from Socrata in Koop. To make sure this works you can visit: http://localhost:1337/socrata and you should see all of the register hosts. ## Access Socrata Data -To access a dataset hosted in Socrata you'll need a "resource id" from Socrata. Datasets in Socrata can be accessed as raw JSON like this: +To access a dataset hosted in Socrata you'll need a "Resource ID" from Socrata. 
Datasets in Socrata can be accessed as raw JSON like this: -* [https://data.nola.gov/Geographic-Reference/NOLA-Short-Term-Rentals-Map/psp3-bvzw](https://data.nola.gov/Geographic-Reference/NOLA-Short-Term-Rentals-Map/psp3-bvzw) translates into -> https://data.nola.gov/resource/psp3-bvzw.json +* [https://data.nola.gov/Health-Education-and-Social-Services/NOLA-Grocery-Stores/fwm6-d78i](https://data.nola.gov/Health-Education-and-Social-Services/NOLA-Grocery-Stores/fwm6-d78i) translates into -> https://data.nola.gov/resource/fwm6-d78i.json -And then the ID `psp3-bvzw` can be referenced in Koop like so: +And then the ID `fwm6-d78i` can be referenced in Koop like so: -[http://koop.dc.esri.com/socrata/nola/psp3-bvzw](http://koop.dc.esri.com/socrata/nola/psp3-bvzw) +http://koop.dc.esri.com/socrata/nola/fwm6-d78i +If your Socrata data has more than one location column, you can specify the desired location column in the HTTP request like this: + +https://path_to_koop/socrata/socrataProvider/dataSetID!spatialColumn + +## Handle Large Datasets + +The Socrata API defaults to 1000 results per request, but can be set to return up to 50,000. Koop will page through large datasets to capture all the points. To change the number of results per request, modify the 'limit' variable in the socrata.getResource function in models/Socrata.js. ## Examples -Here's a few examples of data hosted in Socrata and accessed via Koop +Here are a few examples of data hosted in Socrata and accessed via Koop. 
-* GeoJSON [http://koop.dc.esri.com/socrata/nola/psp3-bvzw](http://koop.dc.esri.com/socrata/nola/psp3-bvzw) -* FeatureService [http://koop.dc.esri.com/socrata/nola/psp3-bvzw/FeatureServer/0] -* All of the publicly registered Socrata instances [http://koop.dc.esri.com/socrata](http://koop.dc.esri.com/socrata) +* GeoJSON: http://koop.dc.esri.com/socrata/nola/fwm6-d78i +* FeatureService: http://koop.dc.esri.com/socrata/nola/fwm6-d78i/FeatureServer/0 +* All publicly registered Socrata instances: http://koop.dc.esri.com/socrata diff --git a/controller/index.js b/controller/index.js index 32581c2..8f30a7d 100644 --- a/controller/index.js +++ b/controller/index.js @@ -136,6 +136,7 @@ var Controller = function (Socrata, BaseController) { res.send(err, 500) } else { // Get the item + req.query.limit = 10000000 Socrata.getResource(data.host, req.params.id, req.params.item, req.query, function (error, geojson) { if (error) { res.send(error, 500) diff --git a/models/Socrata.js b/models/Socrata.js index 6220f14..3614759 100644 --- a/models/Socrata.js +++ b/models/Socrata.js @@ -39,70 +39,140 @@ var Socrata = function (koop) { } socrata.socrata_path = '/resource/' - socrata.socrata_view_path = '/resource/' - // got the service and get the item socrata.getResource = function (host, hostId, id, options, callback) { - var fields, types, - type = 'Socrata', - key = id + var type = 'Socrata', + key = id, + locFieldName, + urlid, + paging = false, + limit = 1000 + + // test id for '!' 
character indicating presence of a column name and handle + if (id.indexOf('!') !== -1) { + locFieldName = id.substring(id.indexOf('!') + 1, id.length) + urlid = id.substring(0, id.indexOf('!')) + } else { + urlid = id + } + // attempt to load from cache, if error perform new request and get first page koop.Cache.get(type, key, options, function (err, entry) { if (err) { - var url = host + socrata.socrata_path + id + '.json' - var meta_url = host + socrata.socrata_view_path + id + '.json' - // dmf: have to make a request to the views endpoint in order to get metadata - var name - - socrata.request(meta_url, function (err, data, response) { + var url = host + socrata.socrata_path + urlid + '.json?$order=:id&$limit=' + limit + socrata.request(url, function (err, data, response) { if (err) { callback(err, null) } else { - try { - name = JSON.parse(data.body).name - } catch(e) { - callback(e, null) + // test to see if paging will be needed later + if (Object.keys(JSON.parse(data.body)).length === limit) { + paging = true } - } - socrata.request(url, function (err, data, response) { - if (err) { - callback(err, null) - } else { - try { - types = JSON.parse(data.headers['x-soda2-types']) - fields = JSON.parse(data.headers['x-soda2-fields']) - var locationField + // get name of location field + try { + var locationField + if (locFieldName) { + locationField = locFieldName + } else { + var types = JSON.parse(data.headers['x-soda2-types']) + var fields = JSON.parse(data.headers['x-soda2-fields']) types.forEach(function (t, i) { if (t === 'location') { locationField = fields[i] } }) + } - socrata.toGeojson(JSON.parse(data.body), locationField, function (err, geojson) { + // parse first page to geoJSON and insert + socrata.toGeojson(JSON.parse(data.body), locationField, fields, function (err, geojson) { + if (err) { + return callback(err) + } + geojson.updated_at = new Date(data.headers['last-modified']).getTime() + geojson.name = id + geojson.host = { + id: hostId, + url: 
host + } + koop.Cache.insert(type, key, geojson, 0, function (err, success) { if (err) { return callback(err) } - geojson.updated_at = new Date(data.headers['last-modified']).getTime() - geojson.name = name || id - geojson.host = { - id: hostId, - url: host - } - koop.Cache.insert(type, key, geojson, 0, function (err, success) { - if (err) { - return callback(err) + if (success) { + // check to see if paging is needed + if (paging === false) { + callback(null, [geojson]) + } else { + // create GeoJSON return object + var retGeoJSON = geojson + // detrmine count of table and needed pages + var count, pages, + pagesComplete = 0, + countUrl = host + socrata.socrata_path + urlid + '.json?$select=count(*)' + request.get(countUrl, function (err, data, response) { + if (err) { + return callback(err) + } + count = parseInt(JSON.parse(data.body)[0].count, 10) + if ((count / limit) % 1 === 0) { + pages = (count / limit - 1) + } else { + pages = Math.floor(count / limit) + } + // page through data + for (var p = 1; p <= pages; p++) { + var pUrl = host + socrata.socrata_path + urlid + '.json?$order=:id&$limit=' + limit + '&$offset=' + (p * limit) + request.get(pUrl, function (err, data, response) { + if (err) { + return callback(err) + } + // parse pages to GeoJSON and insert partial + socrata.toGeojson(JSON.parse(data.body), locationField, function (err, geojson) { + if (err) { + return callback(err) + } + geojson.updated_at = new Date(data.headers['last-modified']).getTime() + geojson.name = id + geojson.host = { + id: hostId, + url: host + } + koop.Cache.insertPartial(type, key, geojson, 0, function (err, success) { + if (err) { + return callback(err) + } + if (success) { + // append geojson to return object + geojson.features.forEach(function (f) { + retGeoJSON.features.push(f) + }) + // update pages completed and check for completion of pages + pagesComplete++ + checkDone() + } + }) + }) + }) + } + + // function to check completion of pages + var checkDone = function () { 
+ if (pagesComplete === pages) { + callback(null, [retGeoJSON]) + } else { + + } + } + }) } - callback(null, [geojson]) - }) + } }) - } catch(e) { - if (koop && koop.log) { - koop.log.error('Unable to parse response %s', url) - } - callback(e, null) - } + }) + } catch (e) { + koop.log.error('Unable to parse response %s', url) + callback(e, null) } - }) + } }) } else { callback(null, entry) @@ -110,33 +180,74 @@ var Socrata = function (koop) { }) } - socrata.toGeojson = function (json, locationField, callback) { + socrata.toGeojson = function (json, locationField, fields, callback) { if (!json || !json.length) { callback('Error converting data to geojson', null) } else { - var geojson = {type: 'FeatureCollection', features: []} - var geojsonFeature + var geojson = { type: 'FeatureCollection', features: [] } + var geojsonFeature, + newFields = [] json.forEach(function (feature, i) { - geojsonFeature = {type: 'Feature', geometry: {}, id: i + 1} + var lat, lon + geojsonFeature = { type: 'Feature', geometry: {}, id: i + 1 } + + // make sure each feature has each property and flatten objects + fields.forEach(function (f) { + if (f.substring(0, 1) !== ':') { + if (typeof feature[f] === 'object') { + for (var v in feature[f]) { + var newAttr = f + '_' + v + feature[newAttr] = feature[f][v] + newFields.push(newAttr) + } + delete feature[f] + } + } + }) + if (feature && locationField) { - if (feature[locationField] && feature[locationField].latitude && feature[locationField].longitude) { - geojsonFeature.geometry.coordinates = [parseFloat(feature[locationField].longitude), parseFloat(feature[locationField].latitude)] + lon = parseFloat(feature[locationField].longitude) + lat = parseFloat(feature[locationField].latitude) + if ((lon < -180 || lon > 180) || (lat < -90 || lat > 90)) { + geojsonFeature.geometry = null + geojsonFeature.properties = feature + geojson.features.push(geojsonFeature) + } else { + geojsonFeature.geometry.coordinates = [lon, lat] 
geojsonFeature.geometry.type = 'Point' delete feature.location geojsonFeature.properties = feature geojson.features.push(geojsonFeature) } } else if (feature && feature.latitude && feature.longitude) { - geojsonFeature.geometry.coordinates = [parseFloat(feature.longitude), parseFloat(feature.latitude)] - geojsonFeature.geometry.type = 'Point' - geojsonFeature.properties = feature - geojson.features.push(geojsonFeature) + lon = parseFloat(feature.longitude) + lat = parseFloat(feature.latitude) + if ((lon < -180 || lon > 180) || (lat < -90 || lat > 90)) { + geojsonFeature.geometry = null + geojsonFeature.properties = feature + geojson.features.push(geojsonFeature) + } else { + geojsonFeature.geometry.coordinates = [lon, lat] + geojsonFeature.geometry.type = 'Point' + geojsonFeature.properties = feature + geojson.features.push(geojsonFeature) + } } else { geojsonFeature.geometry = null geojsonFeature.properties = feature geojson.features.push(geojsonFeature) } }) + // 2nd loop over the data to ensure all new fields are present + if (newFields && newFields.length) { + geojson.features.forEach(function (feature) { + newFields.forEach(function (field) { + if (!feature.properties[field]) { + feature.properties[field] = null + } + }) + }) + } callback(null, geojson) } } @@ -161,7 +272,7 @@ var Socrata = function (koop) { locationField = fields[i] } }) - socrata.toGeojson(JSON.parse(data.body), locationField, function (error, geojson) { + socrata.toGeojson(JSON.parse(data.body), locationField, fields, function (error, geojson) { geojson.updated_at = new Date(data.headers['last-modified']).getTime() geojson.name = data.name || key geojson.host = data.host diff --git a/package.json b/package.json index 0428a21..fad0e93 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "koop-socrata", - "version": "0.1.0", + "version": "0.1.1", "description": "A socrata wrapper for koop ", "main": "index.js", "scripts": { @@ -9,7 +9,6 @@ "dependencies": { "config": 
"~0.4.35", "ejs": "~1.0.0", - "node.extend": "~1.0.10", "request": "^2.51.0", "sphericalmercator": "~1.0.2" }, diff --git a/test/controller.js b/test/controller.js new file mode 100644 index 0000000..b382e17 --- /dev/null +++ b/test/controller.js @@ -0,0 +1,103 @@ +var koop = require('koop')({}), + kooplib = require('koop/lib'), + sinon = require('sinon'), + test = require('tape'), + request = require('supertest') + +// use Koop's local cache as a db for tests +kooplib.Cache = new kooplib.DataCache(koop) +kooplib.Cache.db = kooplib.LocalDB + +var provider = require('../index.js'), + model = provider.model(kooplib), + controller = provider.controller(model, kooplib.BaseController) + +koop._bindRoutes(provider.routes, controller) + +var sample_id = 'seattle', + sample_host = 'https://data.seattle.gov' + +// In the setup test we create several stubs that squash the +// normal behavoir of the model/controller methods. +// This allows us the only test the controller & routing and not the model here +test('setup', function (t) { + sinon.stub(model, 'register', function (id, host, callback) { + callback(null, id) + }) + sinon.stub(model, 'find', function (id, callback) { + callback(null, [{ 'id': sample_id, 'host': sample_host }]) + }) + sinon.stub(model, 'dropItem', function (host, item, options, callback) { + callback(null, true) + }) + sinon.stub(model, 'getResource', function (host, id, item, options, callback) { + callback(null, {}) + }) + sinon.stub(controller, 'processFeatureServer', function (req, res, err, geojson, callback) { + res.send({}) + }) + t.end() +}) + +test('register a socrata instance', function (t) { + request(koop) + .post('/socrata') + .set('Content-Type', 'application/json') + .send({ + 'host': sample_host, + 'id': sample_id + }) + .end(function () { + t.equals(model.register.called, true) + t.end() + }) +}) + +test('list the registered socrata instances', function (t) { + request(koop) + .get('/socrata') + .set('Content-Type', 'application/json') 
+ .end(function () { + t.equals(model.find.called, true) + t.end() + }) +}) + +test('getting items calls the models find and findResource method', function (t) { + request(koop) + .get('/socrata/seattle/fake') + .end(function () { + t.equals(model.find.called, true) + t.equals(model.getResource.called, true) + t.end() + }) +}) + +test('dropping items calls the models dropItem method', function (t) { + request(koop) + .get('/socrata/seattle/fake/drop') + .end(function () { + t.equals(model.dropItem.called, true) + t.end() + }) +}) + +test('getting a featureservice calls the model find, getResource methods and controllers featureservice method', function (t) { + request(koop) + .get('/socrata/seattle/fake/FeatureServer') + .end(function () { + t.equals(model.find.called, true) + t.equals(model.getResource.called, true) + t.equals(controller.processFeatureServer.called, true) + t.end() + }) +}) + +test('teardown', function (t) { + model.register.restore() + model.find.restore() + model.dropItem.restore() + model.getResource.restore() + controller.processFeatureServer.restore() + t.end() +}) diff --git a/test/model.js b/test/model.js new file mode 100644 index 0000000..405ea72 --- /dev/null +++ b/test/model.js @@ -0,0 +1,104 @@ +var koop = require('koop/lib'), + test = require('tape'), + sinon = require('sinon'), + fs = require('fs') + +// use Koop's local cache as a db for tests +koop.Cache = new koop.DataCache(koop) +koop.Cache.db = koop.LocalDB + +var socrata = require('../models/Socrata.js')(koop) +var data = JSON.parse(fs.readFileSync(__dirname + '/fixtures/earthquakes.json')) + +var id = 'seattle', + host = 'https://data.seattle.gov' + +test('adding a socrata instance', function (t) { + socrata.register(id, host, function (err, success) { + if (err) throw err + t.deepEqual(success, id) + t.end() + }) +}) + +test('parsing geojson', function (t) { + t.plan(5) + + socrata.toGeojson([], 'location', [], function (err, geojson) { + t.deepEqual(err, 'Error converting 
data to geojson') + }) + + socrata.toGeojson(data, 'location', [], function (err, geojson) { + if (err) throw err + t.deepEqual(geojson.features.length, 1000) + }) + + var features = [{ + obj: { + prop: true + }, + location: { + latitude: 0, + longitude: 0 + } + },{ + location: { + latitude: 0, + longitude: 0 + } + }] + socrata.toGeojson(features, 'location', ['obj'], function (err, geojson) { + if (err) throw err + t.deepEqual(geojson.features[0].properties, {obj_prop: true}) + t.deepEqual(geojson.features[1].properties, {obj_prop: null}) + t.deepEqual(geojson.features.length, 2) + }) + +}) + +test('stub the request method', function (t) { + sinon.stub(socrata, 'request', function (url, callback) { + callback(null, { + 'body': '{ "features": [], "name": "Test" }', + 'headers': { + 'x-soda2-types': '[]', + 'x-soda2-fields': '[]' + } + }) + }) + + var feature = { + type: 'Feature', + properties: {}, + geometry: { + type: 'Point', + coordinates: [0, 0] + } + } + + sinon.stub(socrata, 'toGeojson', function (features, locationField, fields, callback) { + callback(null, { features: [feature] }) + }) + sinon.stub(koop.Cache, 'insert', function (type, key, geojson, layer, callback) { + callback(null, [{ features: [feature] }]) + }) + t.end() +}) + +// This test wont close +test('requesting data', function (t) { + socrata.getResource(host, id, '2tje-83f6', {}, function (err, geojson) { + if (err) throw err + t.equal(socrata.request.called, true) + t.equal(socrata.toGeojson.called, true) + t.deepEqual(geojson[0].features.length, 1) + t.end() + }) +}) + +test('teardown', function (t) { + socrata.request.restore() + socrata.toGeojson.restore() + koop.Cache.insert.restore() + t.end() +})