Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

#121 - Reducing google spreadsheet size by adding a fast parser option. Less stable but orders of magnitude faster.
  • Loading branch information...
commit 9c5ed1e958ec13166729b94f9c6003db2229cc13 1 parent da87507
@iros iros authored
View
32 src/importers/google_spreadsheet.js
@@ -7,7 +7,9 @@
* Parameters
* options - Options object. Requires at the very least:
* key - the google spreadsheet key
- * worksheet - the index of the spreadsheet to be retrieved.
+ * worksheet - the index of the worksheet to be retrieved (1 default; sent as "gid" when fast)
+ * OR
+ * sheetName - the name of the sheet to fetch (used only with the fast option)
* OR
* url - a more complex url (that may include filtering.) In this case
* make sure it's returning the feed json data.
@@ -25,14 +27,34 @@
throw new Error("Set options 'key' properties to point to your google document.");
} else {
- options.worksheet = options.worksheet || 1;
- options.url = "https://spreadsheets.google.com/feeds/cells/" +
+ // turning on the "fast" option will use the faster parser,
+ // which downloads less data but is flakier (because google does not
+ // correctly escape various strings when returning json.)
+ if (options.fast) {
+
+ options.url = "https://spreadsheets.google.com/tq?key=" + options.key;
+
+ if (options.sheetName) {
+ options.url += "&sheet=" + options.sheetName;
+ } else {
+ options.url += "&gid=" + (options.worksheet || 1);
+ delete options.worksheet;
+ }
+
+ this.callback = "misodsgs" + new Date().getTime();
+ options.url += "&tqx=version:0.6;responseHandler:" + this.callback;
+ options.url += ";reqId:0;out:json&tq&_=1335871249558#";
+
+ delete options.sheetName;
+
+ } else {
+ options.url = "https://spreadsheets.google.com/feeds/cells/" +
options.key + "/" +
options.worksheet +
"/public/basic?alt=json-in-script&callback=";
-
+ }
+
delete options.key;
- delete options.worksheet;
}
}
View
8 src/importers/remote.js
@@ -34,9 +34,15 @@
options.success( this.extract(data) );
}, this);
+ // do we have a named callback? We need to wrap our
+ // success callback in this name
+ if (this.callback) {
+ window[this.callback] = callback;
+ }
+
// make ajax call to fetch remote url.
Miso.Xhr(_.extend(this.params, {
- success : callback,
+ success : this.callback ? this.callback : callback,
error : options.error
}));
}
View
96 src/parsers/google_spreadsheet.js
@@ -10,63 +10,91 @@
* http://code.google.com/apis/gdata/samples/spreadsheet_sample.html
* Used in conjunction with the Google Spreadsheet Importer.
*/
- Miso.Parsers.GoogleSpreadsheet = function(options) {};
+ Miso.Parsers.GoogleSpreadsheet = function(options) {
+ this.fast = options.fast || false;
+ };
_.extend(Miso.Parsers.GoogleSpreadsheet.prototype, Miso.Parsers.prototype, {
parse : function(data) {
var columns = [],
- columnData = [];
+ columnData = [],
+ keyedData = {},
+ i;
+
+ if (this.fast) {
+
+ // init column names
+ columns = _.pluck(data.table.cols, "label");
+
+ // save data
+ _.each(data.table.rows, function(row) {
+ row = row.c;
+ for(i = 0; i < row.length; i++) {
+ columnData[i] = columnData[i] || [];
+ if (row[i].v === "") {
+ columnData[i].push(null);
+ } else {
+ columnData[i].push(row[i].v);
+ }
+ }
+ });
- var positionRegex = /([A-Z]+)(\d+)/;
- var columnPositions = {};
+ // convert to keyed data.
+ _.each(columns, function(colName, index) {
+ keyedData[colName] = columnData[index];
+ });
- _.each(data.feed.entry, function(cell, index) {
+ } else {
+ var positionRegex = /([A-Z]+)(\d+)/,
+ columnPositions = {};
- var parts = positionRegex.exec(cell.title.$t),
- column = parts[1],
- position = parseInt(parts[2], 10);
+ _.each(data.feed.entry, function(cell, index) {
- if (_.isUndefined(columnPositions[column])) {
+ var parts = positionRegex.exec(cell.title.$t),
+ column = parts[1],
+ position = parseInt(parts[2], 10);
- // cache the column position
- columnPositions[column] = columnData.length;
+ if (_.isUndefined(columnPositions[column])) {
- // we found a new column, so build a new column type.
- columns[columnPositions[column]] = cell.content.$t;
- columnData[columnPositions[column]] = [];
+ // cache the column position
+ columnPositions[column] = columnData.length;
+ // we found a new column, so build a new column type.
+ columns[columnPositions[column]] = cell.content.$t;
+ columnData[columnPositions[column]] = [];
- } else {
- // find position:
- var colpos = columnPositions[column];
+ } else {
- // this is a value for an existing column, so push it.
- columnData[colpos][position-1] = cell.content.$t;
+ // find position:
+ var colpos = columnPositions[column];
- }
- }, this);
+ // this is a value for an existing column, so push it.
+ columnData[colpos][position-1] = cell.content.$t;
- var keyedData = {};
+ }
+ }, this);
- _.each(columnData, function(coldata, column) {
- // fill whatever empty spaces we might have in the data due to empty cells
- coldata.length = _.max(_.pluck(columnData, "length"));
+ _.each(columnData, function(coldata, column) {
+ // fill whatever empty spaces we might have in the data due to empty cells
+ coldata.length = _.max(_.pluck(columnData, "length"));
- // slice off first space. It was alocated for the column name
- // and we've moved that off.
- coldata.splice(0,1);
+ // slice off first space. It was allocated for the column name
+ // and we've moved that off.
+ coldata.splice(0,1);
- for (var i = 0; i < coldata.length; i++) {
- if (_.isUndefined(coldata[i]) || coldata[i] === "") {
- coldata[i] = null;
+ for (var i = 0; i < coldata.length; i++) {
+ if (_.isUndefined(coldata[i]) || coldata[i] === "") {
+ coldata[i] = null;
+ }
}
- }
- keyedData[columns[column]] = coldata;
- });
+ keyedData[columns[column]] = coldata;
+ });
+ }
+
return {
columns : columns,
data : keyedData
View
48 test/unit/importers.js
@@ -358,6 +358,29 @@
stop();
});
+ test("Google spreadsheet fast parsing", function() {
+ var key = "0Asnl0xYK7V16dFpFVmZUUy1taXdFbUJGdGtVdFBXbFE";
+ var sheetName = "States";
+
+ var ds = new Miso.Dataset({
+ key : key,
+ sheetName : sheetName,
+ fast : true,
+ importer: Miso.Importers.GoogleSpreadsheet,
+ parser : Miso.Parsers.GoogleSpreadsheet
+ });
+ stop();
+ ds.fetch({
+ success : function() {
+ equals(ds.length, 6);
+ ok(this._columns.length === 3);
+ ok(_.isEqual(ds.column("State").data, ["AZ","AZ","AZ","MA","MA","MA"]));
+ ok(_.isEqual(ds.column("Value").data, [10,20,30,1,4,7]));
+ start();
+ }
+ });
+ });
+
test("more columns than rows in Google Spreadsheet", function() {
var ds = new Miso.Dataset({
key : "0AgzGUzeWla8QdDZLZnVieS1pOU5VRGxJNERvZ000SUE",
@@ -382,6 +405,31 @@
stop();
});
+ test("more columns than rows in Google Spreadsheet fast parse", function() {
+ var ds = new Miso.Dataset({
+ key : "0AgzGUzeWla8QdDZLZnVieS1pOU5VRGxJNERvZ000SUE",
+ worksheet : "1",
+ fast : true,
+ importer: Miso.Importers.GoogleSpreadsheet,
+ parser : Miso.Parsers.GoogleSpreadsheet
+ });
+ ds.fetch({
+ success : function() {
+ ok(this._columns.length === 5);
+ var ds = this;
+ var row = {'_id': this.rowByPosition(0)._id, 'one': 1, 'two': 2, 'three': 9, 'four': 9};
+
+ _.each(row, function(v,k) {
+ equals(ds.rowByPosition(0)[k], v);
+ });
+ start();
+ },
+ error : function() {
+ }
+ });
+ stop();
+ });
+
module("Polling");
test("Basic polling importer api", function() {
Please sign in to comment.
Something went wrong with that request. Please try again.