/
delimited.js
153 lines (111 loc) · 4.22 KB
/
delimited.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
// -------- Delimited Parser ----------
/**
* Handles CSV and other delimited data. Takes in a data string
* and options that can contain: {
* delimiter : "someString" <default is comma>
* }
*/
(function(global, _) {
var DS = (global.DS || (global.DS = {}));
/**
* Parser for CSV and other delimiter-separated text.
*
* Stores the raw data and precompiles the regular expression used to
* tokenize it. Each match of that expression yields three groups:
*   1 - what precedes the field (field delimiter, line break, or
*       start of input),
*   2 - the contents of a quoted field (embedded quotes still doubled),
*   3 - the contents of an unquoted field.
*
* @param {String} data the delimited text to parse.
* @param {Object} [options] optional settings.
* @param {String} [options.delimiter=","] field separator; may be more
*   than one character long.
*/
DS.Parsers.Delimited = function(data, options) {
  this.options = options || {};
  this.delimiter = this.options.delimiter || ",";
  this._data = data;

  // Escape every regex metacharacter in the delimiter. Prefixing the
  // delimiter with a single backslash is not enough: it turns letters
  // and digits into escape classes ("t" -> \t, "d" -> \d) and only
  // covers the first character of a multi-character delimiter.
  var delimiterText = String(this.delimiter);
  var escapedDelimiter = delimiterText.replace(
    /[\-\/\\\^$*+?.()|\[\]{}]/g,
    "\\$&"
  );

  // One character of an unquoted field: anything that is not a quote,
  // a line break, or the start of the delimiter. A single-character
  // delimiter fits in the negated class; a longer one needs a
  // lookahead so that its individual characters remain legal inside
  // a value.
  var unquotedChar = (delimiterText.length === 1) ?
    "[^\"" + escapedDelimiter + "\\r\\n]" :
    "(?:(?!" + escapedDelimiter + ")[^\"\\r\\n])";

  this.__delimiterPatterns = new RegExp(
    (
      // Delimiters.
      "(" + escapedDelimiter + "|\\r?\\n|\\r|^)" +
      // Quoted fields.
      "(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +
      // Standard fields.
      "(" + unquotedChar + "*))"
    ),
    // Global so that exec() walks the input match by match. Not
    // case-insensitive: a letter delimiter must not match its other
    // case, which would then be mistaken for a row break.
    "g"
  );
};
_.extend(DS.Parsers.Delimited.prototype, DS.Parsers.prototype, {
_buildColumns : function(d, sample) {
d._columns = [];
// convert the csv string into the beginnings of a strict
// format. The only thing missing is type detection.
// That happens after all data is parsed.
var parseCSV = function(delimiterPattern, strData, strDelimiter) {
// Check to see if the delimiter is defined. If not,
// then default to comma.
strDelimiter = (strDelimiter || ",");
// Create an array to hold our data. Give the array
// a default empty first row.
// Create an array to hold our individual pattern
// matching groups.
var arrMatches = null;
// track how many columns we have. Once we reach a new line
// mark a flag that we're done calculating that.
var columnCount = 0;
var columnCountComputed = false;
// track which column we're on. Start with -1 because we increment it before
// we actually save the value.
var columnIndex = -1;
// Keep looping over the regular expression matches
// until we can no longer find a match.
while (arrMatches = delimiterPattern.exec(strData)){
// Get the delimiter that was found.
var strMatchedDelimiter = arrMatches[ 1 ];
// Check to see if the given delimiter has a length
// (is not the start of string) and if it matches
// field delimiter. If id does not, then we know
// that this delimiter is a row delimiter.
if ( strMatchedDelimiter.length &&
( strMatchedDelimiter !== strDelimiter )){
// we have reached a new row.
// We are clearly done computing columns.
columnCountComputed = true;
// when we're done with a row, reset the row index to 0
columnIndex = 0;
} else {
// Find the number of columns we're fetching and
// create placeholders for them.
if (!columnCountComputed) {
columnCount++;
}
columnIndex++;
}
// Now that we have our delimiter out of the way,
// let's check to see which kind of value we
// captured (quoted or unquoted).
var strMatchedValue = null;
if (arrMatches[ 2 ]){
// We found a quoted value. When we capture
// this value, unescape any double quotes.
strMatchedValue = arrMatches[ 2 ].replace(
new RegExp( "\"\"", "g" ),
"\""
);
} else {
// We found a non-quoted value.
strMatchedValue = arrMatches[ 3 ];
}
// Now that we have our value string, let's add
// it to the data array.
if (columnCountComputed) {
d._columns[columnIndex].data.push(strMatchedValue);
} else {
// we are building the column names here
d._columns.push(this._buildColumn({
name : strMatchedValue,
data : []
}));
}
}
// Return the parsed data.
return d;
};
parseCSV = _.bind(parseCSV, this);
parseCSV(
this.__delimiterPatterns,
this._data,
this.delimiter);
this._addIdColumn(d, d._columns[0].data.length);
return d;
}
});
}(this, _));