A node package for data preprocessing.
The package exposes each individual step, as well as a single function that runs the entire process.
var data = process.csvParser(options);
List of options with defaults -
var options = {
path: ''
};
`path`: String - The path to the data.
var extracted = process.extract(options, data);
List of options with defaults -
var options = {
useHeaders: true
};
`useHeaders`: Boolean - Indicates whether the first row of the data is a heading row. Note - The headings are not used in the process; setting this to true simply strips the first row from the data.
var cleansed = process.cleanse(options, data);
List of options with defaults -
var options = {
formats: [],
ranges: []
};
`formats`: Array of strings naming the type that each field should have. Each string should match the result of `typeof` applied to a value of the expected type (for example, 'number' or 'string').
`ranges`: Array of objects of the form { 'validatorName': 'validatorValue' }.
Available validators -
- greater - expects `value, min`, returns `value > min`;
- greaterOrEqual - expects `value, min`, returns `value >= min`;
- less - expects `value, max`, returns `value < max`;
- lessOrEqual - expects `value, max`, returns `value <= max`;
- between - expects `value, range`, where `range` is a string of the form `'min-max'`, and returns `greater(value, min) && less(value, max)`;
- betweenOrEqual - expects `value, range`, where `range` is a string of the form `'min-max'`, and returns `greaterOrEqual(value, min) && lessOrEqual(value, max)`;
var standardised = process.standardise(options, data);
List of options with defaults -
var options = {
min: 0.1,
max: 0.9,
standardisationMethod: 'default'
};
`min`: number - The minimum value for the standardisation.
`max`: number - The maximum value for the standardisation.
`standardisationMethod`: string - Can be 'default', 'normal' or 'ss' (Sum of Squares).
Array - of integers representing columns of the data to ignore while standardising. They will retain their non-standardised values.
var divided = process.divide(options, data);
List of options with defaults -
var options = {
split: [60, 20, 20]
};
`split`: Array - Indicates how many subsets the data should be split into, and the weighting of each subset.
var result = process.process(options);
The combined process takes all the options that the individual steps take, in one object.
var options = {
path: '',
useHeaders: true,
formats: [],
min: 0.1,
max: 0.9,
standardisationMethod: 'default',
split: [60, 20, 20]
};