Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
Nicolas Panel committed Dec 26, 2014
1 parent 791a61e commit c427fe2
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 34 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,9 @@ var clf = new svm.SVM({
```

__Notes__ :
* If at least one parameter has multiple values, [node-svm](https://github.com/nicolaspanel/node-svm/) will go through all possible combinations to see which one gives the best results (it performs grid-search to maximize [f-score](http://en.wikipedia.org/wiki/F1_score) for classification and minimize [Mean Squared Error](http://en.wikipedia.org/wiki/Mean_squared_error) for regression).
* You can override default values by creating a `.nodesvmrc` file (JSON) at the root of your project.
* If at least one parameter has multiple values, [node-svm](https://github.com/nicolaspanel/node-svm/) will go through all possible combinations to see which one gives the best results (it performs grid-search to maximize [f-score](http://en.wikipedia.org/wiki/F1_score) for classification and minimize [Mean Squared Error](http://en.wikipedia.org/wiki/Mean_squared_error) for regression).
* All possible combinations are evaluated during this grid search.


## Training
Expand Down
9 changes: 6 additions & 3 deletions examples/classification-real-life-example.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,21 @@
var so = require('stringify-object');
var svm = require('../lib');
var fileName = './examples/datasets/webspam_unigram_subset20000.ds';
var numeric = require('numeric');

var clf = new svm.CSVC({
gamma: 8,
gamma: [0.01, 0.1],
c: 8,
kFold: 5,
normalize: false,
kFold: 4,
normalize: true,
reduce: true, // default value
retainedVariance: 0.95
});

svm.read(fileName)
.then(function (dataset) {
console.log('dataset\'s x dimensions: ', numeric.dim(dataset.map(function(ex){ return ex[0]; })));

console.log('start training (may take a while)...');
return clf.train(dataset)
.progress(function(progress){
Expand Down
1 change: 0 additions & 1 deletion lib/commands/train.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ function train(logger, options) {

var defaults = defaultConfig();
var interactive = _o.has(options, 'interactive') ? !!options.interactive: defaults.interactive;
logger.info('train', 'checking parameters');
return checkFileExists(datasetPath)
.then(function (exists) {
if (!exists) {
Expand Down
61 changes: 33 additions & 28 deletions lib/util/read-dataset.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,39 +4,44 @@ var Q = require('q');
var fs = require('fs');
var path = require('path');
var _a = require('mout/array');
var _ = require('underscore');
var numeric = require('numeric');


// Parses LIBSVM-style text (one example per line: "<label> <index>:<value> ...")
// into an array of [featureVector, label] pairs.
//
// data: anything whose toString() yields the file content.
// Returns an array with one [x, y] entry per non-empty line, where x has
// nbFeatures entries (nbFeatures being the largest feature index seen).
//
// NOTE(review): this span looks like a rendered diff hunk. Statements from a
// loop-based variant and from an underscore-chain variant are interleaved
// (duplicate `var` declarations, two consecutive `return`s), so it should not
// be read as one coherent function body. Confirm against the real file.
var readLibsvm = function (data) {
// --- loop-based variant: collect non-empty lines, then scan for the largest index ---
var dataset = [], nbFeatures = 0;
var lines = _a.filter(data.toString().split('\n'), function(str){
return str.trim() !== ''; // remove empty lines
});

// First pass: elts[0] is the label, every following token is "index:value".
lines.forEach(function(line){
var elts = line.split(' ');
for (var i = 1; i < elts.length; i++){
var node = elts[i].split(':');
var index = parseInt(node[0], 10);
if (index > nbFeatures){
nbFeatures = index;
}
}
});
// --- chain-based variant: parse each line once into { y, features: [{ index, value }] } ---
var dataset = [];
data = data.toString();
var lines = _.chain(data.split('\n'))
.filter(function (str) { return str.trim() !== ''; }) // remove empty lines
.map(function(line){
var elts = line.split(' ');
return {
// NOTE(review): the label is kept as the raw string token here, whereas the
// loop-based variant below stores parseFloat(elts[0]) — confirm which is intended.
y: elts[0],
features: _.chain(elts).rest(1).map(function(str){
var node = str.split(':');
return {
index: parseInt(node[0], 10),
value: parseFloat(node[1])
};
}).value()
};
});
// Largest feature index across all parsed lines; used as the vector length.
var nbFeatures = lines
.pluck('features')
.flatten()
.pluck('index')
.max()
.value();
// NOTE(review): looks like leftover debug output — confirm whether it should stay.
console.log(nbFeatures);

// --- loop-based variant: second pass building [x, y] with x pre-filled with 0 ---
lines.forEach(function(line){
var elts = line.split(' ');
var node = [];
node[0] = numeric.rep([nbFeatures],0);
for (var i = 1; i < elts.length ; i++){
var indexValue = elts[i].split(':');
var index = parseInt(indexValue[0], 10);
// presumably feature indices in the file are 1-based, hence the "- 1" — verify
node[0][index - 1] = parseFloat(indexValue[1]);
}
node[1] = parseFloat(elts[0]);
dataset.push(node);
});
return dataset;
// --- chain-based variant: build [x, y] from the pre-parsed features ---
// Features absent from a line keep the 0 that numeric.rep filled in.
return lines
.map(function (line) {
var x = numeric.rep([nbFeatures],0);
line.features.forEach(function(f){
x[f.index-1]= f.value;
});
return [x, line.y];
}).value();
};

var readJson = function (data) {
Expand Down
4 changes: 3 additions & 1 deletion test/util/read-dataset.spec.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
'use strict';

var read = require('../../lib/util/read-dataset');
var numeric = require('numeric');
var expect = require('expect.js');
var xorProblem = [
[[0, 0], 0],
Expand All @@ -22,9 +23,10 @@ describe('#read', function(){
this.timeout(200);
read('./examples/datasets/svmguide1.ds')
.then(function(problem){
expect(problem.length).to.be(3089);
expect(numeric.dim(problem)).to.eql([3089, 2, 4]);
}).done(done);
});

});
describe('json format', function () {
it('should be able to read the xor problem', function (done) {
Expand Down

0 comments on commit c427fe2

Please sign in to comment.