From 73e0e9038152c63a5c6e9bf82ac0d6d61cb461c9 Mon Sep 17 00:00:00 2001
From: Viktor Gal <viktor.gal@maeth.com>
Date: Thu, 8 Mar 2018 15:30:48 +0100
Subject: [PATCH] remove the bullshit

---
 applications/arts/__init__.py                 |   0
 applications/arts/arts.py                     |  67 --
 applications/arts/convert_artsmat.m           |  82 --
 applications/arts/data                        |   1 -
 applications/arts/genomic.py                  | 152 ----
 applications/arts/signal_sensor.py            | 225 -----
 applications/arts/util.py                     |  82 --
 applications/asp/LICENSE                      | 340 --------
 applications/asp/NEWS                         |  11 -
 applications/asp/README                       |  45 -
 applications/asp/__init__.py                  |   0
 applications/asp/asp                          | 319 --------
 applications/asp/data                         |   1 -
 applications/asp/galaxy/asp.sh                |  43 -
 applications/asp/galaxy/asp.xml               | 115 ---
 applications/asp/genomic.py                   | 184 -----
 applications/asp/model.py                     | 143 ----
 applications/asp/seqdict.py                   |  68 --
 applications/asp/signal_detectors.py          | 172 ----
 .../evaluate_multiclass_labels.py             |  72 --
 .../classification/predict_multiclass_svm.py  |  75 --
 .../random_fourier_classification.cpp         | 175 ----
 .../random_fourier_classification.py          | 100 ---
 .../classification/train_multiclass_svm.py    |  98 ---
 applications/classification/utils.py          |  51 --
 applications/easysvm/AUTHORS                  |   3 -
 applications/easysvm/LICENSE                  | 674 ---------------
 applications/easysvm/MANIFEST.in              |   7 -
 applications/easysvm/README                   | 142 ----
 applications/easysvm/data                     |   1 -
 applications/easysvm/distutils-help.txt       |  73 --
 applications/easysvm/esvm/__init__.py         |   0
 applications/easysvm/esvm/datafuncs.py        | 162 ----
 applications/easysvm/esvm/experiment.py       | 773 ------------------
 applications/easysvm/esvm/mldata.py           | 300 -------
 applications/easysvm/esvm/mldata_arff.py      | 115 ---
 applications/easysvm/esvm/parse.py            | 232 ------
 applications/easysvm/esvm/plots.py            | 226 -----
 applications/easysvm/esvm/poim.py             | 128 ---
 applications/easysvm/esvm/utils.py            | 180 ----
 applications/easysvm/galaxy/CloudGen.xml      |  53 --
 applications/easysvm/galaxy/FastaGen.xml      |  52 --
 applications/easysvm/galaxy/MotifGen.xml      |  91 ---
 applications/easysvm/galaxy/README            |   3 -
 applications/easysvm/galaxy/easysvm.xml       | 316 -------
 applications/easysvm/galaxy/eval.xml          |  85 --
 applications/easysvm/galaxy/modelsel.xml      | 251 ------
 applications/easysvm/galaxy/poim.xml          | 106 ---
 applications/easysvm/scripts/datagen.py       | 103 ---
 applications/easysvm/scripts/easysvm.py       |  51 --
 applications/easysvm/setup.py                 |  84 --
 applications/easysvm/splicesites/__init__.py  |   0
 applications/easysvm/splicesites/test_gc.py   |  50 --
 .../easysvm/splicesites/tutorial_example.py   | 242 ------
 applications/easysvm/splicesites/utils.py     | 288 -------
 .../easysvm/tutpaper/data/effect_of_c.data    |  69 --
 .../easysvm/tutpaper/data/nonlinear.data      |  71 --
 .../easysvm/tutpaper/data/small_gc_toy.data   |  15 -
 .../tutpaper/data/small_gc_toy_outlier.data   |  15 -
 applications/easysvm/tutpaper/svm_params.py   | 377 ---------
 applications/msplicer/LICENSE                 | 340 --------
 applications/msplicer/Makefile                |   8 -
 applications/msplicer/NEWS                    |   6 -
 applications/msplicer/README                  | 110 ---
 applications/msplicer/content_sensors.py      |  56 --
 applications/msplicer/convert_mat.m           | 213 -----
 applications/msplicer/data                    |   1 -
 applications/msplicer/dna.fa                  |  74 --
 applications/msplicer/genomic.py              | 152 ----
 applications/msplicer/model.py                | 307 -------
 applications/msplicer/msplicer                | 355 --------
 applications/msplicer/plif.py                 | 224 -----
 applications/msplicer/seqdict.py              |  68 --
 applications/msplicer/signal_detectors.py     | 164 ----
 applications/ocr/Ai.py                        |  92 ---
 applications/ocr/FigureWidget.py              | 147 ----
 applications/ocr/MatrixWidget.py              |  66 --
 applications/ocr/QuadrWidget.py               |  30 -
 applications/ocr/README                       |   9 -
 applications/ocr/common.py                    |  31 -
 applications/ocr/data                         |   1 -
 applications/ocr/predict                      | 203 -----
 applications/ocr/train                        |  64 --
 applications/tapkee/faces_embedding.py        |  68 --
 applications/tapkee/octave_ltsa.m             |  11 -
 applications/tapkee/samples/data.py           |   9 -
 applications/tapkee/samples/dm.py             |  33 -
 applications/tapkee/samples/hlle.py           |  22 -
 applications/tapkee/samples/isomap.py         |  39 -
 applications/tapkee/samples/klle.py           |  35 -
 applications/tapkee/samples/la.py             |  33 -
 applications/tapkee/samples/lle.py            |  28 -
 applications/tapkee/samples/lltsa.py          |  22 -
 applications/tapkee/samples/lpp.py            |  20 -
 applications/tapkee/samples/ltsa.py           |  22 -
 applications/tapkee/samples/mds.py            |  40 -
 applications/tapkee/samples/npe.py            |  22 -
 applications/tapkee/swissroll_embedding.py    |  88 --
 applications/tapkee/words_embedding.py        |  56 --
 benchmarks/hasheddoc_benchmarks.cpp           |  51 --
 benchmarks/kernel_matrix_sum_benchmark.cpp    | 111 ---
 benchmarks/rf_feats_benchmark.cpp             | 127 ---
 benchmarks/rf_feats_kernel_comp.cpp           | 136 ---
 benchmarks/sparse_test.cpp                    | 206 -----
 104 files changed, 11859 deletions(-)
 delete mode 100644 applications/arts/__init__.py
 delete mode 100755 applications/arts/arts.py
 delete mode 100644 applications/arts/convert_artsmat.m
 delete mode 120000 applications/arts/data
 delete mode 100644 applications/arts/genomic.py
 delete mode 100644 applications/arts/signal_sensor.py
 delete mode 100644 applications/arts/util.py
 delete mode 100644 applications/asp/LICENSE
 delete mode 100644 applications/asp/NEWS
 delete mode 100644 applications/asp/README
 delete mode 100644 applications/asp/__init__.py
 delete mode 100755 applications/asp/asp
 delete mode 120000 applications/asp/data
 delete mode 100644 applications/asp/galaxy/asp.sh
 delete mode 100644 applications/asp/galaxy/asp.xml
 delete mode 100644 applications/asp/genomic.py
 delete mode 100644 applications/asp/model.py
 delete mode 100644 applications/asp/seqdict.py
 delete mode 100644 applications/asp/signal_detectors.py
 delete mode 100644 applications/classification/evaluate_multiclass_labels.py
 delete mode 100644 applications/classification/predict_multiclass_svm.py
 delete mode 100644 applications/classification/random_fourier_classification.cpp
 delete mode 100644 applications/classification/random_fourier_classification.py
 delete mode 100644 applications/classification/train_multiclass_svm.py
 delete mode 100644 applications/classification/utils.py
 delete mode 100644 applications/easysvm/AUTHORS
 delete mode 100644 applications/easysvm/LICENSE
 delete mode 100644 applications/easysvm/MANIFEST.in
 delete mode 100644 applications/easysvm/README
 delete mode 120000 applications/easysvm/data
 delete mode 100644 applications/easysvm/distutils-help.txt
 delete mode 100644 applications/easysvm/esvm/__init__.py
 delete mode 100644 applications/easysvm/esvm/datafuncs.py
 delete mode 100644 applications/easysvm/esvm/experiment.py
 delete mode 100644 applications/easysvm/esvm/mldata.py
 delete mode 100644 applications/easysvm/esvm/mldata_arff.py
 delete mode 100644 applications/easysvm/esvm/parse.py
 delete mode 100644 applications/easysvm/esvm/plots.py
 delete mode 100644 applications/easysvm/esvm/poim.py
 delete mode 100644 applications/easysvm/esvm/utils.py
 delete mode 100644 applications/easysvm/galaxy/CloudGen.xml
 delete mode 100644 applications/easysvm/galaxy/FastaGen.xml
 delete mode 100644 applications/easysvm/galaxy/MotifGen.xml
 delete mode 100644 applications/easysvm/galaxy/README
 delete mode 100644 applications/easysvm/galaxy/easysvm.xml
 delete mode 100644 applications/easysvm/galaxy/eval.xml
 delete mode 100644 applications/easysvm/galaxy/modelsel.xml
 delete mode 100644 applications/easysvm/galaxy/poim.xml
 delete mode 100644 applications/easysvm/scripts/datagen.py
 delete mode 100644 applications/easysvm/scripts/easysvm.py
 delete mode 100755 applications/easysvm/setup.py
 delete mode 100644 applications/easysvm/splicesites/__init__.py
 delete mode 100644 applications/easysvm/splicesites/test_gc.py
 delete mode 100644 applications/easysvm/splicesites/tutorial_example.py
 delete mode 100644 applications/easysvm/splicesites/utils.py
 delete mode 100644 applications/easysvm/tutpaper/data/effect_of_c.data
 delete mode 100644 applications/easysvm/tutpaper/data/nonlinear.data
 delete mode 100644 applications/easysvm/tutpaper/data/small_gc_toy.data
 delete mode 100644 applications/easysvm/tutpaper/data/small_gc_toy_outlier.data
 delete mode 100644 applications/easysvm/tutpaper/svm_params.py
 delete mode 100644 applications/msplicer/LICENSE
 delete mode 100644 applications/msplicer/Makefile
 delete mode 100644 applications/msplicer/NEWS
 delete mode 100644 applications/msplicer/README
 delete mode 100644 applications/msplicer/content_sensors.py
 delete mode 100644 applications/msplicer/convert_mat.m
 delete mode 120000 applications/msplicer/data
 delete mode 100644 applications/msplicer/dna.fa
 delete mode 100644 applications/msplicer/genomic.py
 delete mode 100644 applications/msplicer/model.py
 delete mode 100755 applications/msplicer/msplicer
 delete mode 100644 applications/msplicer/plif.py
 delete mode 100644 applications/msplicer/seqdict.py
 delete mode 100644 applications/msplicer/signal_detectors.py
 delete mode 100644 applications/ocr/Ai.py
 delete mode 100644 applications/ocr/FigureWidget.py
 delete mode 100644 applications/ocr/MatrixWidget.py
 delete mode 100644 applications/ocr/QuadrWidget.py
 delete mode 100644 applications/ocr/README
 delete mode 100644 applications/ocr/common.py
 delete mode 120000 applications/ocr/data
 delete mode 100755 applications/ocr/predict
 delete mode 100755 applications/ocr/train
 delete mode 100644 applications/tapkee/faces_embedding.py
 delete mode 100644 applications/tapkee/octave_ltsa.m
 delete mode 100644 applications/tapkee/samples/data.py
 delete mode 100644 applications/tapkee/samples/dm.py
 delete mode 100644 applications/tapkee/samples/hlle.py
 delete mode 100644 applications/tapkee/samples/isomap.py
 delete mode 100644 applications/tapkee/samples/klle.py
 delete mode 100644 applications/tapkee/samples/la.py
 delete mode 100644 applications/tapkee/samples/lle.py
 delete mode 100644 applications/tapkee/samples/lltsa.py
 delete mode 100644 applications/tapkee/samples/lpp.py
 delete mode 100644 applications/tapkee/samples/ltsa.py
 delete mode 100644 applications/tapkee/samples/mds.py
 delete mode 100644 applications/tapkee/samples/npe.py
 delete mode 100644 applications/tapkee/swissroll_embedding.py
 delete mode 100644 applications/tapkee/words_embedding.py
 delete mode 100644 benchmarks/hasheddoc_benchmarks.cpp
 delete mode 100644 benchmarks/kernel_matrix_sum_benchmark.cpp
 delete mode 100644 benchmarks/rf_feats_benchmark.cpp
 delete mode 100644 benchmarks/rf_feats_kernel_comp.cpp
 delete mode 100644 benchmarks/sparse_test.cpp

diff --git a/applications/arts/__init__.py b/applications/arts/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/applications/arts/arts.py b/applications/arts/arts.py
deleted file mode 100755
index 469d40c37ae..00000000000
--- a/applications/arts/arts.py
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/usr/bin/env python
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or
-# (at your option) any later version.
-#
-# Written (W) 2008-2009 Soeren Sonnenburg
-# Copyright (C) 2008-2009 Fraunhofer Institute FIRST and Max-Planck-Society
-
-import numpy
-import os
-import sys
-import optparse
-import bz2
-from signal_sensor import SignalSensor
-from genomic import read_single_fasta
-
-arts_version = 'v0.3'
-def_file = bz2.BZ2File('data/ARTS.dat.bz2')
-
-def print_version():
-    sys.stderr.write('arts ' + arts_version + '\n')
-
-def parse_options():
-    parser = optparse.OptionParser(usage="usage: %prog [options] seq.fa")
-
-    parser.add_option("-o", "--outfile", type="str", default='stdout',
-                              help="File to write the results to")
-    parser.add_option("-v", "--version", default=False,
-                              help="Show some more information")
-    parser.add_option("--organism", type="str", default='Worm',
-                              help="""use model for organism when predicting
-                              (one of Cress, Fish, Fly, Human, Worm)""")
-
-    (options, args) = parser.parse_args()
-    if options.version:
-        print_version()
-        sys.exit(0)
-
-    if len(args) != 1:
-        parser.error("incorrect number of arguments")
-
-    fafname = args[0]
-    if not os.path.isfile(fafname):
-        parser.error("fasta file does not exist")
-
-    if options.outfile == 'stdout':
-        outfile = sys.stdout
-    else:
-        try:
-            outfile = file(options.outfile, 'w')
-        except IOError:
-            parser.error("could not open %s for writing" % options.outfile)
-
-    return (fafname, outfile)
-
-if __name__ == '__main__':
-    (fafname, outfile) = parse_options()
-    seq = read_single_fasta(fafname)
-
-    arts = SignalSensor()
-    arts.from_file(def_file)
-    preds = arts.predict(seq)
-
-    for p in preds:
-        outfile.write('%+g\n' % p)
diff --git a/applications/arts/convert_artsmat.m b/applications/arts/convert_artsmat.m
deleted file mode 100644
index 2adefa1518e..00000000000
--- a/applications/arts/convert_artsmat.m
+++ /dev/null
@@ -1,82 +0,0 @@
-function convert_mat()
-
-%load('ARTS-info.mat');
-load('/home/sonne/stuff/ARTS-info.mat');
-targetname='ARTS.dat';
-
-fid=fopen(targetname,'wb');
-
-fprintf(fid, '%%arts version: 1.0\n\n');
-
-idx=find(alphas~=0);
-data=[trainData.xPos,trainData.xNeg];
-alphas=alphas(idx);
-center=trainData.tssPosition;
-
-svs1=data(par.select1+center, idx);
-svs2=data(par.select2+center, idx);
-svs3=data(par.select3+center, idx);
-
-fprintf(fid, 'b=%e\n', b);
-fprintf(fid, 'alphas=');
-write_mat(fid, alphas);
-
-fprintf(fid, 'num_kernels=%d\n', 3);
-
-fprintf(fid, 'kernel_name1=%s\n', 'wdshift');
-fprintf(fid, 'kernel_left1=%d\n', min(par.select1));
-fprintf(fid, 'kernel_center1=%d\n', 0);
-fprintf(fid, 'kernel_right1=%d\n', max(par.select1));
-fprintf(fid, 'kernel_order1=%d\n', par.order1);
-fprintf(fid, 'kernel_shift1=%d\n', par.shift1);
-fprintf(fid, 'kernel_svs1=');
-write_string(fid, svs1);
-fprintf(fid,'\n');
-
-fprintf(fid, 'kernel_name2=%s\n', 'spectrum');
-fprintf(fid, 'kernel_left2=%d\n', min(par.select2));
-fprintf(fid, 'kernel_center2=%d\n', 0);
-fprintf(fid, 'kernel_right2=%d\n', max(par.select2));
-fprintf(fid, 'kernel_order2=%d\n', par.wordLen2);
-fprintf(fid, 'kernel_svs2=');
-write_string(fid, svs2);
-fprintf(fid,'\n');
-
-fprintf(fid, 'kernel_name3=%s\n', 'spectrum');
-fprintf(fid, 'kernel_left3=%d\n', min(par.select3));
-fprintf(fid, 'kernel_center3=%d\n', 0);
-fprintf(fid, 'kernel_right3=%d\n', max(par.select3));
-fprintf(fid, 'kernel_order3=%d\n', par.wordLen3);
-fprintf(fid, 'kernel_svs3=');
-write_string(fid, svs3);
-fprintf(fid,'\n');
-fclose(fid);
-
-system(sprintf('bzip2 -9 "%s"\n', targetname));
-
-function write_string(fid, x)
-	fprintf(fid, '[\n');
-	for i=1:size(x,2),
-		fprintf(fid, '%c', x(1:(size(x,1)-1),i));
-		fprintf(fid, '%c\n', x(size(x,1),i));
-	end
-	fprintf(fid, ']\n');
-
-function write_mat(fid, x)
-	if size(x,1)==1,
-		fprintf(fid, '[');
-		fprintf(fid, '%e, ', x(1:(length(x)-1)));
-		fprintf(fid, '%e', x(end));
-	else
-		fprintf(fid, '[');
-		for i=1:size(x,2),
-			fprintf(fid, '%e, ', x(1:(size(x,1)-1),i));
-
-			if i<size(x,2)
-				fprintf(fid, '%e;\n ', x(size(x,1),i));
-			else
-				fprintf(fid, '%e', x(size(x,1),i));
-			end
-		end
-	end
-	fprintf(fid, ']\n');
diff --git a/applications/arts/data b/applications/arts/data
deleted file mode 120000
index 918310107c7..00000000000
--- a/applications/arts/data
+++ /dev/null
@@ -1 +0,0 @@
-../../data/arts
\ No newline at end of file
diff --git a/applications/arts/genomic.py b/applications/arts/genomic.py
deleted file mode 100644
index 8e7a0d394ad..00000000000
--- a/applications/arts/genomic.py
+++ /dev/null
@@ -1,152 +0,0 @@
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Written (W) 2006-2008 Soeren Sonnenburg
-# Written (W) 2006-2007 Mikio Braun
-# Copyright (C) 2007 Fraunhofer Institute FIRST and Max-Planck-Society
-#
-
-import time
-from string import maketrans
-
-""" this function is 100% compatible to the matlab function, thus it is one based (!)
-    use one_based=False if needed, then however the interval is [start,stop) (excluding stop)
-"""
-def load_genomic(chromosome, strand, start, stop, genome, one_based=True):
-    fname = '/fml/ag-raetsch/share/databases/genomes/' + genome + '/' + chromosome[3:] + '.flat'
-    f = file(fname)
-    if one_based:
-        f.seek(start - 1)
-        str = f.read(stop - start + 1)
-    else:
-        f.seek(start)
-        str = f.read(stop - start)
-
-    if strand == '-':
-        return reverse_complement(str)
-    elif strand == '+':
-        return str
-    else:
-        print 'strand must be + or -'
-        raise KeyError
-
-""" read a table browser ascii output file (http://genome.ucsc.edu/cgi-bin/hgTables) """
-def read_table_browser(f):
-    table = dict();
-    for l in f.readlines():
-        if not l.startswith('#'):
-            (name, chrom, strand, txStart, txEnd, cdsStart, cdsEnd, exonCount, exonStarts, exonEnds, proteinID, alignID) = l.split('\t')
-            exonStarts = [ int(i) for i in exonStarts.split(',')[:-1] ]
-            exonEnds = [ int(i) for i in exonEnds.split(',')[:-1] ]
-
-            table[name] = { 'chrom': chrom, 'strand': strand, 'txStart': int(txStart), 'txEnd': int(txEnd),
-            'cdsStart': int(cdsStart), 'cdsEnd': int(cdsEnd), 'exonCount': int(exonCount), 'exonStarts': exonStarts,
-            'exonEnds': exonEnds, 'proteinID': proteinID, 'alignID': alignID[:-1] }
-
-    return table
-
-""" get promoter region """
-def get_promoter_region(chromosome, strand, gene_start, gene_end, genome, length):
-
-    if strand == '+':
-        return load_genomic(chromosome, strand, gene_start, gene_start + length, genome, one_based=False)
-    elif strand == '-':
-        return load_genomic(chromosome, strand, gene_end, gene_end + length, genome, one_based=False)
-    else:
-        print 'unknown strand'
-        return None
-
-""" reverse + complement a DNA sequence (only letters ACGT are translated!)
-    FIXME won't work with all the rest like y... """
-def reverse_complement(str):
-    t = maketrans('acgtACGT', 'tgcaTGCA')
-    return str[len(str)::-1].translate(t)
-
-""" works only with .fa files that contain a single entry """
-def read_single_fasta(fname):
-    str = file(fname).read()
-    str = str[str.index('\n') + 1:].replace('\n', '')
-    return str
-
-""" writes only single enty .fa files """
-def write_single_fasta(fname, name, str, linelen=60):
-    header = '>' + name + '\n'
-    f = file(fname, 'a')
-    f.write(header)
-    for i in xrange(0, len(str), linelen):
-        f.write(str[i:i + linelen] + '\n')
-    f.close()
-
-""" read fasta as dictionary """
-def read_fasta(f):
-    fasta = dict()
-
-    for s in f.readlines():
-        if s.startswith('>'):
-            key = s[1:-1]
-            fasta[key] = ""
-        else:
-            fasta[key] += s[:-1]
-
-    return fasta
-
-""" write dictionary fasta """
-def write_fasta(f, d, linelen=60):
-    for k in sorted(d):
-        f.write('>%s\n' % k);
-        s = d[k]
-        for i in xrange(0, len(s), linelen):
-            f.write(s[i:i + linelen] + '\n')
-
-def write_gff(f, (source, version), (seqtype, seqname), descrlist, skipheader=False):
-    """ writes a gff version 2 file
-        descrlist is a list of dictionaries, each of which contain these fields:
-        <seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
-    """
-
-    if not skipheader:
-        f.write('##gff-version 2\n')
-        f.write('##source-version %s %s\n' % (source, version))
-
-        t = time.localtime()
-        f.write("##date %d-%d-%d %d:%d:%d\n" % t[0:6])
-
-    f.write('##Type %s %s\n' % (seqtype, seqname))
-
-    for d in descrlist:
-        f.write('%s\t%s\t%s\t%d\t%d\t%+f\t%s\t%d' % (d['seqname'], d['source'],
-                                            d['feature'], d['start'], d['end'],
-                                            d['score'], d['strand'], d['frame']))
-        if d.has_key('attributes'):
-            f.write('\t' + d['attributes'])
-            if d.has_key('comments'):
-                f.write('\t' + d['comments'])
-        f.write('\n')
-
-
-if __name__ == '__main__':
-    import sys, os
-
-    table = read_table_browser(file('/fml/ag-raetsch/home/sonne/addnet/tfbs/share/data/wt1_bibliosphere_table_browser_hg17.txt'))
-    print table.keys()
-    print table[table.keys()[0]]
-    d = { 'ahoernchen' : 'ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT',
-          'bhoernchen' : 'GATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACA' }
-
-    write_fasta(sys.stdout, d)
-    write_fasta(file('/tmp/test.fa', 'w'), d)
-
-    d2 = read_fasta(file('/tmp/test.fa'))
-    os.unlink('/tmp/test.fa')
-
-    print d
-    print d2
-    print d == d2
-
-    p = load_genomic('chr5', '+', 100000, 100100, 'hg17')
-    n = load_genomic('chr1', '-', 3000000, 3001000, 'mm7')
-    write_single_fasta('bla.fa', 'bla', 'ACGT')
-    n2 = read_single_fasta('bla.fa')
diff --git a/applications/arts/signal_sensor.py b/applications/arts/signal_sensor.py
deleted file mode 100644
index 0498570457a..00000000000
--- a/applications/arts/signal_sensor.py
+++ /dev/null
@@ -1,225 +0,0 @@
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Written (W) 2008 Soeren Sonnenburg
-# Written (W) 2011 Christian Widmer
-# Copyright (C) 2008-2011 Fraunhofer Institute FIRST and Max-Planck-Society
-
-import numpy
-import sys
-
-from util import *
-
-from shogun import StringCharFeatures, StringWordFeatures, CombinedFeatures, DNA
-from shogun import CombinedKernel, WeightedDegreePositionStringKernel
-from shogun import K_COMMWORDSTRING, CommWordStringKernel, IdentityKernelNormalizer
-from shogun import SortWordString
-from shogun import KernelMachine
-
-
-class Sensor(object):
-    """
-    Sensor has window (left,center,right) of length right-left+1
-    with center at "center"
-    """
-
-    def __init__(self, window=None, kernel=None, train_features=None):
-        self.kernel = kernel
-        self.window = window
-        self.train_features = train_features
-        self.preproc = None
-
-    def from_file(self, file, num):
-        """
-        parse lines with num as suffix, e.g.
-
-        kernel_<arg><num>=<value>
-        """
-        l = file.readline()
-
-        name = None
-        left = None
-        right = None
-        center = None
-        order = None
-        shift = None
-        svs = None
-
-        while l:
-            if l.find('%d=' % num) > -1:
-                if name is None: name = parse_name(l, 'kernel_name%d' % num)
-                if left is None: left = parse_int(l, 'kernel_left%d' % num)
-                if right is None: right = parse_int(l, 'kernel_right%d' % num)
-                if center is None: center = parse_int(l, 'kernel_center%d' % num)
-                if order is None: order = parse_int(l, 'kernel_order%d' % num)
-                if shift is None: shift = parse_int(l, 'kernel_shift%d' % num)
-                if svs is None: svs = parse_string(l, file, 'kernel_svs%d' % num)
-            else:
-                self.window = (left, center, right)
-                return self.init_sensor({ 'name' : name, 'order': order, 'shift' : shift}, svs)
-
-            l = file.readline()
-
-    def init_sensor(self, kernel, svs):
-        f = StringCharFeatures(svs, DNA)
-
-        kname = kernel['name']
-        if  kname == 'spectrum':
-            wf = StringWordFeatures(f.get_alphabet())
-            wf.obtain_from_char(f, kernel['order'] - 1, kernel['order'], 0, False)
-
-            pre = SortWordString()
-            pre.init(wf)
-            wf.add_preprocessor(pre)
-            wf.apply_preprocessor()
-            f = wf
-
-            k = CommWordStringKernel(0, False)
-            k.set_use_dict_diagonal_optimization(kernel['order'] < 8)
-            self.preproc = pre
-
-        elif kname == 'wdshift':
-                k = WeightedDegreePositionStringKernel(0, kernel['order'])
-                k.set_normalizer(IdentityKernelNormalizer())
-                k.set_shifts(kernel['shift'] *
-                        numpy.ones(f.get_max_vector_length(), dtype=numpy.int32))
-                k.set_position_weights(1.0 / f.get_max_vector_length() *
-                        numpy.ones(f.get_max_vector_length(), dtype=numpy.float64))
-        else:
-            raise "Currently, only wdshift and spectrum kernels supported"
-
-        self.kernel = k
-        self.train_features = f
-
-        return (self.kernel, self.train_features)
-
-    def get_test_features(self, seq, window):
-        start = self.window[0] - window[0]
-        end = len(seq) - window[1] + self.window[2]
-        size = self.window[2] - self.window[0] + 1
-        seq = seq[start:end]
-        seq = seq.replace("N", "A").replace("R", "A").replace("M", "A")
-        f = StringCharFeatures([seq], DNA)
-
-        if self.preproc:
-            wf = StringWordFeatures(f.get_alphabet())
-            o = self.train_features.get_order()
-            wf.obtain_from_char(f, 0, o, 0, False)
-            f = wf
-            f.obtain_by_sliding_window(size, 1, o - 1)
-        else:
-            f.obtain_by_sliding_window(size, 1)
-
-        return f
-
-class SignalSensor(object):
-    """
-    A collection of sensors
-    """
-    def __init__(self):
-        self.sensors = list()
-        self.kernel = CombinedKernel()
-        self.svs = CombinedFeatures()
-        self.svm = None
-        self.window = (+100000, -1000000)
-
-    def from_file(self, file):
-        sys.stderr.write('loading model file')
-        l = file.readline();
-
-        if l != '%arts version: 1.0\n':
-            sys.stderr.write("\nfile not an arts definition file\n")
-            return None
-
-        bias = None
-        alphas = None
-        num_kernels = None
-
-        while l:
-            # skip comment or empty line
-            if not (l.startswith('%') or l.startswith('\n')):
-                if bias is None: bias = parse_float(l, 'b')
-                if alphas is None: alphas = parse_vector(l, file, 'alphas')
-                if num_kernels is None: num_kernels = parse_int(l, 'num_kernels')
-
-                if num_kernels and bias and alphas is not None:
-                    for i in xrange(num_kernels):
-                        s = Sensor()
-                        (k, f) = s.from_file(file, i + 1)
-                        k.io.enable_progress()
-                        self.window = (min(self.window[0], s.window[0]),
-                                max(self.window[1], s.window[2]))
-                        self.sensors.append(s)
-                        self.kernel.append_kernel(k)
-                        self.svs.append_feature_obj(f)
-
-                    self.kernel.init(self.svs, self.svs)
-                    self.svm = KernelMachine(self.kernel, alphas,
-                            numpy.arange(len(alphas), dtype=numpy.int32), bias)
-                    self.svm.io.set_target_to_stderr()
-                    self.svm.io.enable_progress()
-                    self.svm.parallel.set_num_threads(self.svm.parallel.get_num_cpus())
-                    sys.stderr.write('done\n')
-                    return
-
-            l = file.readline()
-
-        sys.stderr.write('error loading model file\n')
-
-
-    def predict(self, seq, chunk_size = int(10e6)):
-        """
-        predicts on whole contig, splits up sequence in chunks of size chunk_size
-        """
-
-        seq_len = len(seq)
-        num_chunks = int(numpy.ceil(float(seq_len) / float(chunk_size)))
-        assert(num_chunks > 0)
-
-	sys.stderr.write("number of chunks for contig: %i\n" % (num_chunks))
-
-        start = 0
-        stop = min(chunk_size, seq_len)
-
-        out = []
-
-        # iterate over chunks
-        for chunk_idx in range(num_chunks):
-
-            sys.stderr.write("processing chunk #%i\n" % (chunk_idx))
-
-            assert (start < stop)
-            chunk = seq[start:stop]
-
-            assert(len(self.sensors) > 0)
-            tf = CombinedFeatures()
-            for i in xrange(len(self.sensors)):
-                f = self.sensors[i].get_test_features(chunk, self.window)
-                tf.append_feature_obj(f)
-
-            sys.stderr.write("initialising kernel...")
-            self.kernel.init(self.svs, tf)
-            sys.stderr.write("..done\n")
-
-            self.svm.set_kernel(self.kernel)
-            lab_out = self.svm.apply().get_values()
-
-            assert(len(lab_out) > 0)
-            out.extend(lab_out)
-
-            # increment chunk
-            start = stop
-            stop = min(stop+chunk_size, seq_len)
-
-
-        l = (-self.window[0]) * [-42]
-        r = self.window[1] * [-42]
-
-        # concatenate
-        ret = l + out + r
-
-        assert(len(ret) == len(seq))
-
-        return ret
diff --git a/applications/arts/util.py b/applications/arts/util.py
deleted file mode 100644
index 9c8e653def8..00000000000
--- a/applications/arts/util.py
+++ /dev/null
@@ -1,82 +0,0 @@
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Written (W) 2006-2008 Soeren Sonnenburg
-# Written (W) 2007 Gunnar Raetsch
-# Copyright (C) 2006-2008 Fraunhofer Institute FIRST and Max-Planck-Society
-#
-
-import sys
-from numpy import mat, array, inf, any, reshape, int32
-
-def parse_name(line, name):
-    if (line.startswith(name)):
-        sys.stdout.write('.'); sys.stdout.flush()
-        return line[line.find('=') + 1:-1]
-    else:
-        return None
-
-def parse_int(line, name):
-    if (line.startswith(name)):
-        sys.stdout.write('.'); sys.stdout.flush()
-        return int(line[line.find('=') + 1:-1])
-    else:
-        return None
-
-def parse_float(line, name):
-    if (line.startswith(name)):
-        sys.stdout.write('.'); sys.stdout.flush()
-        return float(line[line.find('=') + 1:-1])
-    else:
-        return None
-
-def parse_vector(line, file, name):
-    mat = parse_matrix(line, file, name)
-    if mat is None:
-     return mat
-    else:
-     mat = array(mat).flatten()
-     return mat
-
-def parse_string(line, file, name):
-    if (line.startswith(name)):
-        sys.stdout.write('.'); sys.stdout.flush()
-        l = ''
-        lines = []
-        while l is not None and l.find(']') < 0:
-            if l:
-                lines.append(l[:-1])
-            l = file.readline()
-
-        if l.find(']') < 0:
-            sys.stderr.write("string ended without ']'\n")
-            return None
-        else:
-            return lines
-    else:
-        return None
-
-def parse_matrix(line, file, name):
-    if (line.startswith(name)):
-        sys.stdout.write('.'); sys.stdout.flush()
-        if line.find(']') < 0:
-            l = ''
-            while l is not None and l.find(']') < 0:
-                line += l
-                l = file.readline()
-            if l is not None and l.find(']') >= 0:
-                line += l
-
-        if line.find(']') < 0:
-            sys.stderr.write("matrix `" + name + "' ended without ']'\n")
-            return None
-        else:
-            mm = mat(line[line.find('['):line.find(']') + 1])
-            if len(mm.shape) == 1:
-                mm = reshape(mm.shape[0], 1)
-            return mm
-    else:
-        return None
diff --git a/applications/asp/LICENSE b/applications/asp/LICENSE
deleted file mode 100644
index 5b6e7c66c27..00000000000
--- a/applications/asp/LICENSE
+++ /dev/null
@@ -1,340 +0,0 @@
-		    GNU GENERAL PUBLIC LICENSE
-		       Version 2, June 1991
-
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.
-                       59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-			    Preamble
-
-  The licenses for most software are designed to take away your
-freedom to share and change it.  By contrast, the GNU General Public
-License is intended to guarantee your freedom to share and change free
-software--to make sure the software is free for all its users.  This
-General Public License applies to most of the Free Software
-Foundation's software and to any other program whose authors commit to
-using it.  (Some other Free Software Foundation software is covered by
-the GNU Library General Public License instead.)  You can apply it to
-your programs, too.
-
-  When we speak of free software, we are referring to freedom, not
-price.  Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-this service if you wish), that you receive source code or can get it
-if you want it, that you can change the software or use pieces of it
-in new free programs; and that you know you can do these things.
-
-  To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if you
-distribute copies of the software, or if you modify it.
-
-  For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have.  You must make sure that they, too, receive or can get the
-source code.  And you must show them these terms so they know their
-rights.
-
-  We protect your rights with two steps: (1) copyright the software, and
-(2) offer you this license which gives you legal permission to copy,
-distribute and/or modify the software.
-
-  Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software.  If the software is modified by someone else and passed on, we
-want its recipients to know that what they have is not the original, so
-that any problems introduced by others will not reflect on the original
-authors' reputations.
-
-  Finally, any free program is threatened constantly by software
-patents.  We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary.  To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at all.
-
-  The precise terms and conditions for copying, distribution and
-modification follow.
-
-		    GNU GENERAL PUBLIC LICENSE
-   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
-  0. This License applies to any program or other work which contains
-a notice placed by the copyright holder saying it may be distributed
-under the terms of this General Public License.  The "Program", below,
-refers to any such program or work, and a "work based on the Program"
-means either the Program or any derivative work under copyright law:
-that is to say, a work containing the Program or a portion of it,
-either verbatim or with modifications and/or translated into another
-language.  (Hereinafter, translation is included without limitation in
-the term "modification".)  Each licensee is addressed as "you".
-
-Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope.  The act of
-running the Program is not restricted, and the output from the Program
-is covered only if its contents constitute a work based on the
-Program (independent of having been made by running the Program).
-Whether that is true depends on what the Program does.
-
-  1. You may copy and distribute verbatim copies of the Program's
-source code as you receive it, in any medium, provided that you
-conspicuously and appropriately publish on each copy an appropriate
-copyright notice and disclaimer of warranty; keep intact all the
-notices that refer to this License and to the absence of any warranty;
-and give any other recipients of the Program a copy of this License
-along with the Program.
-
-You may charge a fee for the physical act of transferring a copy, and
-you may at your option offer warranty protection in exchange for a fee.
-
-  2. You may modify your copy or copies of the Program or any portion
-of it, thus forming a work based on the Program, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
-    a) You must cause the modified files to carry prominent notices
-    stating that you changed the files and the date of any change.
-
-    b) You must cause any work that you distribute or publish, that in
-    whole or in part contains or is derived from the Program or any
-    part thereof, to be licensed as a whole at no charge to all third
-    parties under the terms of this License.
-
-    c) If the modified program normally reads commands interactively
-    when run, you must cause it, when started running for such
-    interactive use in the most ordinary way, to print or display an
-    announcement including an appropriate copyright notice and a
-    notice that there is no warranty (or else, saying that you provide
-    a warranty) and that users may redistribute the program under
-    these conditions, and telling the user how to view a copy of this
-    License.  (Exception: if the Program itself is interactive but
-    does not normally print such an announcement, your work based on
-    the Program is not required to print an announcement.)
-
-These requirements apply to the modified work as a whole.  If
-identifiable sections of that work are not derived from the Program,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works.  But when you
-distribute the same sections as part of a whole which is a work based
-on the Program, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Program.
-
-In addition, mere aggregation of another work not based on the Program
-with the Program (or with a work based on the Program) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
-  3. You may copy and distribute the Program (or a work based on it,
-under Section 2) in object code or executable form under the terms of
-Sections 1 and 2 above provided that you also do one of the following:
-
-    a) Accompany it with the complete corresponding machine-readable
-    source code, which must be distributed under the terms of Sections
-    1 and 2 above on a medium customarily used for software interchange; or,
-
-    b) Accompany it with a written offer, valid for at least three
-    years, to give any third party, for a charge no more than your
-    cost of physically performing source distribution, a complete
-    machine-readable copy of the corresponding source code, to be
-    distributed under the terms of Sections 1 and 2 above on a medium
-    customarily used for software interchange; or,
-
-    c) Accompany it with the information you received as to the offer
-    to distribute corresponding source code.  (This alternative is
-    allowed only for noncommercial distribution and only if you
-    received the program in object code or executable form with such
-    an offer, in accord with Subsection b above.)
-
-The source code for a work means the preferred form of the work for
-making modifications to it.  For an executable work, complete source
-code means all the source code for all modules it contains, plus any
-associated interface definition files, plus the scripts used to
-control compilation and installation of the executable.  However, as a
-special exception, the source code distributed need not include
-anything that is normally distributed (in either source or binary
-form) with the major components (compiler, kernel, and so on) of the
-operating system on which the executable runs, unless that component
-itself accompanies the executable.
-
-If distribution of executable or object code is made by offering
-access to copy from a designated place, then offering equivalent
-access to copy the source code from the same place counts as
-distribution of the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
-  4. You may not copy, modify, sublicense, or distribute the Program
-except as expressly provided under this License.  Any attempt
-otherwise to copy, modify, sublicense or distribute the Program is
-void, and will automatically terminate your rights under this License.
-However, parties who have received copies, or rights, from you under
-this License will not have their licenses terminated so long as such
-parties remain in full compliance.
-
-  5. You are not required to accept this License, since you have not
-signed it.  However, nothing else grants you permission to modify or
-distribute the Program or its derivative works.  These actions are
-prohibited by law if you do not accept this License.  Therefore, by
-modifying or distributing the Program (or any work based on the
-Program), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Program or works based on it.
-
-  6. Each time you redistribute the Program (or any work based on the
-Program), the recipient automatically receives a license from the
-original licensor to copy, distribute or modify the Program subject to
-these terms and conditions.  You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties to
-this License.
-
-  7. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Program at all.  For example, if a patent
-license would not permit royalty-free redistribution of the Program by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Program.
-
-If any portion of this section is held invalid or unenforceable under
-any particular circumstance, the balance of the section is intended to
-apply and the section as a whole is intended to apply in other
-circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system, which is
-implemented by public license practices.  Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
-  8. If the distribution and/or use of the Program is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Program under this License
-may add an explicit geographical distribution limitation excluding
-those countries, so that distribution is permitted only in or among
-countries not thus excluded.  In such case, this License incorporates
-the limitation as if written in the body of this License.
-
-  9. The Free Software Foundation may publish revised and/or new versions
-of the General Public License from time to time.  Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-Each version is given a distinguishing version number.  If the Program
-specifies a version number of this License which applies to it and "any
-later version", you have the option of following the terms and conditions
-either of that version or of any later version published by the Free
-Software Foundation.  If the Program does not specify a version number of
-this License, you may choose any version ever published by the Free Software
-Foundation.
-
-  10. If you wish to incorporate parts of the Program into other free
-programs whose distribution conditions are different, write to the author
-to ask for permission.  For software which is copyrighted by the Free
-Software Foundation, write to the Free Software Foundation; we sometimes
-make exceptions for this.  Our decision will be guided by the two goals
-of preserving the free status of all derivatives of our free software and
-of promoting the sharing and reuse of software generally.
-
-			    NO WARRANTY
-
-  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
-FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
-OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
-PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
-OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
-TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
-PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
-REPAIR OR CORRECTION.
-
-  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
-REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
-INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
-OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
-TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
-YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
-PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGES.
-
-		     END OF TERMS AND CONDITIONS
-
-	    How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-
-Also add information on how to contact you by electronic and paper mail.
-
-If the program is interactive, make it output a short notice like this
-when it starts in an interactive mode:
-
-    Gnomovision version 69, Copyright (C) year name of author
-    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-    This is free software, and you are welcome to redistribute it
-    under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License.  Of course, the commands you use may
-be called something other than `show w' and `show c'; they could even be
-mouse-clicks or menu items--whatever suits your program.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a "copyright disclaimer" for the program, if
-necessary.  Here is a sample; alter the names:
-
-  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
-  `Gnomovision' (which makes passes at compilers) written by James Hacker.
-
-  <signature of Ty Coon>, 1 April 1989
-  Ty Coon, President of Vice
-
-This General Public License does not permit incorporating your program into
-proprietary programs.  If your program is a subroutine library, you may
-consider it more useful to permit linking proprietary applications with the
-library.  If this is what you want to do, use the GNU Library General
-Public License instead of this License.
diff --git a/applications/asp/NEWS b/applications/asp/NEWS
deleted file mode 100644
index ce5724b8a19..00000000000
--- a/applications/asp/NEWS
+++ /dev/null
@@ -1,11 +0,0 @@
-2009-05-25  Soeren Sonnenburg <Soeren.Sonnenburg@tuebingen.mpg.de>
-
-    * Asp version 0.2
-		- Fixes for shogun 0.7.3
-		- Several bugfixes (including input was shifted by one)
-		- Drastic speedups (e.g., predictions take ~10 minutes for Human ChrY)
-
-2008-10-14  Soeren Sonnenburg <Soeren.Sonnenburg@first.fraunhofer.de>
-
-    * Asp version 0.1
-		- Initial release of the accurate splice site predictor
diff --git a/applications/asp/README b/applications/asp/README
deleted file mode 100644
index 5842a591eb0..00000000000
--- a/applications/asp/README
+++ /dev/null
@@ -1,45 +0,0 @@
-This is the accurate splicer (asp) program accompanying the paper
-"Accurate Splice Site Prediction Using Support Vector Machines"
-by Soeren Sonnenburg, Gabriele Schweikert, Petra Philips,
-Jonas Behr and Gunnar Raetsch [1].
-
-
-ASP PROGRAM REQUIREMENTS:
-
-Asp requires a working python (2.4 or later) installation with numpy
-(version 1.0 or later) and the shogun toolbox (version 0.7.3 or later)
-- which is available from http://www.shogun-toolbox.org for Linux, MacOSX,
-cygwin/win32. If you are running Debian GNU Linux, shogun 0.7.3 is available in
-debian unstable http://packages.debian.org/unstable/science/shogun-python-modular.
-
-ASP PROGRAM RUNNING TIME AND MEMORY REQUIREMENTS:
-
-Asp requires about 100M of memory for short sequences. Memory requirements
-don't grow much (a additional linear term w.r.t. the length of the input
-sequence). On first run with a new model (see --model option below),
-asp will load and decompress the .bz2 compressed model file and store it
-as a python native pickle dump, which increases startup times a lot.
-Due to the optimizations in [2] splice form prediction (layer 1) times
-won't change much for many/long sequences.
-
-ASP PROGRAM USAGE:
-
-./asp fasta_file.fa
-
-This will read all entries in the .fa file and print a .gff file with the
-predictions for each of the entries to stdout. One may optionally specify the
-start and stop of the transcript via --start <basenum> / --stop <basenum> and
-the model via --model one of worm, fly, cress, fish, human.
-<basenum> is zero based.
-
-
-REFERENCES:
-
-[1] S. Sonnenburg, G. Schweikert, P. Philips, J. Behr and Gunnar Raetsch,
-	Accurate Splice Site Prediction, BMC Bioinformatics, Special Issue from NIPS workshop on
-	New Problems and Methods in Computational Biology Whistler, Canada, 18 December 2006},
-	December, 2007, BMC Bioinformatics,8:(Suppl. 10):S7
-
-[2]	Sonnenburg, S, Rätsch, G, Schäfer, C, Schölkopf, B. Large Scale Multiple
-	Kernel Learning. Journal of Machine Learning Research,7:1531-1565,
-	July 2006, K.Bennett and E.P.-Hernandez Editors.
diff --git a/applications/asp/__init__.py b/applications/asp/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/applications/asp/asp b/applications/asp/asp
deleted file mode 100755
index e5f9d21eec4..00000000000
--- a/applications/asp/asp
+++ /dev/null
@@ -1,319 +0,0 @@
-#!/usr/bin/env python
-"""
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-Written (W) 2007,2011 Gunnar Raetsch
-Written (W) 2006-2009 Soeren Sonnenburg
-Copyright (C) 2006-2010 Fraunhofer Institute FIRST and Max-Planck-Society
-"""
-
-try:
-	import os
-	import os.path
-	import sys
-	import pickle
-	import bz2
-	import numpy
-	import optparse
-	import array
-	import math
-
-	import genomic
-	import model
-	import seqdict
-	import shogun
-
-	d=shogun.WeightedDegreeStringKernel(1)
-	if (d.version.get_version_revision() < 2997):
-		print
-		print "ERROR: SHOGUN VERSION 0.6.2 or later required"
-		print
-		sys.exit(1)
-	from signal_detectors import signal_detectors
-except ImportError:
-	print
-	print "ERROR IMPORTING MODULES, MAKE SURE YOU HAVE SHOGUN INSTALLED"
-	print
-	sys.exit(1)
-
-asp_version='v0.3'
-
-class asp:
-	def __init__(self):
-		self.model = None
-		self.signal = None
-		self.model_name = None
-
-	def sigmoid_transform(self, x):
-		return 1/(1+math.exp(-(x+2))) ;
-
-	def load_model(self, filename):
-		self.model_name = filename
-		f=None
-		picklefile=filename+'.pickle'
-		if os.path.isfile(picklefile):
-			self.model=pickle.load(file(picklefile))
-		else:
-			if filename.endswith('.bz2'):
-				f=bz2.BZ2File(filename);
-			else:
-				f=file(filename);
-
-			self.model=model.parse_file(f)
-			f.close()
-
-			f=file(picklefile,'w')
-			pickle.dump(self.model, f)
-			f.close()
-
-		self.signal=signal_detectors(self.model)
-
-	def write_gff(self, outfile, preds, name, score_type, skipheader, strand):
-		genomic.write_gff_header(outfile, ('asp',asp_version + ' ' + self.model_name),
-				('DNA', name))
-
-		for i in xrange(len(preds[0])):
-			d=dict()
-			d['seqname']=name
-			d['source']='asp'
-			d['feature']=preds[0][i]
-			d['start']=preds[1][i]
-			d['end']=preds[1][i]+1
-			if score_type=='output':
-				d['score']=preds[2][i]
-			else:
-				d['score']=self.sigmoid_transform(preds[2][i])
-			d['strand']=strand
-			d['frame']=0
-			genomic.write_gff_line(outfile, d)
-
-	def write_spf(self, outfile, preds, name, score_type, skipheader, strand):
-		genomic.write_spf_header(outfile, ('asp', asp_version + ' ' + self.model_name),
-				('DNA', name))
-
-		for i in xrange(len(preds[0])):
-			d=dict()
-			d['seqname']=name
-			d['source']=score_type
-			if preds[0][i]=='AG':
-				d['feature']='acc'
-				if strand=='+':
-					d['position']=preds[1][i]+2
-				else:
-					d['position']=preds[1][i]-1
-			else:
-				d['feature']='don'
-				if strand=='+':
-					d['position']=preds[1][i]
-				else:
-					d['position']=preds[1][i]+1
-			if score_type=='output':
-				d['score']=preds[2][i]
-			else:
-				d['score']=self.sigmoid_transform(preds[2][i])
-			d['strand']=strand
-			genomic.write_spf_line(outfile, d)
-
-	def write_binary(self, preds, site, strand, score_type, binary_out, binary_pos):
-		out=array.array('f')
-		if score_type=='output':
-			out.fromlist(preds[2])
-		else:
-			outputs=[self.sigmoid_transform(o) for o in preds[2]] ;
-			out.fromlist(outputs)
-
-		# move positions consistent with spf output
-		if site=='acc':
-			if strand=='+':
-				p=[i+2 for i in preds[1]]
-			else:
-				p=[i-1 for i in preds[1]]
-		else:
-			if strand=='+':
-				p=[i for i in preds[1]]
-			else:
-				p=[i+1 for i in preds[1]]
-
-		pos=array.array('i')
-		pos.fromlist(p) ;
-		out.tofile(binary_out)
-		pos.tofile(binary_pos)
-
-
-	def predict_file(self, fname, (start,end), output_format, score_type, strand='+'):
-		skipheader=False
-		fasta_dict = genomic.read_fasta(file(fname))
-
-		if strand=='-':
-			for k, kseq in fasta_dict.ordered_items():
-				fasta_dict[k]=genomic.reverse_complement(kseq)
-
-		sys.stdout.write('found fasta file with ' + `len(fasta_dict)` + ' sequence(s) (strand=%s)\n' % strand)
-		seqs= seqdict.seqdict(fasta_dict, (start,end))
-
-		#get donor/acceptor signal predictions for all sequences
-		self.signal.predict_acceptor_sites_from_seqdict(seqs)
-		self.signal.predict_donor_sites_from_seqdict(seqs)
-
-		contig_no = 0 ;
-		for seq in seqs:
-			contig_no = contig_no + 1
-
-			l=len(seq.preds['donor'].get_positions())
-			p=[i+1 for i in seq.preds['donor'].get_positions()]
-			s=seq.preds['donor'].get_scores()
-			f=[]
-			for pos in p:
-				if seq.seq[pos-1:pos+1]=='GT':
-					f.append(('GT'))
-				else:
-					f.append(('GC'))
-					assert(seq.seq[pos-1:pos+1]=='GC')
-
-			if strand=='-':
-				p=p[len(p)::-1]
-				p=[len(seq.seq)-i for i in p]
-				s=s[len(s)::-1]
-				f=f[len(f)::-1]
-
-			don_preds=(f,p,s)
-
-			l=len(seq.preds['acceptor'].get_positions())
-			p=[i-1 for i in seq.preds['acceptor'].get_positions()]
-			s=seq.preds['acceptor'].get_scores()
-			f=l*['AG']
-
-			if strand=='-':
-				p=p[len(p)::-1]
-				p=[len(seq.seq)-i for i in p]
-				s=s[len(s)::-1]
-				f=f[len(f)::-1]
-
-			acc_preds=(f,p,s)
-
-			if output_format == 'binary':
-				assert(len(binary_basename)>0)
-				binary_out=file(binary_basename+'/acc/contig_%i%c.%s' % (contig_no, strand, score_type), 'w')
-				binary_pos=file(binary_basename+'/acc/contig_%i%c.pos' % (contig_no, strand), 'w')
-				self.write_binary(acc_preds, 'acc', strand, score_type, binary_out, binary_pos)
-				binary_out.close()
-				binary_pos.close()
-				binary_out=file(binary_basename+'/don/contig_%i%c.%s' % (contig_no, strand, score_type), 'w')
-				binary_pos=file(binary_basename+'/don/contig_%i%c.pos' % (contig_no, strand), 'w')
-				self.write_binary(don_preds, 'don', strand, score_type, binary_out, binary_pos)
-				binary_out.close()
-				binary_pos.close()
-			else:
-				if output_format == 'gff':
-					self.write_gff(outfile, acc_preds, seq.name, score_type, skipheader, strand)
-					self.write_gff(outfile, don_preds, seq.name, score_type, skipheader, strand)
-				else:
-					if output_format == 'spf':
-						self.write_spf(outfile, acc_preds, seq.name, score_type, skipheader, strand)
-						self.write_spf(outfile, don_preds, seq.name, score_type, skipheader, strand)
-
-
-def print_version():
-	sys.stdout.write('asp '+asp_version+'\n')
-
-def parse_options():
-	parser = optparse.OptionParser(usage="usage: %prog [options] seq.fa")
-
-	parser.add_option("-g", "--gff-file", type="str",
-							  help="File to write the results in GFF format to the given file")
-	parser.add_option("-s", "--spf-file", type="str", default='stdout',
-							  help="File to write the results in SPF format to the given file")
-	parser.add_option("-b", "--binary-basename", type="str",
-							  help="Write results in binary format to file starting with this basename")
-	parser.add_option("-v", "--version", dest='version', default=False, action='store_true',
-							  help="Show some more information")
-	parser.add_option("-t", "--transform", dest='transform', default=False, action='store_true',
-							  help="Apply sigmoid transform to scale predictions between 0 and 1")
-	parser.add_option("--start", type="int", default=499,
-							  help="coding start (zero based, relative to sequence start)")
-	parser.add_option("--stop", type="int", default=-499,
-							  help="""coding stop (zero based, if positive relative to
-							  sequence start, if negative relative to sequence end)""")
-	parser.add_option("--organism", type="str", default='Worm',
-							  help="""use asp model for organism when predicting
-							  (one of Cress, Fish, Fly, Human, Worm)""")
-
-	(options, args) = parser.parse_args()
-	if options.version:
-		print_version()
-		sys.exit(0)
-
-	score_type = 'output'
-	if options.transform!=False:
-		score_type = 'Conf_cum' ;
-
-	if len(args) != 1:
-		parser.error("incorrect number of arguments")
-
-	fafname=args[0]
-	if not os.path.isfile(fafname):
-		parser.error("fasta file does not exist")
-
-	modelfname = 'data/%s.dat.bz2' % options.organism
-	print "loading model file " + modelfname,
-
-	if not os.path.isfile(modelfname):
-		print "...not found!\n"
-		parser.error("""model should be one of:
-
-Cress, Fish, Fly, Human, Worm
-""")
-
-	if (options.gff_file and (options.spf_file!='stdout' or options.binary_basename)) or (options.spf_file!='stdout' and (options.gff_file or options.binary_basename)):
-		parser.error("Only one of the options --binary-basename, --spf-file, or --gff-file may be given")
-
-	if (options.spf_file!='stdout' or (not options.binary_basename and not options.gff_file)):
-		output_format='spf'
-		outfile_fname = options.spf_file
-	if (options.gff_file):
-		output_format='gff'
-		outfile_fname = options.gff_file
-	if (options.binary_basename):
-		output_format='binary'
-
-	if output_format!='binary':
-		if outfile_fname == 'stdout':
-			outfile=sys.stdout
-		else:
-			try:
-				outfile=file(outfile_fname,'w')
-			except IOError:
-				parser.error("could not open %s for writing" % outfile_fname)
-
-	if output_format=='binary':
-		outfile = None
-		if os.system('mkdir -p %s/acc' % options.binary_basename) != 0:
-			parser.error("could not create directory %s/acc" % options.binary_basename)
-		if os.system('mkdir -p %s/don' % options.binary_basename) != 0:
-			parser.error("could not create directory %s/don" % options.binary_basename)
-
-	if options.start<80:
-		parser.error("--start value must be >=80")
-
-	if options.stop > 0 and options.start >= options.stop - 80:
-		parser.error("--stop value must be > start + 80")
-
-	if options.stop < 0 and options.stop > -80:
-		parser.error("--stop value must be <= - 80")
-
-	# shift the start and stop a bit
-	options.start -= 1 ;
-	options.stop -= 1 ;
-
-	return ((options.start,options.stop), fafname, modelfname, output_format, score_type, outfile, options.binary_basename)
-
-
-if __name__ == '__main__':
-	(startstop, fafname, modelfname, output_format, score_type, outfile, binary_basename ) = parse_options()
-	p=asp()
-	p.load_model(modelfname);
-	p.predict_file(fafname, startstop, output_format, score_type, '+')
-	p.predict_file(fafname, startstop, output_format, score_type, '-')
diff --git a/applications/asp/data b/applications/asp/data
deleted file mode 120000
index 4afdb0dff1c..00000000000
--- a/applications/asp/data
+++ /dev/null
@@ -1 +0,0 @@
-../../data/asp
\ No newline at end of file
diff --git a/applications/asp/galaxy/asp.sh b/applications/asp/galaxy/asp.sh
deleted file mode 100644
index 00e0aa989ac..00000000000
--- a/applications/asp/galaxy/asp.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-
-pwd
-
-cd /mnt/galaxyTools/tools/asp-0.3
-export LD_LIBRARY_PATH=/mnt/galaxyTools/tools/shogun-0.10.0/lib
-export PYTHONPATH=/mnt/galaxyTools/tools/shogun-0.10.0/lib/python2.6/dist-packages
-
-if [ "$3" = "spf1" ]
-then
-	if [ "${10}" = "yes" ]
-	then
-		./asp $1 --organism=$2 -t -s $5
-	else
-		./asp $1 --organism=$2 -s $5
-	fi
-elif [ "$3" = "gff2" ]
-then
-	if [ "${10}" = "yes" ]
-	then
-		./asp $1 --organism=$2 -t -g $4
-	else
-		./asp $1 --organism=$2 -g $4
-	fi
-elif [ "$3" = "binary" ]
-then
-	mkdir -p $6/pred
-	echo "This dataset contains acceptor splice site predictions in binary SPF format (for use with mGene, Palmapper, QPALMA)" > $7
-	mkdir -p $8/pred
-	echo "This dataset contains donor splice site predictions in binary SPF format (for use with mGene, Palmapper, QPALMA)" > $9
-	if [ "${10}" = "yes" ]
-	then
-		./asp $1 --organism=$2 -t -b $6
-	else
-		./asp $1 --organism=$2 -b $6
-	fi
-	mv $6/acc/* $6/pred/
-	rmdir $6/acc
-	mv $6/don/* $8/pred
-	rmdir $6/don
-fi
diff --git a/applications/asp/galaxy/asp.xml b/applications/asp/galaxy/asp.xml
deleted file mode 100644
index 4147de19de4..00000000000
--- a/applications/asp/galaxy/asp.xml
+++ /dev/null
@@ -1,115 +0,0 @@
-<tool id="asp" name="ASP" version="0.3">
-	<description>Accurate splice site prediction</description>
-	<command interpreter="bash">./asp.sh
-		$fasta_input
-		$organism
-		$result_format
-		$spf_gff
-		$spf_spf
-		$acc_spf_binary.extra_files_path
-		$acc_spf_binary
-		$don_spf_binary.extra_files_path
-		$don_spf_binary
-		$sigmoid_transform
-		> $log_file
-	</command>
-	<inputs>
-		<param format="fasta" name="fasta_input" type="data" label="Genome sequence in FASTA format" help=""/>
-		<param name="organism" type="select" label="Organism" help="">
-			<option value="Cress">Arabidopsis thaliana</option>
-		        <option value="Worm">Caenorhabditis elegans</option>
-		        <option value="Fly">Drosophila melanogaster</option>
-		        <option value="Human">Homo sapiens</option>
-		        <option value="Fish">Danio rerio</option>
-		</param>
-		<param name="sigmoid_transform" type="select" label="Use sigmoid-transform on SVM outputs to obtain score between 0 and 1" help="By default it is Yes.">
-			<option value="yes">Yes</option>
-			<option value="no">No</option>
-		</param>
-		<param name="result_format" type="select" label="Prediction result file format" help="Types of file format available for prediction results.">
-			<option value="gff2">GFF (Version 2)</option>
-			<option value="spf1">ASCII SPF (Version 1)</option>
-			<option value="binary">SPF format (binary format compatible with Palmapper and QPALMA)</option>
-		</param>
-	</inputs>
-	<outputs>
-		<data format="gff" name="spf_gff" label="Splice site signal predictions in GFF format">
-			<filter>result_format=="gff2"</filter>
-		</data>
-		<data format="spf" name="spf_spf" label="Splice site signal predictions in SPF format">
-			<filter>result_format=="spf1"</filter>
-		</data>
-		<data format="spf" name="acc_spf_binary" label="Acceptor splice site signal predictions in binary SPF format">
-			<filter>result_format=="binary"</filter>
-		</data>
-		<data format="spf" name="don_spf_binary" label="Donor splice site signal predictions in binary SPF format">
-			<filter>result_format=="binary"</filter>
-		</data>
-		<data format="txt" name="log_file" label="Log File">
-			<filter>result_format=="binary"</filter>
-		</data>
-	</outputs>
-  <help>
-
-**What it does**
-
-ASP_ predicts splice sites on genomic sequences of several species [1].
-
-It takes a genomic sequence in FASTA format and for every position
-that exhibits a splice site consensus dimer (AG for acceptor splice
-sites, GT/GC for donor splice site) computes a score indicating how
-likely the position is a splice site.  ASP uses a window of 141 nt
-around each position. Therefore, there are no predictions near the
-boundaries. Predictions are performed on the forward and backward
-strand for all sequences in the FASTA file.
-
-.. _ASP: http://www.fml.tuebingen.mpg.de/raetsch/suppl/splice
-
-**References:**
-
-[1] Soeren Sonnenburg, Gabriele Schweikert, Petra Philips, Jonas Behr, and Gunnar Raetsch: Accurate splice site prediction using support vector machines, BMC Bioinformatics 2007, 8(Suppl 10):S7.
-
-------
-
-**Output:**
-
-The output is a file in GFF-like format which contains a prediction
-score for each candidate location in the genomic input sequence.
-
-Each line contains the sequence name, the position in the sequence,
-the information about which kind of splice site type (GT/GC/AG) is
-considered and the SVM score. The GT and GC lines correspond to donor
-splice site predictions (consensus sequence is either GT or GC). For
-lines with AG, the line corresponds to acceptor splice site
-predictions (AG consensus). The score is related to the likelihood
-that a the given position is a splice site. The larger the score, the
-more likely that there is a splice site. Scores above -1 (original SVM
-scores) and above 0.7 (with sigmoid transformation) are quite likely
-splice sites.
-
-The tool `SignalPredict` in the `mGene.web modules` section can also
-predict splice sites for several organisms. Here the score is
-appropriately transformed into a posterior probability that there is a
-splice site at the predicted sites. These probabilities are easier to
-interpret than the raw SVM outputs provided by ASP.
-
---------
-
-.. class:: infomark
-
-**About formats**
-
-**SPF format** Signal Prediction format was designed for providing
-information such as labels or predictions for specific genomic
-locations (with strand information). It has requires exactly six
-fields::
-
-    1. chrom - The name of the chromosome (e.g. chr1, chrY_random).
-    2. signalName - possible choices include tss, tis, acc, don, cdsStop, cleave.
-    3. scoreName - possible choices include label, output, Conf, Conf_Cum
-    4. chromPos - The position in the chromosome. (The first base in a chromosome is numbered 1.)
-    5. strand - Defines the strand - either '+' or '-'.
-    6. score - The score between -infinity and infinity. If scoreName is 'label', then the score should be either -1 or 1.
-
-</help>
-</tool>
diff --git a/applications/asp/genomic.py b/applications/asp/genomic.py
deleted file mode 100644
index 6035ea9cd11..00000000000
--- a/applications/asp/genomic.py
+++ /dev/null
@@ -1,184 +0,0 @@
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Written (W) 2006-2009 Soeren Sonnenburg
-# Written (W) 2006-2007 Mikio Braun
-# Copyright (C) 2007 Fraunhofer Institute FIRST and Max-Planck-Society
-
-import time
-from string import maketrans
-
-class ordered_dict(dict):
-    """
-    Provide an ordered dictionary with chromosome identifiers.
-    """
-    def __init__(self, *args, **kwargs):
-        dict.__init__(self, *args, **kwargs)
-        self._order = self.keys()
-
-    def __setitem__(self, key, value):
-        dict.__setitem__(self, key, value)
-        if key in self._order:
-            self._order.remove(key)
-        self._order.append(key)
-
-    def __delitem__(self, key):
-        dict.__delitem__(self, key)
-        self._order.remove(key)
-
-    def ordered_items(self):
-        return [(key,self[key]) for key in self._order]
-
-
-""" read a table browser ascii output file (http://genome.ucsc.edu/cgi-bin/hgTables) """
-def read_table_browser(f):
-	table=dict();
-	for l in f.readlines():
-		if not l.startswith('#'):
-			(name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds,proteinID,alignID)=l.split('\t')
-			exonStarts=[ int(i) for i in exonStarts.split(',')[:-1] ]
-			exonEnds=[ int(i) for i in exonEnds.split(',')[:-1] ]
-
-			table[name]={ 'chrom': chrom, 'strand': strand, 'txStart': int(txStart), 'txEnd': int(txEnd),
-			'cdsStart': int(cdsStart), 'cdsEnd': int(cdsEnd), 'exonCount': int(exonCount), 'exonStarts': exonStarts,
-			'exonEnds': exonEnds, 'proteinID': proteinID, 'alignID': alignID[:-1] }
-
-	return table
-
-""" get promoter region """
-def get_promoter_region(chromosome, strand, gene_start, gene_end, genome, length):
-
-	if strand == '+':
-		return load_genomic(chromosome, strand, gene_start, gene_start+length, genome, one_based=False)
-	elif strand == '-':
-		return load_genomic(chromosome, strand, gene_end, gene_end+length, genome, one_based=False)
-	else:
-		print 'unknown strand'
-		return None
-
-""" reverse + complement a DNA sequence (only letters ACGT are translated!)
-	FIXME won't work with all the rest like y... """
-def reverse_complement(str):
-	t=maketrans('acgtACGT','tgcaTGCA')
-	return str[len(str)::-1].translate(t)
-
-""" works only with .fa files that contain a single entry """
-def read_single_fasta(fname):
-	str=file(fname).read()
-	str=str[str.index('\n')+1:].replace('\n','')
-	return str
-
-""" writes only single enty .fa files """
-def write_single_fasta(fname, name, str, linelen=60):
-	header= '>' + name + '\n'
-	f=file(fname,'a')
-	f.write(header)
-	for i in xrange(0,len(str),linelen):
-		f.write(str[i:i+linelen]+'\n')
-	f.close()
-
-""" read fasta as dictionary """
-def read_fasta(f):
-	fasta=ordered_dict()
-	fa=""
-	key=None
-	for s in f.readlines():
-		if s.startswith('>'):
-			if fa and key:
-				fasta[key]=fa
-			key=s[1:-1]
-			fasta[key]=""
-			fa=""
-		else:
-			fa+=s[:-1]
-
-	if fa and key:
-		fasta[key]=fa
-
-	return fasta
-
-def write_fasta(f, d, linelen=60):
-    """ write dictionary fasta """
-    for k in sorted(d):
-        f.write('>%s\n' % k);
-        s = d[k]
-        for i in xrange(0, len(s), linelen):
-            f.write(s[i:i+linelen] + '\n')
-
-def write_gff_header(f, (source, version), (seqtype, seqname)):
-	""" writes a gff version 2 file
-		descrlist is a list of dictionaries, each of which contain these fields:
-		<seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
-	"""
-	f.write('##gff-version 2\n')
-	f.write('##source-version %s %s\n' % (source, version) )
-
-	t=time.localtime()
-	f.write("##date %d-%d-%d %d:%d:%d\n" % t[0:6])
-
-	f.write('##Type %s %s\n' % (seqtype, seqname) )
-
-def write_gff_line(f, descr):
-	d=descr
-	f.write('%s\t%s\t%s\t%d\t%d\t%f\t%s\t%d' % (d['seqname'], d['source'],
-										d['feature'], d['start'], d['end'],
-										d['score'], d['strand'], d['frame']))
-	if d.has_key('attributes'):
-		f.write('\t' + d['attributes'])
-		if d.has_key('comments'):
-			f.write('\t' + d['comments'])
-	f.write('\n')
-
-def write_spf_header(f, (source, version), (seqtype, seqname)):
-	""" writes a gff version 2 file
-		descrlist is a list of dictionaries, each of which contain these fields:
-		<seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
-	"""
-
-	f.write('##spf-version 1\n')
-	f.write('##source-version %s %s\n' % (source, version) )
-
-	t=time.localtime()
-	f.write("##date %d-%d-%d %d:%d:%d\n" % t[0:6])
-
-	f.write('##Type %s %s\n' % (seqtype, seqname) )
-
-def write_spf_line(f, descr):
-	d=descr
-	f.write('%s\t%s\t%s\t%d\t%s\t%f' % (d['seqname'], d['source'],
-										d['feature'], d['position'],
-										d['strand'], d['score']))
-	if d.has_key('attributes'):
-		f.write('\t' + d['attributes'])
-		if d.has_key('comments'):
-			f.write('\t' + d['comments'])
-	f.write('\n')
-
-def write_gff(f, (source, version), (seqtype, seqname), descrlist, skipheader=False):
-	""" writes a gff version 2 file
-		descrlist is a list of dictionaries, each of which contain these fields:
-		<seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
-	"""
-
-	if not skipheader:
-		f.write('##gff-version 2\n')
-		f.write('##source-version %s %s\n' % (source, version) )
-
-		t=time.localtime()
-		f.write("##date %d-%d-%d %d:%d:%d\n" % t[0:6])
-
-	f.write('##Type %s %s\n' % (seqtype, seqname) )
-
-	for d in descrlist:
-		f.write('%s\t%s\t%s\t%d\t%d\t%f\t%s\t%d' % (d['seqname'], d['source'],
-											d['feature'], d['start'], d['end'],
-											d['score'], d['strand'], d['frame']))
-		if d.has_key('attributes'):
-			f.write('\t' + d['attributes'])
-			if d.has_key('comments'):
-				f.write('\t' + d['comments'])
-		f.write('\n')
-
diff --git a/applications/asp/model.py b/applications/asp/model.py
deleted file mode 100644
index 1410bbcfc62..00000000000
--- a/applications/asp/model.py
+++ /dev/null
@@ -1,143 +0,0 @@
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Written (W) 2006-2008 Soeren Sonnenburg
-# Written (W) 2007 Gunnar Raetsch
-# Copyright (C) 2006-2008 Fraunhofer Institute FIRST and Max-Planck-Society
-#
-
-import sys
-from numpy import mat,array,inf,any,reshape,int32
-
-class model(object):
-	#acceptor
-	acc_splice_b=None
-	acc_splice_order=None
-	acc_splice_window_left=None
-	acc_splice_window_right=None
-	acc_splice_alphas=None
-	acc_splice_svs=None
-
-	#donor
-	don_splice_b=None
-	don_splice_order=None
-	don_splice_window_left=None
-	don_splice_window_right=None
-	don_splice_alphas=None
-	don_splice_svs=None
-
-def parse_file(file):
-	m=model()
-
-	l=file.readline();
-
-	if l != '%asplicer definition file version: 1.0\n':
-		sys.stdout.write("\nfile not a asplicer definition file\n")
-		return None
-
-	while l:
-		if not ( l.startswith('%') or l.startswith('\n') ): # comment
-
-			#acceptor
-			if m.acc_splice_b is None: m.acc_splice_b=parse_value(l, 'acc_splice_b')
-			if m.acc_splice_order is None: m.acc_splice_order=parse_value(l, 'acc_splice_order')
-			if m.acc_splice_window_left is None: m.acc_splice_window_left=parse_value(l, 'acc_splice_window_left')
-			if m.acc_splice_window_right is None: m.acc_splice_window_right=parse_value(l, 'acc_splice_window_right')
-			if m.acc_splice_alphas is None: m.acc_splice_alphas=parse_vector(l, file, 'acc_splice_alphas')
-			if m.acc_splice_svs is None: m.acc_splice_svs=parse_string(l, file, 'acc_splice_svs')
-
-			#donor
-			if m.don_splice_b is None: m.don_splice_b=parse_value(l, 'don_splice_b')
-			if m.don_splice_order is None: m.don_splice_order=parse_value(l, 'don_splice_order')
-			if m.don_splice_window_left is None: m.don_splice_window_left=parse_value(l, 'don_splice_window_left')
-			if m.don_splice_window_right is None: m.don_splice_window_right=parse_value(l, 'don_splice_window_right')
-			if m.don_splice_alphas is None: m.don_splice_alphas=parse_vector(l, file, 'don_splice_alphas')
-			if m.don_splice_svs is None: m.don_splice_svs=parse_string(l, file, 'don_splice_svs')
-
-		l=file.readline()
-
-	sys.stdout.write('done\n')
-	return m
-
-def parse_value(line, name):
-	if (line.startswith(name)):
-		sys.stdout.write('.'); sys.stdout.flush()
-		return float(line[line.find('=')+1:-1])
-	else:
-		return None
-
-def parse_vector(line, file, name):
-    mat = parse_matrix(line, file, name)
-    if mat is None:
-     return mat
-    else:
-     mat = array(mat).flatten()
-     return mat
-
-def parse_matrix(line, file, name):
-	if (line.startswith(name)):
-		sys.stdout.write('.'); sys.stdout.flush()
-		if line.find(']') < 0:
-			l=''
-			while l is not None and l.find(']') < 0:
-				line+=l
-				l=file.readline()
-			if l is not None and l.find(']') >= 0:
-				line+=l
-
-		if line.find(']') < 0:
-			sys.stdout.write("matrix `" + name + "' ended without ']'\n")
-			return None
-		else:
-			mm = mat(line[line.find('['):line.find(']')+1])
-			if len(mm.shape)==1:
-				mm = reshape(mm.shape[0],1)
-			return mm
-	else:
-		return None
-
-def parse_string(line, file, name):
-	if (line.startswith(name)):
-		sys.stdout.write('.'); sys.stdout.flush()
-		l=''
-		lines=[]
-		while l is not None and l.find(']') < 0:
-			if l:
-				lines.append(l[:-1])
-			l=file.readline()
-
-		if l.find(']') < 0:
-			sys.stdout.write("string ended without ']'\n")
-			return None
-		else:
-			return lines
-	else:
-		return None
-
-if __name__ == '__main__':
-	import bz2
-	import sys
-	import hotshot, hotshot.stats
-
-	def load():
-		f=file('data/asp_test.dat');
-		m=parse_file(f);
-
-		print m.acc_splice_b is None
-		print m.acc_splice_order is None
-		print m.acc_splice_window_left is None
-		print m.acc_splice_window_right is None
-		print m.acc_splice_alphas is None
-		print m.acc_splice_svs is None
-
-		print m.don_splice_b is None
-		print m.don_splice_order is None
-		print m.don_splice_window_left is None
-		print m.don_splice_window_right is None
-		print m.don_splice_alphas is None
-		print m.don_splice_svs is None
-
-	load()
diff --git a/applications/asp/seqdict.py b/applications/asp/seqdict.py
deleted file mode 100644
index 74d87a769b6..00000000000
--- a/applications/asp/seqdict.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import string
-
-class predictions(object):
-	def __init__(self, positions=None, scores=None):
-		self.positions=positions
-		self.scores=scores
-
-	def set_positions(self, positions):
-		self.positions=positions;
-	def get_positions(self):
-		return self.positions
-
-	def set_scores(self, scores):
-		self.scores=scores
-	def get_scores(self):
-		return self.scores
-
-	def __str__(self):
-		return 'positions: ' + `self.positions` + 'scores: ' + `self.scores`
-	def __repr__(self):
-		return self.__str__()
-
-class sequence(object):
-	def __init__(self, name, seq, (start,end)):
-		assert(start<end<len(seq))
-		self.start=start
-		self.end=end
-		self.name=name
-		self.seq=seq
-		self.preds=dict()
-		self.preds['acceptor']=predictions()
-		self.preds['donor']=predictions()
-
-	def __str__(self):
-		s="start:" + `self.start`
-		s+=" end:" + `self.end`
-		s+=" name:" + `self.name`
-		s+=" sequence:" + `self.seq[0:10]`
-		s+="... preds:" + `self.preds`
-		return s
-	def __repr__(self):
-		return self.__str__()
-
-def seqdict(dic, (start,end)):
-	""" takes a fasta dict as input and
-	generates a list of sequence objects from it """
-
-	sequences=list()
-
-	#translate string to ACGT / all non ACGT letters are mapped to A
-	tab=''
-	for i in xrange(256):
-		if chr(i).upper() in 'ACGT':
-			tab+=chr(i).upper()
-		else:
-			tab+='A'
-
-	for seqname in dic.ordered_items():
-		seq=string.translate(seqname[1], tab)
-		seq=seq.upper()
-		if end<0:
-			stop=len(seq)+end
-		else:
-			stop=end
-
-		sequences.append(sequence(seqname[0], seq, (start,stop)))
-
-	return sequences
diff --git a/applications/asp/signal_detectors.py b/applications/asp/signal_detectors.py
deleted file mode 100644
index fe9e084791e..00000000000
--- a/applications/asp/signal_detectors.py
+++ /dev/null
@@ -1,172 +0,0 @@
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Written (W) 2006-2009 Soeren Sonnenburg
-# Written (W) 2007 Gunnar Raetsch
-# Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society
-#
-
-import sys
-import numpy
-import seqdict
-
-from shogun import LibSVM
-from shogun import StringCharFeatures,DNA
-from shogun import WeightedDegreeStringKernel
-from shogun import DynamicIntArray
-
-class svm_splice_model(object):
-	def __init__(self, order, traindat, alphas, b, (window_left,offset,window_right), consensus):
-
-		f=StringCharFeatures(traindat, DNA)
-		wd_kernel = WeightedDegreeStringKernel(f,f, int(order))
-		wd_kernel.io.set_target_to_stdout()
-
-		self.svm=LibSVM()
-		self.svm.set_kernel(wd_kernel)
-		self.svm.set_alphas(alphas)
-		self.svm.set_support_vectors(numpy.arange(len(alphas), dtype=numpy.int32))
-		self.svm.set_bias(b)
-		self.svm.io.set_target_to_stdout()
-		self.svm.parallel.set_num_threads(self.svm.parallel.get_num_cpus())
-		self.svm.set_linadd_enabled(True)
-		self.svm.set_batch_computation_enabled(True)
-
-		self.window_left=int(window_left)
-		self.window_right=int(window_right)
-
-		self.consensus=consensus
-		self.wd_kernel=wd_kernel
-		self.traindat=f
-		self.offset=offset
-
-	def get_positions(self, sequence):
-		positions=list()
-
-		for cons in self.consensus:
-			l=sequence.find(cons)
-			while l>-1:
-				if l<len(sequence)-self.window_right and l>self.window_left:
-					positions.append(l+self.offset)
-				l=sequence.find(cons, l+1)
-
-		positions.sort()
-		return positions
-
-	def get_predictions_from_seqdict(self, seqdic, site):
-		""" we need to generate a huge test features object
-			containing all locations found in each seqdict-sequence
-			and each location (this is necessary to efficiently
-			(==fast,low memory) compute the splice outputs
-		"""
-
-		seqlen=self.window_right+self.window_left+2
-
-		for s in seqdic:
-			position_list=DynamicIntArray()
-
-			sequence=s.seq
-			positions=s.preds[site].positions
-			for j in xrange(len(positions)):
-				i=positions[j] - self.offset -self.window_left
-				position_list.append_element(i)
-
-			t=StringCharFeatures([sequence], DNA)
-			t.obtain_by_position_list(seqlen, position_list)
-			self.wd_kernel.init(self.traindat, t)
-
-			self.wd_kernel.io.enable_progress()
-			l=self.svm.apply().get_values()
-			self.wd_kernel.cleanup()
-			sys.stdout.write("\n...done...\n")
-
-			num=len(s.preds[site].positions)
-			scores= num * [0]
-			for j in xrange(num):
-				scores[j]=l[j]
-			s.preds[site].set_scores(scores)
-
-	def get_positions_from_seqdict(self, seqdic, site):
-
-		for d in seqdic:
-			positions=list()
-			sequence=d.seq
-			for cons in self.consensus:
-				l=sequence.find(cons)
-				while l>-1:
-					if l<len(sequence)-self.window_right-2 and l>self.window_left:
-						positions.append(l+self.offset)
-					l=sequence.find(cons, l+1)
-			positions.sort()
-			d.preds[site].set_positions(positions)
-
-	def get_predictions(self, sequence, positions):
-
-		seqlen=self.window_right+self.window_left+2
-		num=len(positions)
-
-		position_list=DynamicIntArray()
-
-		for j in xrange(num):
-			i=positions[j] - self.offset - self.window_left
-			position_list.append_element(i)
-
-		t=StringCharFeatures([sequence], DNA)
-		t.obtain_by_position_list(seqlen, position_list)
-		self.wd_kernel.init(self.traindat, t)
-		del t
-
-		self.wd_kernel.io.enable_progress()
-		l=self.svm.apply().get_values()
-		self.wd_kernel.cleanup()
-		sys.stdout.write("\n...done...\n")
-		return l
-
-class signal_detectors(object):
-	def __init__(self, model):
-		don_consensus=['GC','GT']
-
-		self.acceptor=svm_splice_model(model.acc_splice_order, model.acc_splice_svs,
-				numpy.array(model.acc_splice_alphas).flatten(), model.acc_splice_b,
-				(model.acc_splice_window_left-2, 2, model.acc_splice_window_right+2), ['AG'])
-		self.donor=svm_splice_model(model.don_splice_order, model.don_splice_svs,
-				numpy.array(model.don_splice_alphas).flatten(), model.don_splice_b,
-				(model.don_splice_window_left+1, 0, model.don_splice_window_right-1),
-				don_consensus)
-
-	def set_sequence(self, seq):
-		self.acceptor.set_sequence(seq)
-		self.donor.set_sequence(seq)
-
-	def predict_acceptor_sites(self, seq):
-		pos=self.acceptor.get_positions(seq)
-		sys.stdout.write("computing svm output for acceptor positions\n")
-		pred=self.acceptor.get_predictions(seq, pos)
-		return (pos,pred)
-
-	def predict_donor_sites(self,seq):
-		pos=self.donor.get_positions(seq)
-		sys.stdout.write("computing svm output for donor positions\n")
-		pred=self.donor.get_predictions(seq, pos)
-		return (pos,pred)
-
-	def predict_acceptor_sites_from_seqdict(self, seqs):
-		self.acceptor.get_positions_from_seqdict(seqs, 'acceptor')
-		sys.stdout.write("computing svm output for acceptor positions\n")
-		self.acceptor.get_predictions_from_seqdict(seqs, 'acceptor')
-
-	def predict_donor_sites_from_seqdict(self, seqs):
-		self.donor.get_positions_from_seqdict(seqs, 'donor')
-		sys.stdout.write("computing svm output for donor positions\n")
-		self.donor.get_predictions_from_seqdict(seqs, 'donor')
-
-	def clear_acceptor():
-		del self.acceptor
-		self.acceptor=None
-
-	def clear_donor():
-		del self.acceptor
-		self.acceptor=None
diff --git a/applications/classification/evaluate_multiclass_labels.py b/applications/classification/evaluate_multiclass_labels.py
deleted file mode 100644
index f720bb687d0..00000000000
--- a/applications/classification/evaluate_multiclass_labels.py
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/usr/bin/env python
-
-#  Copyright (c) The Shogun Machine Learning Toolbox
-#  Written (w) 2014 Daniel Pyrathon
-#  All rights reserved.
-#
-#  Redistribution and use in source and binary forms, with or without
-#  modification, are permitted provided that the following conditions are met:
-#
-#  1. Redistributions of source code must retain the above copyright notice, this
-#     list of conditions and the following disclaimer.
-#  2. Redistributions in binary form must reproduce the above copyright notice,
-#     this list of conditions and the following disclaimer in the documentation
-#     and/or other materials provided with the distribution.
-#
-#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-#  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-#  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-#  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-#  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-#  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-#  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-#  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-#  The views and conclusions contained in the software and documentation are those
-#  of the authors and should not be interpreted as representing official policies,
-#  either expressed or implied, of the Shogun Development Team.
-
-
-import argparse
-import logging
-import numpy as np
-from shogun import (LibSVMFile, MulticlassLabels, MulticlassAccuracy)
-from utils import get_features_and_labels
-
-LOGGER = logging.getLogger(__file__)
-
-def parse_arguments():
-	parser = argparse.ArgumentParser(description="Evaluate predicted \
-					labels againsy bare truth")
-	parser.add_argument('--actual', required=True, type=str,
-					help='Path to LibSVM dataset.')
-	parser.add_argument('--predicted', required=True, type=str,
-					help='Path to serialized predicted labels')
-	return parser.parse_args()
-
-
-def main(actual, predicted):
-	LOGGER.info("SVM Multiclass evaluator")
-
-	# Load SVMLight dataset
-	feats, labels = get_features_and_labels(LibSVMFile(actual))
-
-	# Load predicted labels
-	with open(predicted, 'r') as f:
-		predicted_labels_arr = np.array([float(l) for l in f])
-		predicted_labels = MulticlassLabels(predicted_labels_arr)
-
-	# Evaluate accuracy
-	multiclass_measures = MulticlassAccuracy()
-	LOGGER.info("Accuracy = %s" % multiclass_measures.evaluate(
-		labels, predicted_labels))
-	LOGGER.info("Confusion matrix:")
-	res = multiclass_measures.get_confusion_matrix(labels, predicted_labels)
-	print res
-
-
-if __name__ == '__main__':
-	args = parse_arguments()
-	main(args.actual, args.predicted)
diff --git a/applications/classification/predict_multiclass_svm.py b/applications/classification/predict_multiclass_svm.py
deleted file mode 100644
index 79585ec6784..00000000000
--- a/applications/classification/predict_multiclass_svm.py
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env python
-
-#  Copyright (c) The Shogun Machine Learning Toolbox
-#  Written (w) 2014 Daniel Pyrathon
-#  All rights reserved.
-#
-#  Redistribution and use in source and binary forms, with or without
-#  modification, are permitted provided that the following conditions are met:
-#
-#  1. Redistributions of source code must retain the above copyright notice, this
-#     list of conditions and the following disclaimer.
-#  2. Redistributions in binary form must reproduce the above copyright notice,
-#     this list of conditions and the following disclaimer in the documentation
-#     and/or other materials provided with the distribution.
-#
-#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-#  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-#  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-#  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-#  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-#  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-#  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-#  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-#  The views and conclusions contained in the software and documentation are those
-#  of the authors and should not be interpreted as representing official policies,
-#  either expressed or implied, of the Shogun Development Team.
-
-
-import argparse
-import logging
-from contextlib import closing
-from shogun import (LibSVMFile, SparseRealFeatures, MulticlassLabels,
-											MulticlassLibSVM, SerializableHdf5File,
-											MulticlassAccuracy)
-from utils import get_features_and_labels
-
-LOGGER = logging.getLogger(__file__)
-
-def parse_arguments():
-	parser = argparse.ArgumentParser(description="Test a serialized SVM \
-					classifier agains a SVMLight test file")
-	parser.add_argument('--classifier', required=True, type=str,
-					help='Path to training dataset in LibSVM format.')
-	parser.add_argument('--testset', required=True, type=str,
-					help='Path to the SVMLight test file')
-	parser.add_argument('--output', required=True, type=str,
-					help='File path to write predicted labels')
-	return parser.parse_args()
-
-
-def main(classifier, testset, output):
-	LOGGER.info("SVM Multiclass evaluation")
-
-	svm = MulticlassLibSVM()
-	serialized_classifier = SerializableHdf5File(classifier, 'r')
-	with closing(serialized_classifier):
-		svm.load_serializable(serialized_classifier)
-
-	test_feats, test_labels = get_features_and_labels(LibSVMFile(testset))
-	predicted_labels = svm.apply(test_feats)
-
-	with open(output, 'w') as f:
-		for cls in predicted_labels.get_labels():
-			f.write("%s\n" % int(cls))
-
-	LOGGER.info("Predicted labels saved in: '%s'" % output)
-
-
-if __name__ == '__main__':
-	args = parse_arguments()
-	main(args.classifier, args.testset, args.output)
-
diff --git a/applications/classification/random_fourier_classification.cpp b/applications/classification/random_fourier_classification.cpp
deleted file mode 100644
index bcd085407c7..00000000000
--- a/applications/classification/random_fourier_classification.cpp
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * This software is distributed under BSD 3-clause license (see LICENSE file).
- *
- * Authors: Björn Esser, Evangelos Anagnostopoulos
- */
-#include <shogun/base/init.h>
-#include <shogun/features/RandomFourierDotFeatures.h>
-#include <shogun/features/SparseFeatures.h>
-#include <shogun/io/LibSVMFile.h>
-#include <shogun/labels/BinaryLabels.h>
-#include <shogun/labels/MulticlassLabels.h>
-#include <shogun/classifier/svm/SVMOcas.h>
-#include <shogun/classifier/svm/LibLinear.h>
-#include <shogun/evaluation/PRCEvaluation.h>
-#include <shogun/evaluation/ROCEvaluation.h>
-#include <shogun/evaluation/ContingencyTableEvaluation.h>
-#include <shogun/lib/Time.h>
-
-#include <stdio.h>
-
-using namespace shogun;
-
-const char* filepath = 0;
-const char* testpath = 0;
-int32_t D = 300;
-float64_t C = 0.1;
-float64_t epsilon = 0.01;
-float64_t width = 8;
-int32_t correct_dimension = -1;
-
-SGSparseMatrix<float64_t> load_data(const char* filepath, float64_t*& label_vec)
-{
-	FILE* data_file = fopen(filepath, "r");
-	SGSparseMatrix<float64_t> sparse_data;
-
-	CLibSVMFile* file_reader = new CLibSVMFile(data_file);
-	file_reader->get_sparse_matrix(sparse_data.sparse_matrix, sparse_data.num_features, sparse_data.num_vectors,
-			label_vec);
-
-	if (correct_dimension!=-1)
-		sparse_data.num_features = correct_dimension;
-
-	SG_UNREF(file_reader);
-
-	return sparse_data;
-}
-
-void print_help_message()
-{
-	SG_SPRINT("Usage : ./rf_classify --dataset path_to_data [--testset path_to_test_data] [-D number_of_samples]\n");
-	SG_SPRINT("		[-C C_for_SVM] [--epsilon SVM_epsilon] [--width gaussian_kernel_width] [--dimension feature_dimension]\n");
-	SG_SPRINT("\nPerforms binary classification on provided data using Random Fourier features with a linear SVM solver,\n");
-	SG_SPRINT("namely SVMOcas.\nParameter explanation :\n");
-	SG_SPRINT("\ndataset  : Path to data in LibSVM format. Required.");
-	SG_SPRINT("\ntestset  : Path to test data in LibSVM format. Optional.");
-	SG_SPRINT("\nD         : Number of samples for the Random Fourier features. Default value = 300");
-	SG_SPRINT("\nC         : SVM parameter C. Default value = 0.1");
-	SG_SPRINT("\nepsilon   : SVM epsilon. Default value = 0.01");
-	SG_SPRINT("\nwidth     : Gaussian Kernel width parameter. Default value = 8");
-	SG_SPRINT("\ndimension : Correct feature dimension. Optional\n");
-}
-
-void parse_arguments(int argv, char** argc)
-{
-	if (argv%2!=1)
-	{
-		print_help_message();
-		exit_shogun();
-		exit(0);
-	}
-
-	for (index_t i=1; i<argv; i++)
-	{
-		if (strcmp(argc[i],"--dataset")==0)
-			filepath = argc[++i];
-		else if (strcmp(argc[i],"--testset")==0)
-			testpath = argc[++i];
-		else if (strcmp(argc[i],"-D")==0)
-			D = atoi(argc[++i]);
-		else if (strcmp(argc[i],"-C")==0)
-			C = atof(argc[++i]);
-		else if (strcmp(argc[i],"--epsilon")==0)
-			epsilon = atof(argc[++i]);
-		else if (strcmp(argc[i],"--width")==0)
-			width = atof(argc[++i]);
-		else if (strcmp(argc[i],"--dimension")==0)
-			correct_dimension = atoi(argc[++i]);
-	}
-
-	if (filepath==0)
-	{
-		print_help_message();
-		exit_shogun();
-		exit(0);
-	}
-}
-
-int main(int argv, char** argc)
-{
-	init_shogun_with_defaults();
-
-	parse_arguments(argv, argc);
-
-	/** Reading data */
-	float64_t* label_vec = 0;
-	SGSparseMatrix<float64_t> sparse_data = load_data(filepath, label_vec);
-	SGVector<float64_t> label(label_vec, sparse_data.num_vectors);
-
-
-	/** Creating features */
-	CBinaryLabels* labels = new CBinaryLabels(label);
-	SG_REF(labels);
-
-	CSparseFeatures<float64_t>* s_feats = new CSparseFeatures<float64_t>(sparse_data);
-	SGVector<float64_t> params(1);
-	params[0] = width;
-	CRandomFourierDotFeatures* r_feats = new CRandomFourierDotFeatures(
-			s_feats, D, KernelName::GAUSSIAN, params);
-
-
-	/** Training */
-	CLibLinear* svm = new CLibLinear(C, r_feats, labels);
-	//CSVMOcas* svm = new CSVMOcas(C, r_feats, labels);
-	svm->set_epsilon(epsilon);
-	SG_SPRINT("Starting training\n");
-	CTime* timer = new CTime();
-	svm->train();
-	float64_t secs = timer->cur_runtime_diff_sec();
-	timer->stop();
-	SG_UNREF(timer);
-	SG_SPRINT("Training completed, took %fs\n", secs);
-	/** Training completed */
-
-	/** Evaluating */
-	CBinaryLabels* predicted = CLabelsFactory::to_binary(svm->apply());
-	CPRCEvaluation* prc_evaluator = new CPRCEvaluation();
-	CROCEvaluation* roc_evaluator = new CROCEvaluation();
-	CAccuracyMeasure* accuracy_evaluator = new CAccuracyMeasure();
-
-	float64_t auROC = roc_evaluator->evaluate(predicted, labels);
-	float64_t auPRC = prc_evaluator->evaluate(predicted, labels);
-	float32_t accuracy = accuracy_evaluator->evaluate(predicted, labels);
-	SG_SPRINT("Training auPRC=%f, auROC=%f, accuracy=%f ( Incorrectly predicted=%f% )\n", auPRC, auROC,
-				accuracy, (1-accuracy) * 100);
-
-	SG_UNREF(predicted);
-	SGMatrix<float64_t> w = r_feats->get_random_coefficients();
-	svm->set_features(NULL);
-
-	if (testpath!=0)
-	{
-		sparse_data = load_data(testpath, label_vec);
-		label = SGVector<float64_t>(label_vec, sparse_data.num_vectors);
-
-		s_feats = new CSparseFeatures<float64_t>(sparse_data);
-		r_feats = new CRandomFourierDotFeatures(s_feats, D, KernelName::GAUSSIAN, width, w);
-		CBinaryLabels* test_labels = new CBinaryLabels(label);
-
-		predicted = CLabelsFactory::to_binary(svm->apply(r_feats));
-		auROC = roc_evaluator->evaluate(predicted, test_labels);
-		auPRC = prc_evaluator->evaluate(predicted, test_labels);
-		accuracy = accuracy_evaluator->evaluate(predicted, test_labels);
-		SG_SPRINT("Test auPRC=%f, auROC=%f, accuracy=%f ( Incorrectly predicted=%f% )\n", auPRC, auROC,
-				accuracy, (1-accuracy) * 100);
-		SG_UNREF(predicted);
-		SG_UNREF(test_labels);
-
-	}
-	SG_UNREF(prc_evaluator);
-	SG_UNREF(roc_evaluator);
-	SG_UNREF(accuracy_evaluator);
-	SG_UNREF(svm);
-	SG_UNREF(labels);
-	exit_shogun();
-}
diff --git a/applications/classification/random_fourier_classification.py b/applications/classification/random_fourier_classification.py
deleted file mode 100644
index 07350cde384..00000000000
--- a/applications/classification/random_fourier_classification.py
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/usr/bin/env python
-
-#
-#  This program is free software; you can redistribute it and/or modify
-#  it under the terms of the GNU General Public License as published by
-#  the Free Software Foundation; either version 3 of the License, or
-#  (at your option) any later version.
-#
-#  Written (W) 2013 Evangelos Anagnostopoulos
-#
-
-def parse_arguments():
-	import argparse
-	parser = argparse.ArgumentParser(description=
-		"Solve binary classification problems stored in libsvm format, "
-		"using Random Fourier features and SVMOcas")
-	parser.add_argument('--dataset', required=True, type=str,
-					help='Path to training dataset in LibSVM format.')
-	parser.add_argument('--testset', type=str,
-					help='Path to test dataset in LibSVM format.')
-	parser.add_argument('-D', default=300, type=int,
-					help='The number of samples to use')
-	parser.add_argument('-C', default=0.1, type=float,
-					help='SVMOcas regularization constant')
-	parser.add_argument('--epsilon', default=0.01, type=float,
-					help='SVMOcas epsilon parameter')
-	parser.add_argument('--width', default=8, type=float,
-					help='Width of the Gaussian Kernel to approximate')
-	parser.add_argument('--dimension', type=int,
-					help='Dimension of input dataset')
-
-	return parser.parse_args()
-
-def evaluate(predicted_labels, labels, prefix="Results"):
-	from shogun import PRCEvaluation, ROCEvaluation, AccuracyMeasure
-
-	prc_evaluator = PRCEvaluation()
-	roc_evaluator = ROCEvaluation()
-	acc_evaluator = AccuracyMeasure()
-
-	auPRC = prc_evaluator.evaluate(predicted_labels, labels)
-	auROC = roc_evaluator.evaluate(predicted_labels, labels)
-	acc = acc_evaluator.evaluate(predicted_labels, labels)
-
-	print ('{0}: auPRC = {1:.5f}, auROC = {2:.5f}, acc = {3:.5f} '+
-				'({4}% incorrectly classified)').format(
-				prefix, auPRC, auROC, acc, (1-acc)*100)
-
-def load_sparse_data(filename, dimension=None):
-	input_file = LibSVMFile(args.dataset)
-	sparse_feats = SparseRealFeatures()
-	label_array = sparse_feats.load_with_labels(input_file)
-	labels = BinaryLabels(label_array)
-
-	if dimension!=None:
-		sparse_feats.set_num_features(dimension)
-
-	return {'data':sparse_feats, 'labels':labels}
-
-if __name__=='__main__':
-	from shogun import SparseRealFeatures, RandomFourierDotFeatures, GAUSSIAN
-	from shogun import LibSVMFile, BinaryLabels, SVMOcas
-	from shogun import Time
-	from numpy import array
-
-	args = parse_arguments()
-
-	print 'Loading training data...'
-	sparse_data = load_sparse_data(args.dataset,args.dimension)
-
-	kernel_params = array([args.width], dtype=float)
-	rf_feats = RandomFourierDotFeatures(sparse_data['data'], args.D, GAUSSIAN,
-				kernel_params)
-
-	svm = SVMOcas(args.C, rf_feats, sparse_data['labels'])
-	svm.set_epsilon(args.epsilon)
-	print 'Starting training.'
-	timer = Time()
-	svm.train()
-	timer.stop()
-	print 'Training completed, took {0:.2f}s.'.format(timer.time_diff_sec())
-
-	predicted_labels = svm.apply()
-	evaluate(predicted_labels, sparse_data['labels'], 'Training results')
-
-	if args.testset!=None:
-		random_coef = rf_feats.get_random_coefficients()
-		# removing current dataset from memory in order to load the test dataset,
-		# to avoid running out of memory
-		rf_feats = None
-		svm.set_features(None)
-		svm.set_labels(None)
-		sparse_data = None
-
-		print 'Loading test data...'
-		sparse_data = load_sparse_data(args.testset, args.dimension)
-		rf_feats = RandomFourierDotFeatures(sparse_data['data'], args.D, GAUSSIAN,
-					kernel_params, random_coef)
-		predicted_labels = svm.apply(rf_feats)
-		evaluate(predicted_labels, sparse_data['labels'], 'Test results')
diff --git a/applications/classification/train_multiclass_svm.py b/applications/classification/train_multiclass_svm.py
deleted file mode 100644
index 39e17d2619c..00000000000
--- a/applications/classification/train_multiclass_svm.py
+++ /dev/null
@@ -1,98 +0,0 @@
-#!/usr/bin/env python
-
-#  Copyright (c) The Shogun Machine Learning Toolbox
-#  Written (w) 2014 Daniel Pyrathon
-#  All rights reserved.
-#
-#  Redistribution and use in source and binary forms, with or without
-#  modification, are permitted provided that the following conditions are met:
-#
-#  1. Redistributions of source code must retain the above copyright notice, this
-#     list of conditions and the following disclaimer.
-#  2. Redistributions in binary form must reproduce the above copyright notice,
-#     this list of conditions and the following disclaimer in the documentation
-#     and/or other materials provided with the distribution.
-#
-#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-#  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-#  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-#  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-#  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-#  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-#  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-#  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-#  The views and conclusions contained in the software and documentation are those
-#  of the authors and should not be interpreted as representing official policies,
-#  either expressed or implied, of the Shogun Development Team.
-
-
-import argparse
-import logging
-from contextlib import contextmanager, closing
-from shogun import (LibSVMFile, GaussianKernel, MulticlassLibSVM,
-																SerializableHdf5File, LinearKernel)
-from utils import get_features_and_labels, track_execution
-
-LOGGER = logging.getLogger(__file__)
-
-KERNELS = {
-	'linear': lambda feats, width: LinearKernel(feats, feats),
-	'gaussian': lambda feats, width: GaussianKernel(feats, feats, width),
-}
-
-def parse_arguments():
-	parser = argparse.ArgumentParser(description="Train a multiclass SVM \
-					stored in libsvm format")
-	parser.add_argument('--dataset', required=True, type=str,
-					help='Path to training dataset in LibSVM format.')
-	parser.add_argument('--capacity', default=1.0, type=float,
-					help='SVM capacity parameter')
-	parser.add_argument('--width', default=2.1, type=float,
-					help='Width of the Gaussian Kernel to approximate')
-	parser.add_argument('--epsilon', default=0.01, type=float,
-					help='SVMOcas epsilon parameter')
-	parser.add_argument('--kernel', type=str, default='linear',
-					choices=['linear', 'gaussian'],
-					help='Optionally specify a kernel type. \
-					Only Linear or Gaussian')
-	parser.add_argument('--output', required=True, type=str,
-					help='Destination path for the output serialized \
-					classifier')
-	return parser.parse_args()
-
-
-def main(dataset, output, epsilon, capacity, width, kernel_type):
-
-	LOGGER.info("SVM Multiclass classifier")
-	LOGGER.info("Epsilon: %s" % epsilon)
-	LOGGER.info("Capacity: %s" % capacity)
-	LOGGER.info("Gaussian width: %s" % width)
-
-	# Get features
-	feats, labels = get_features_and_labels(LibSVMFile(dataset))
-
-	# Create kernel
-	try:
-		kernel = KERNELS[kernel_type](feats, width)
-	except KeyError:
-		LOGGER.error("Kernel %s not available. try Gaussian or Linear" % kernel_type)
-
-	# Initialize and train Multiclass SVM
-	svm = MulticlassLibSVM(capacity, kernel, labels)
-	svm.set_epsilon(epsilon)
-	with track_execution():
-		svm.train()
-
-	# Serialize to file
-	writable_file = SerializableHdf5File(output, 'w')
-	with closing(writable_file):
-		svm.save_serializable(writable_file)
-	LOGGER.info("Serialized classifier saved in: '%s'" % output)
-
-
-if __name__ == '__main__':
-	args = parse_arguments()
-	main(args.dataset, args.output, args.epsilon, args.capacity, args.width, args.kernel)
diff --git a/applications/classification/utils.py b/applications/classification/utils.py
deleted file mode 100644
index e459c39e97e..00000000000
--- a/applications/classification/utils.py
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-
-#  Copyright (c) The Shogun Machine Learning Toolbox
-#  Written (w) 2014 Daniel Pyrathon
-#  All rights reserved.
-#
-#  Redistribution and use in source and binary forms, with or without
-#  modification, are permitted provided that the following conditions are met:
-#
-#  1. Redistributions of source code must retain the above copyright notice, this
-#     list of conditions and the following disclaimer.
-#  2. Redistributions in binary form must reproduce the above copyright notice,
-#     this list of conditions and the following disclaimer in the documentation
-#     and/or other materials provided with the distribution.
-#
-#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-#  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-#  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-#  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-#  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-#  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-#  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-#  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-#  The views and conclusions contained in the software and documentation are those
-#  of the authors and should not be interpreted as representing official policies,
-#  either expressed or implied, of the Shogun Development Team.
-
-import logging
-from contextlib import contextmanager
-from shogun import MulticlassLabels, SparseRealFeatures, Time
-
-
-logging.basicConfig(level=logging.INFO, format='[%(asctime)-15s %(module)s] %(message)s')
-LOGGER = logging.getLogger(__file__)
-
-def get_features_and_labels(input_file):
-	feats = SparseRealFeatures()
-	label_array = feats.load_with_labels(input_file)
-	labels = MulticlassLabels(label_array)
-	return feats, labels
-
-@contextmanager
-def track_execution():
-	LOGGER.info('Starting training.')
-	timer = Time()
-	yield
-	timer.stop()
-	LOGGER.info('Training completed, took {0:.2f}s.'.format(timer.time_diff_sec()))
diff --git a/applications/easysvm/AUTHORS b/applications/easysvm/AUTHORS
deleted file mode 100644
index 7c0e1b4662c..00000000000
--- a/applications/easysvm/AUTHORS
+++ /dev/null
@@ -1,3 +0,0 @@
-Cheng Soon Ong <Chengsoon.Ong@tuebingen.mpg.de>
-Gunnar Raetsch <Gunnar.Raetsch@tuebingen.mpg.de>
-Sebastian Schultheiss <Sebastian.Schultheiss@tuebingen.mpg.de>
diff --git a/applications/easysvm/LICENSE b/applications/easysvm/LICENSE
deleted file mode 100644
index 94a9ed024d3..00000000000
--- a/applications/easysvm/LICENSE
+++ /dev/null
@@ -1,674 +0,0 @@
-                    GNU GENERAL PUBLIC LICENSE
-                       Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-                            Preamble
-
-  The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
-
-  The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works.  By contrast,
-the GNU General Public License is intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users.  We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors.  You can apply it to
-your programs, too.
-
-  When we speak of free software, we are referring to freedom, not
-price.  Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
-  To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights.  Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
-
-  For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received.  You must make sure that they, too, receive
-or can get the source code.  And you must show them these terms so they
-know their rights.
-
-  Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
-
-  For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software.  For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
-  Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so.  This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software.  The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable.  Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products.  If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
-  Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary.  To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
-
-  The precise terms and conditions for copying, distribution and
-modification follow.
-
-                       TERMS AND CONDITIONS
-
-  0. Definitions.
-
-  "This License" refers to version 3 of the GNU General Public License.
-
-  "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
-  "The Program" refers to any copyrightable work licensed under this
-License.  Each licensee is addressed as "you".  "Licensees" and
-"recipients" may be individuals or organizations.
-
-  To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy.  The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
-  A "covered work" means either the unmodified Program or a work based
-on the Program.
-
-  To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy.  Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
-  To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies.  Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
-  An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License.  If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
-  1. Source Code.
-
-  The "source code" for a work means the preferred form of the work
-for making modifications to it.  "Object code" means any non-source
-form of a work.
-
-  A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
-  The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form.  A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
-  The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities.  However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work.  For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
-  The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
-  The Corresponding Source for a work in source code form is that
-same work.
-
-  2. Basic Permissions.
-
-  All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met.  This License explicitly affirms your unlimited
-permission to run the unmodified Program.  The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work.  This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
-  You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force.  You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright.  Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
-  Conveying under any other circumstances is permitted solely under
-the conditions stated below.  Sublicensing is not allowed; section 10
-makes it unnecessary.
-
-  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
-  No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
-  When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
-  4. Conveying Verbatim Copies.
-
-  You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
-  You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
-  5. Conveying Modified Source Versions.
-
-  You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
-    a) The work must carry prominent notices stating that you modified
-    it, and giving a relevant date.
-
-    b) The work must carry prominent notices stating that it is
-    released under this License and any conditions added under section
-    7.  This requirement modifies the requirement in section 4 to
-    "keep intact all notices".
-
-    c) You must license the entire work, as a whole, under this
-    License to anyone who comes into possession of a copy.  This
-    License will therefore apply, along with any applicable section 7
-    additional terms, to the whole of the work, and all its parts,
-    regardless of how they are packaged.  This License gives no
-    permission to license the work in any other way, but it does not
-    invalidate such permission if you have separately received it.
-
-    d) If the work has interactive user interfaces, each must display
-    Appropriate Legal Notices; however, if the Program has interactive
-    interfaces that do not display Appropriate Legal Notices, your
-    work need not make them do so.
-
-  A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit.  Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
-  6. Conveying Non-Source Forms.
-
-  You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
-    a) Convey the object code in, or embodied in, a physical product
-    (including a physical distribution medium), accompanied by the
-    Corresponding Source fixed on a durable physical medium
-    customarily used for software interchange.
-
-    b) Convey the object code in, or embodied in, a physical product
-    (including a physical distribution medium), accompanied by a
-    written offer, valid for at least three years and valid for as
-    long as you offer spare parts or customer support for that product
-    model, to give anyone who possesses the object code either (1) a
-    copy of the Corresponding Source for all the software in the
-    product that is covered by this License, on a durable physical
-    medium customarily used for software interchange, for a price no
-    more than your reasonable cost of physically performing this
-    conveying of source, or (2) access to copy the
-    Corresponding Source from a network server at no charge.
-
-    c) Convey individual copies of the object code with a copy of the
-    written offer to provide the Corresponding Source.  This
-    alternative is allowed only occasionally and noncommercially, and
-    only if you received the object code with such an offer, in accord
-    with subsection 6b.
-
-    d) Convey the object code by offering access from a designated
-    place (gratis or for a charge), and offer equivalent access to the
-    Corresponding Source in the same way through the same place at no
-    further charge.  You need not require recipients to copy the
-    Corresponding Source along with the object code.  If the place to
-    copy the object code is a network server, the Corresponding Source
-    may be on a different server (operated by you or a third party)
-    that supports equivalent copying facilities, provided you maintain
-    clear directions next to the object code saying where to find the
-    Corresponding Source.  Regardless of what server hosts the
-    Corresponding Source, you remain obligated to ensure that it is
-    available for as long as needed to satisfy these requirements.
-
-    e) Convey the object code using peer-to-peer transmission, provided
-    you inform other peers where the object code and Corresponding
-    Source of the work are being offered to the general public at no
-    charge under subsection 6d.
-
-  A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
-  A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling.  In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage.  For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product.  A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
-  "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source.  The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
-  If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information.  But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
-  The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed.  Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
-  Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
-  7. Additional Terms.
-
-  "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law.  If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
-  When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it.  (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.)  You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
-  Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
-    a) Disclaiming warranty or limiting liability differently from the
-    terms of sections 15 and 16 of this License; or
-
-    b) Requiring preservation of specified reasonable legal notices or
-    author attributions in that material or in the Appropriate Legal
-    Notices displayed by works containing it; or
-
-    c) Prohibiting misrepresentation of the origin of that material, or
-    requiring that modified versions of such material be marked in
-    reasonable ways as different from the original version; or
-
-    d) Limiting the use for publicity purposes of names of licensors or
-    authors of the material; or
-
-    e) Declining to grant rights under trademark law for use of some
-    trade names, trademarks, or service marks; or
-
-    f) Requiring indemnification of licensors and authors of that
-    material by anyone who conveys the material (or modified versions of
-    it) with contractual assumptions of liability to the recipient, for
-    any liability that these contractual assumptions directly impose on
-    those licensors and authors.
-
-  All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10.  If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term.  If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
-  If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
-  Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
-  8. Termination.
-
-  You may not propagate or modify a covered work except as expressly
-provided under this License.  Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
-  However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
-  Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
-  Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License.  If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
-  9. Acceptance Not Required for Having Copies.
-
-  You are not required to accept this License in order to receive or
-run a copy of the Program.  Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance.  However,
-nothing other than this License grants you permission to propagate or
-modify any covered work.  These actions infringe copyright if you do
-not accept this License.  Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
-  10. Automatic Licensing of Downstream Recipients.
-
-  Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License.  You are not responsible
-for enforcing compliance by third parties with this License.
-
-  An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations.  If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
-  You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License.  For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
-  11. Patents.
-
-  A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based.  The
-work thus licensed is called the contributor's "contributor version".
-
-  A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version.  For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
-  Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
-  In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement).  To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
-  If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients.  "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
-  If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
-  A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License.  You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
-  Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
-  12. No Surrender of Others' Freedom.
-
-  If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all.  For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
-  13. Use with the GNU Affero General Public License.
-
-  Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
-combined work, and to convey the resulting work.  The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
-  14. Revised Versions of this License.
-
-  The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time.  Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-  Each version is given a distinguishing version number.  If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation.  If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
-  If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
-  Later license versions may give you additional or different
-permissions.  However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
-  15. Disclaimer of Warranty.
-
-  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-  16. Limitation of Liability.
-
-  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
-  17. Interpretation of Sections 15 and 16.
-
-  If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
-                     END OF TERMS AND CONDITIONS
-
-            How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
-  If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
-    <program>  Copyright (C) <year>  <name of author>
-    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-    This is free software, and you are welcome to redistribute it
-    under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License.  Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
-  You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
-  The GNU General Public License does not permit incorporating your program
-into proprietary programs.  If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library.  If this is what you want to do, use the GNU Lesser General
-Public License instead of this License.  But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/applications/easysvm/MANIFEST.in b/applications/easysvm/MANIFEST.in
deleted file mode 100644
index 9c0f080546d..00000000000
--- a/applications/easysvm/MANIFEST.in
+++ /dev/null
@@ -1,7 +0,0 @@
-include AUTHORS
-include distutils-help.txt
-include LICENSE
-include README
-recursive-include dependencies *
-recursive-include galaxy *
-recursive-include data *.csv *.fa
diff --git a/applications/easysvm/README b/applications/easysvm/README
deleted file mode 100644
index b24ec868c44..00000000000
--- a/applications/easysvm/README
+++ /dev/null
@@ -1,142 +0,0 @@
-.. This document is written in reStructuredText.
-.. Build command:
-   rst2html.py --date --time README README.html
-
-=============================================
- easysvm - A front end to the shogun toolbox
-=============================================
-
-.. contents::
-
-Introduction
-============
-
-This is a demo corresponding to the PLoS tutorial
-"Support Vector Machines for Sequence Analysis". It is also meant as a
-user "quick start" to using shogun (http://www.shogun-toolbox.org).
-
-
-Installation
-============
-
-Install
--------
-
-For a global install, for which you need root permissions
-
-    python setup.py install
-
-For a local install
-
-    python setup.py install --prefix=$HOME
-
-See distutils-help.txt for more details.
-
-Dependencies
-------------
-
-- `numpy`_ (>=1.0.1)
-- `pylab`_ (>=0.87.7) [optional]
-- `shogun`_ (>=0.7.3)
-- `arff`_ [optional]
-
-.. _numpy: http://numpy.scipy.org/
-.. _pylab: http://matplotlib.sourceforge.net/
-.. _shogun: http://www.shogun-toolbox.org/
-.. _arff: http://www.mit.edu/~sav/arff/
-
-
-Usage
-=====
-
-The results in the paper were produced by tutorial_example.py. Execute
-it in the data directory::
-
-    cd data
-    python ../splicesites/tutorial_example.py
-
-Galaxy interface
-----------------
-
-The following command line arguments are what is behind the galaxy
-interface, which is available as a web service from
-http://galaxy.fml.tuebingen.mpg.de/
-
-There are three types of data creation methods::
-
-    datagen.py motif arff gattaca 10 50 10-15 0.1 tttt 100 50 15 0.1 testmotif1.arff
-    datagen.py cloud 100 3 0.6 1.3 testcloud1.arff
-    datagen.py motif arff gattaca 100 50 10-15 0.1 tttt 1000 50 15 0.1 testmotif2.arff
-    datagen.py cloud 1000 3 0.6 1.3 testcloud2.arff
-
-    datagen.py motif fasta gattaca 10 50 10-15 0.1 testmotifpos.fasta
-    datagen.py motif fasta tttt 100 50 15 0.1 testmotifneg.fasta
-    datagen.py motif fasta gattaca 100 50 10-15 0.1 tm1.fasta
-    datagen.py motif fasta tttt 1000 50 15 0.1 tm2.fasta
-
-Clean up::
-
-    cat tm1.fasta tm2.fasta > testmotiftest.fasta
-    rm tm1.fasta tm2.fasta
-
-
-Cross validation and evaluation on a independent validation set::
-
-    easysvm.py cv 5 10 gauss 0.6 arff testcloud1.arff cv_cloud.txt
-    easysvm.py eval cv_cloud.txt arff testcloud1.arff cv_cloud_eval.txt roc roc_cloud_cv.png
-    easysvm.py cv 5 10 wd 10 2 arff testmotif1.arff cv_motif.txt
-    easysvm.py eval cv_motif.txt arff testmotif1.arff cv_motif_eval.txt roc roc_motif_cv.png
-
-Predict on a test set::
-
-    easysvm.py pred 10 gauss 0.6 arff testcloud1.arff testcloud2.arff pred_cloud.txt
-    easysvm.py pred 10 linear arff testcloud1.arff testcloud2.arff pred_cloud.txt
-    easysvm.py pred 10 poly 3 true true arff testcloud1.arff testcloud2.arff pred_cloud.txt
-
-    easysvm.py pred 10 wd 10 2 arff testmotif1.arff testmotif2.arff pred_motif.txt
-    easysvm.py pred 10 localalign arff testmotif1.arff testmotif2.arff pred_motif.txt
-    easysvm.py pred 10 localimprove 10 1 1 arff testmotif1.arff testmotif2.arff pred_motif.txt
-
-For some kernels, investigate the importance of different motives::
-
-    easysvm.py poim 10 6 wd 10 2 arff testmotif1.arff poims.png
-
-We also support the fasta format::
-
-    easysvm.py cv 5 10 wd 10 2 fasta testmotifpos.fasta testmotifneg.fasta cv_motif.txt
-    easysvm.py eval cv_motif.txt fasta testmotifpos.fasta testmotifneg.fasta cv_motif_eval.txt roc roc_motif_cv.png
-    easysvm.py pred 10 wd 10 2 fasta testmotifpos.fasta testmotifneg.fasta testmotiftest.fasta pred_motif.txt
-    easysvm.py poim 10 6 wd 10 2 fasta testmotifpos.fasta testmotifneg.fasta poims.png
-
-Clean up::
-
-    rm testmotif1.arff testmotif2.arff testcloud1.arff testcloud2.arff
-    rm cv_cloud.txt roc_cloud_cv.png cv_motif.txt roc_motif_cv.png
-    rm pred_cloud.txt pred_motif.txt poims.png
-    rm testmotifpos.fasta testmotifneg.fasta testmotiftest.fasta
-    rm cv_cloud_eval.txt cv_motif_eval.txt
-
-
-License
-=======
-
-GPLv3_
-
-.. _GPLv3: http://gplv3.fsf.org/
-
-All programs in this collection are free software:
-you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-Copyright 2008 Cheng Soon Ong and Gunnar Raetsch
-
diff --git a/applications/easysvm/data b/applications/easysvm/data
deleted file mode 120000
index f792f7b7b32..00000000000
--- a/applications/easysvm/data
+++ /dev/null
@@ -1 +0,0 @@
-../../data/easysvm
\ No newline at end of file
diff --git a/applications/easysvm/distutils-help.txt b/applications/easysvm/distutils-help.txt
deleted file mode 100644
index 348384620c1..00000000000
--- a/applications/easysvm/distutils-help.txt
+++ /dev/null
@@ -1,73 +0,0 @@
-A quick guide to distribution of python code. How to install and how
-to build a module or package in python using distutils (setup.py)
-
-Installation:
--------------
-
-$ python setup.py install
-This builds and installs in the default python
-site-packages location. If you are not root, then the other two
-versions allow you to specify a root directory. There is a tiny
-difference between --home and --prefix.
-
-$ python setup.py install --home=$HOME
-installs the package into $HOME/lib/python/package-name
-
-$ python setup.py install --prefix=$HOME
-installs the package into
-$HOME/lib/python2.5/site-packages/package-name
-(assuming you are using python2.5, which I recommend)
-
-I suggest using
-$ python setup.py install --prefix=$HOME
-because then you can just add
-export PYTHONPATH=$HOME/lib/python2.5/site-packages/package-name:$PYTHONPATH
-to your .bashrc and you can just install all your downloaded python
-packages that use distutils in the same way. Also, it allows you to
-keep the packages for python2.3 and python2.4 separate (which --home
-doesn't).
-
-$ python setup.py build
-builds the software without installing it. This can also be used by
-the developer instead of a makefile/configure type script to compile
-his/her own code.
-
-
-Packaging your code for distribution:
--------------------------------------
-
-You have to basically write a setup.py file that tells python
-distutils where things are. distutils is smart enough to figure out
-which compiler to use for C/C++/python. It even knows when to invoke
-SWIG. The following instructions are from the distutils documentation.
-
-If all you want to do is distribute a module called foo, contained in
-a file foo.py, then your setup script can be as simple as this:
-
-from distutils.core import setup
-setup(name='foo',
-    version='1.0',
-    py_modules=['foo'],
-    )
-
-Some observations:
-* most information that you supply to the Distutils is supplied as
-keyword arguments to the setup() function
-* those keyword arguments fall into two categories: package metadata
-(name, version number) and information about what's in the package (a
-list of pure Python modules, in this case)
-* modules are specified by module name, not filename (the same will
-hold true for packages and extensions)
-* it's recommended that you supply a little more metadata, in
-particular your name, email address and a URL for the project.
-
-To create a source distribution for this module, you would create a
-setup script, setup.py, containing the above code, and run:
-$ python setup.py sdist
-which will create an archive file (e.g., tarball on Unix, ZIP file on
-Windows) containing your setup script setup.py, and your module
-foo.py. The archive file will be named foo-1.0.tar.gz (or .zip), and
-will unpack into a directory foo-1.0.
-
-For more information, look at the documentation for "Installing Python
-Modules" and "Distributing Python Modules".
diff --git a/applications/easysvm/esvm/__init__.py b/applications/easysvm/esvm/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/applications/easysvm/esvm/datafuncs.py b/applications/easysvm/esvm/datafuncs.py
deleted file mode 100644
index 5e84d8feac0..00000000000
--- a/applications/easysvm/esvm/datafuncs.py
+++ /dev/null
@@ -1,162 +0,0 @@
-"""
-This module contains code for generating toy examples
-"""
-
-#############################################################################################
-#                                                                                           #
-#    This program is free software; you can redistribute it and/or modify                   #
-#    it under the terms of the GNU General Public License as published by                   #
-#    the Free Software Foundation; either version 3 of the License, or                      #
-#    (at your option) any later version.                                                    #
-#                                                                                           #
-#    This program is distributed in the hope that it will be useful,                        #
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of                         #
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                           #
-#    GNU General Public License for more details.                                           #
-#                                                                                           #
-#    You should have received a copy of the GNU General Public License                      #
-#    along with this program; if not, see http://www.gnu.org/licenses                       #
-#    or write to the Free Software Foundation, Inc., 51 Franklin Street,                    #
-#    Fifth Floor, Boston, MA 02110-1301  USA                                                #
-#                                                                                           #
-#############################################################################################
-
-import sys
-import parse
-
-import random
-from numpy.random import randn
-from numpy import ones, concatenate, array, transpose
-from esvm.mldata import DatasetFileFASTA, init_datasetfile
-from esvm.mldata_arff import DatasetFileARFF
-
-class MotifDataDef(object):
-    motif = ''
-    numseq = 0
-    seqlenmin = 0
-    seqlenmax = 0
-    posstart = 0
-    posend = 0
-    mutrate = 0.0
-
-################################################################################
-# data generation functions
-
-def motifgen(motif, numseq, seqlenmin, seqlenmax, posstart, posend, mutrate):
-    """Generate sequences with a particular motif at a particular location.
-    Also allow a possible mutation rate of the motif.
-    """
-
-    metadata = 'motifgen(%s,%d,%d,%d,%d,%d,%1.2f)' % (motif, numseq, seqlenmin, seqlenmax, posstart, posend, mutrate)
-
-    acgt='acgt'
-    seqlist = []
-    for i in xrange(0,numseq):
-        str=[] ;
-        seqlen=random.randint(seqlenmin,seqlenmax) ;
-        for l in xrange(0,seqlen):
-            str.append(acgt[random.randint(0,3)])
-        pos=random.randint(posstart,posend) ;
-        for l in xrange(0,len(motif)):
-            if (random.random()>=mutrate) and (pos+l<seqlen) and (pos+l>=0):
-                str[pos+l]=motif[l]
-        seqlist.append(''.join(str).upper())
-
-    return metadata, seqlist
-
-
-def cloudgen(numpoint, numfeat, fracpos, width):
-    """Generate two Gaussian point clouds, centered around one and minus one."""
-
-    numpos = int(round(fracpos*numpoint))
-    numneg = numpoint - numpos
-
-    metadata = 'cloudgen(%d,%d,%d,%3.2f)' % (numpos, numneg, numfeat, width)
-
-    datapos = ones((numfeat, numpos)) + width*randn(numfeat, numpos)
-    dataneg = -ones((numfeat, numneg)) + width*randn(numfeat, numneg)
-    pointcloud = concatenate((datapos,dataneg),axis=1)
-    labels = concatenate((ones(numpos),-ones(numneg)))
-
-    return metadata, pointcloud, labels
-
-
-
-
-
-################################################################################
-# ARFF functions
-
-def arffwrite_real(filename, numpoint, numfeat, fracpos=0.5, width=1.0):
-    """Write an ARFF file containing a vectorial dataset"""
-    #import arff
-
-    (metadata, pointcloud, labels) = cloudgen(numpoint, numfeat, fracpos, width)
-
-    fp = init_datasetfile(filename,'vec')
-    fp.comment = metadata
-    fp.dataname = 'pointcloud'
-    fp.writelines(pointcloud,labels)
-
-
-def arffwrite_sequence(filename,p, n):
-    """Write an ARFF file containing a sequence dataset"""
-    #import arff
-
-    (metadatapos,seqlistpos) = motifgen(p.motif, p.numseq, p.seqlenmin, p.seqlenmax, p.posstart, p.posend, p.mutrate)
-    (metadataneg,seqlistneg) = motifgen(n.motif, n.numseq, n.seqlenmin, n.seqlenmax, n.posstart, n.posend, n.mutrate)
-
-    labels = concatenate((ones(len(seqlistpos)),-ones(len(seqlistneg))))
-    seqlist = seqlistpos + seqlistneg
-    fp = init_datasetfile(filename,'seq')
-    fp.comment = metadatapos+' '+metadataneg
-    fp.dataname = 'motif'
-    fp.writelines(seqlist,labels)
-
-
-
-def arffread(kernelname,datafilename):
-    """Decide based on kernelname whether to read a sequence or vectorial file"""
-
-    if kernelname == 'gauss' or kernelname == 'linear' or kernelname == 'poly' or kernelname == None:
-        fp = init_datasetfile(datafilename,'vec')
-    elif kernelname == 'wd' or kernelname == 'localalign' or kernelname == 'localimprove'\
-             or kernelname == 'spec' or kernelname == 'cumspec':
-        fp = init_datasetfile(datafilename,'seq')
-    elif kernelname == 'spec2' or kernelname == 'cumspec2':
-        fp = init_datasetfile(datafilename,'mseq')
-    else:
-        print 'Unknown kernel in arffread'
-
-    return fp.readlines()
-
-################################################################################
-# fasta functions
-
-def fastawrite_sequence(filename,p):
-    """Write a FASTA file containing a sequence dataset"""
-    import arff
-
-    (metadata,seqlist) = motifgen(p.motif, p.numseq, p.seqlenmin, p.seqlenmax, p.posstart, p.posend, p.mutrate)
-    labels = ones(len(seqlist))
-    fp = init_datasetfile(filename,'seq')
-    fp.writelines(seqlist,labels)
-
-def fastaread(fnamepos,fnameneg=None):
-    """Read two fasta files, the first positive, the second negative"""
-    fpos = init_datasetfile(fnamepos,'seq')
-    (fa1,lab1) = fpos.readlines()
-
-    if fnameneg is not None:
-        fneg = init_datasetfile(fnameneg,'seq')
-        (fa2,lab2) = fneg.readlines()
-
-        print 'positive: %d, negative %d' % (len(fa1),len(fa2))
-        all_labels = concatenate((ones(len(fa1)),-ones(len(fa2))))
-        all_examples = fa1 + fa2
-    else:
-        all_examples = fa1
-        all_labels = ones(len(fa1))
-
-    return all_examples, all_labels
-
diff --git a/applications/easysvm/esvm/experiment.py b/applications/easysvm/esvm/experiment.py
deleted file mode 100644
index ed4c3a79e46..00000000000
--- a/applications/easysvm/esvm/experiment.py
+++ /dev/null
@@ -1,773 +0,0 @@
-#############################################################################################
-#                                                                                           #
-#    This program is free software; you can redistribute it and/or modify                   #
-#    it under the terms of the GNU General Public License as published by                   #
-#    the Free Software Foundation; either version 3 of the License, or                      #
-#    (at your option) any later version.                                                    #
-#                                                                                           #
-#    This program is distributed in the hope that it will be useful,                        #
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of                         #
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                           #
-#    GNU General Public License for more details.                                           #
-#                                                                                           #
-#    You should have received a copy of the GNU General Public License                      #
-#    along with this program; if not, see http://www.gnu.org/licenses                       #
-#    or write to the Free Software Foundation, Inc., 51 Franklin Street,                    #
-#    Fifth Floor, Boston, MA 02110-1301  USA                                                #
-#                                                                                           #
-#############################################################################################
-
-import sys
-import random
-
-import shutil
-import numpy
-from numpy import sign, where, array, ones
-import parse
-import utils
-from poim import compute_poims
-
-import shogun
-from shogun import GaussianKernel, WeightedDegreePositionStringKernel
-from shogun import WeightedDegreeStringKernel
-from shogun import LinearKernel, PolyKernel, LocalAlignmentStringKernel
-from shogun import LocalityImprovedStringKernel
-from shogun import CommWordStringKernel, WeightedCommWordStringKernel, CommUlongStringKernel
-from shogun import CombinedKernel
-from shogun import SLOWBUTMEMEFFICIENT
-from shogun import AvgDiagKernelNormalizer
-from shogun import RealFeatures, Labels, StringCharFeatures, DNA, StringWordFeatures, StringUlongFeatures, PROTEIN
-from shogun import CombinedFeatures
-from shogun import LibSVM,GPBTSVM
-
-DefaultSVM = LibSVM
-try:
-    from shogun import SVMLight
-    LinAddSVM = SVMLight
-    LinearSVM = SVMLight
-except:
-    LinAddSVM = GPBTSVM
-    LinearSVM = LibSVM
-
-from shogun import SortWordString, SortUlongString
-
-from utils import calcprc, calcroc, accuracy
-from utils import getPartitionedSet, getCurrentSplit
-import plots
-import re
-from poim import reshape_normalize_contribs, compute_weight_mass
-
-################################################################################
-def non_atcg_convert(seq, nuc_con):
-    """ Converts Non ATCG characters from DNA sequence """
-
-    if nuc_con == '':sys.stderr.write("usage: Provide a choice for non ACGT nucleotide conversion [T|A|C|G|R|Y|N] at last\n");sys.exit(-1)
-    if re.match(r'[^ATCGRYN]', nuc_con):sys.stderr.write("usage: Conversion nucleotide choice -"+ nuc_con +"- failed. pick one from [T|A|C|G|R|Y|N]\n");sys.exit(-1)
-
-    nuc_con = nuc_con.upper()
-    mod_seq = []
-    for i in range(len(seq)):
-        if re.search(r'[^ACTG]', seq[i], re.IGNORECASE):
-            if nuc_con == 'A' or nuc_con == 'T' or nuc_con == 'C' or nuc_con == 'G':
-                seq[i] = re.sub(r'[^ATCG|actg]', nuc_con, seq[i])
-                seq[i] = seq[i].upper()
-                mod_seq.append(seq[i])
-                continue
-            if nuc_con == 'N':(nucleotide, line) = ('ATCG', '')
-            if nuc_con == 'R':(nucleotide, line) = ('AG', '')
-            if nuc_con == 'Y':(nucleotide, line) = ('TC', '')
-
-            for single_nuc in seq[i]:
-                if re.match(r'[^ACGT]', single_nuc, re.IGNORECASE):
-                    line += random.choice(nucleotide)
-                else:
-                    line += single_nuc.upper()
-            mod_seq.append(line)
-        else:
-            seq[i] = seq[i].upper()
-            mod_seq.append(seq[i])
-    return mod_seq
-
-def non_aminoacid_converter(seq, amino_con):
-    """ Converts Non amino acid characters from protein sequence """
-
-    if amino_con == '':sys.stderr.write("usage: Provide a choice for replacing non amino acid characters\n");sys.exit(-1)
-    flag = 0
-    if len(amino_con)>1:
-        if amino_con != 'random':flag = 1
-    else:
-        if re.match(r'[^GPAVLIMCFYWHKRQNEDST]', amino_con, re.IGNORECASE):flag = 1
-    if flag == 1:sys.stderr.write("usage: Replace aminoacid chioce -"+ amino_con +"- failed. Pick a valid aminoacid single letter code/random\n");sys.exit(-1)
-
-    amino_con = amino_con.upper()
-    opt_seq = []
-    for i in range(len(seq)):
-        if re.search(r'[^GPAVLIMCFYWHKRQNEDST]', seq[i], re.IGNORECASE):
-            if amino_con == 'RANDOM':
-                aminoacid = 'GPAVLIMCFYWHKRQNEDST'
-                line = ''
-                for single_amino in seq[i]:
-                    if re.match(r'[^GPAVLIMCFYWHKRQNEDST]', single_amino, re.IGNORECASE):
-                        r_amino = random.choice(aminoacid)
-                        line += r_amino
-                    else:
-                        single_amino = single_amino.upper()
-                        line += single_amino
-                opt_seq.append(line)
-            else:
-                seq[i] = re.sub(r'[^GPAVLIMCFYWHKRQNEDST|gpavlimcfywhkrqnedst]', amino_con, seq[i])
-                seq[i] = seq[i].upper()
-                opt_seq.append(seq[i])
-        else:
-            seq[i] = seq[i].upper()
-            opt_seq.append(seq[i])
-    return opt_seq
-# helper functions
-
-def create_features(kname, examples, kparam, train_mode, preproc, seq_source, nuc_con):
-    """Converts numpy arrays or sequences into shogun features"""
-
-    if kname == 'gauss' or kname == 'linear' or kname == 'poly':
-        examples = numpy.array(examples)
-        feats = RealFeatures(examples)
-
-    elif kname == 'wd' or kname == 'localalign' or kname == 'localimprove':
-        if seq_source == 'dna':
-            examples = non_atcg_convert(examples, nuc_con)
-            feats = StringCharFeatures(examples, DNA)
-        elif seq_source == 'protein':
-            examples = non_aminoacid_converter(examples, nuc_con)
-            feats = StringCharFeatures(examples, PROTEIN)
-        else:
-            sys.stderr.write("Sequence source -"+seq_source+"- is invalid. select [dna|protein]\n")
-            sys.exit(-1)
-
-    elif kname == 'spec' or kname == 'cumspec':
-        if seq_source == 'dna':
-            examples = non_atcg_convert(examples, nuc_con)
-            feats = StringCharFeatures(examples, DNA)
-        elif seq_source == 'protein':
-            examples = non_aminoacid_converter(examples, nuc_con)
-            feats = StringCharFeatures(examples, PROTEIN)
-        else:
-            sys.stderr.write("Sequence source -"+seq_source+"- is invalid. select [dna|protein]\n")
-            sys.exit(-1)
-
-        wf = StringUlongFeatures( feats.get_alphabet() )
-        wf.obtain_from_char(feats, kparam['degree']-1, kparam['degree'], 0, kname=='cumspec')
-        del feats
-
-        if train_mode:
-            preproc = SortUlongString()
-            preproc.init(wf)
-        wf.add_preprocessor(preproc)
-        ret = wf.apply_preprocessor()
-        #assert(ret)
-
-        feats = wf
-    elif kname == 'spec2' or kname == 'cumspec2':
-        # spectrum kernel on two sequences
-        feats = {}
-        feats['combined'] = CombinedFeatures()
-
-        reversed = kname=='cumspec2'
-
-        (ex0,ex1) = zip(*examples)
-
-        f0 = StringCharFeatures(list(ex0), DNA)
-        wf = StringWordFeatures(f0.get_alphabet())
-        wf.obtain_from_char(f0, kparam['degree']-1, kparam['degree'], 0, reversed)
-        del f0
-
-        if train_mode:
-            preproc = SortWordString()
-            preproc.init(wf)
-        wf.add_preprocessor(preproc)
-        ret = wf.apply_preprocessor()
-        assert(ret)
-        feats['combined'].append_feature_obj(wf)
-        feats['f0'] = wf
-
-        f1 = StringCharFeatures(list(ex1), DNA)
-        wf = StringWordFeatures( f1.get_alphabet() )
-        wf.obtain_from_char(f1, kparam['degree']-1, kparam['degree'], 0, reversed)
-        del f1
-
-        if train_mode:
-            preproc = SortWordString()
-            preproc.init(wf)
-        wf.add_preprocessor(preproc)
-        ret = wf.apply_preprocessor()
-        assert(ret)
-        feats['combined'].append_feature_obj(wf)
-        feats['f1'] = wf
-
-    else:
-        print 'Unknown kernel %s' % kname
-
-    return (feats,preproc)
-
-def create_kernel(kname,kparam,feats_train):
-    """Call the corresponding constructor for the kernel"""
-
-    if kname == 'gauss':
-        kernel = GaussianKernel(feats_train, feats_train, kparam['width'])
-    elif kname == 'linear':
-        kernel = LinearKernel(feats_train, feats_train)
-        kernel.set_normalizer(AvgDiagKernelNormalizer(kparam['scale']))
-    elif kname == 'poly':
-        kernel = PolyKernel(feats_train, feats_train, kparam['degree'], kparam['inhomogene'], kparam['normal'])
-    elif kname == 'wd':
-        kernel=WeightedDegreePositionStringKernel(feats_train, feats_train, kparam['degree'])
-        kernel.set_normalizer(AvgDiagKernelNormalizer(float(kparam['seqlength'])))
-        kernel.set_shifts(kparam['shift']*numpy.ones(kparam['seqlength'],dtype=numpy.int32))
-        #kernel=WeightedDegreeStringKernel(feats_train, feats_train, kparam['degree'])
-    elif kname == 'spec':
-        kernel = CommUlongStringKernel(feats_train, feats_train)
-    elif kname == 'cumspec':
-        kernel = WeightedCommWordStringKernel(feats_train, feats_train)
-        kernel.set_weights(numpy.ones(kparam['degree']))
-    elif kname == 'spec2':
-        kernel = CombinedKernel()
-        k0 = CommWordStringKernel(feats_train['f0'], feats_train['f0'])
-        k0.io.disable_progress()
-        kernel.append_kernel(k0)
-        k1 = CommWordStringKernel(feats_train['f1'], feats_train['f1'])
-        k1.io.disable_progress()
-        kernel.append_kernel(k1)
-    elif kname == 'cumspec2':
-        kernel = CombinedKernel()
-        k0 = WeightedCommWordStringKernel(feats_train['f0'], feats_train['f0'])
-        k0.set_weights(numpy.ones(kparam['degree']))
-        k0.io.disable_progress()
-        kernel.append_kernel(k0)
-        k1 = WeightedCommWordStringKernel(feats_train['f1'], feats_train['f1'])
-        k1.set_weights(numpy.ones(kparam['degree']))
-        k1.io.disable_progress()
-        kernel.append_kernel(k1)
-    elif kname == 'localalign':
-        kernel = LocalAlignmentStringKernel(feats_train, feats_train)
-    elif kname == 'localimprove':
-        kernel = LocalityImprovedStringKernel(feats_train, feats_train, kparam['length'],\
-                                              kparam['indeg'], kparam['outdeg'])
-    else:
-        print 'Unknown kernel %s' % kname
-
-    kernel.set_cache_size(32)
-    return kernel
-
-def create_combined_kernel(kname, kparam, examples, train_mode, preproc):
-    """A wrapper for creating combined kernels.
-
-    kname, kparam and examples are lists.
-
-    """
-    num_kernels = len(kname)
-    feats['combined'] = CombinedFeatures()
-    kernel = CombinedKernel()
-
-    for kix in xrange(num_kernels):
-        cur_kname = '%s%d' % (kname[kix],kix)
-        (cur_feats, cur_preproc) = create_features(kname[kix], examples[kix], kparam[kix], train_mode, preproc)
-        feats[cur_kname] = cur_feats
-        cur_kernel = create_kernel(kname[kix], kparam[kix], cur_feats)
-        kernel.append_kernel(cur_kernel)
-
-    return (feats,kernel)
-
-def model2str(kparam,C,kp,shownames=True):
-    """Generates a string describing the model parameters"""
-
-    if kparam["modelsel_name"]==None or len(kparam["modelsel_params"])==1:
-        if shownames:
-            str="\tC=%1.1f" % C
-        else:
-            str="\t%1.2f" % C
-    else:
-        if type(kp)==type(int(0)):
-            if shownames:
-                str="\tC=%1.1f\t%s=%i" %(C, kparam["modelsel_name"], kp)
-            else:
-                str="\t%1.1f\t%i" %(C, kp)
-        else:
-            if shownames:
-                str="\tC=%1.1f\t%s=%1.2f" %(C, kparam["modelsel_name"], kp)
-            else:
-                str="\t%1.1f\t%1.2f" %(C, kp)
-    return str
-
-
-
-def train(trainex,trainlab,C,kname,kparam,seq_source,nuc_con):
-    """Trains a SVM with the given kernel"""
-
-    (feats_train, preproc) = create_features(kname,trainex, kparam, True, None, seq_source, nuc_con)
-
-    if kname == 'wd':
-        kparam['seqlength'] = len(trainex[0])
-    kernel = create_kernel(kname,kparam,feats_train)
-
-    if kname == 'spec2' or kname == 'cumspec2':
-        kernel.init(feats_train['combined'], feats_train['combined'])
-    else:
-        kernel.init(feats_train, feats_train)
-    kernel.io.disable_progress()
-    kernel.set_optimization_type(SLOWBUTMEMEFFICIENT)
-    labels = BinaryLabels(numpy.array(trainlab,numpy.double))
-
-    # libsvm is fine for most kernels
-    if kname in ('wd', 'spec', 'cumspec', 'spec2', 'cumspec2'):
-        # for the string kernels there exist specific optimizations that are only effective when using
-        # a LinAdd SVM implementation (e.g. SVM-light or GPBT-SVM)
-        SVMClass = LinAddSVM
-    elif kname == 'linear':
-        SVMClass = LinearSVM
-    else:
-        SVMClass=DefaultSVM
-
-    svm = SVMClass(C, kernel, labels)
-
-    svm.io.disable_progress()
-    svm.set_batch_computation_enabled(True)
-    svm.set_linadd_enabled(True)
-    svm.set_epsilon(1e-5)
-    svm.parallel.set_num_threads(svm.parallel.get_num_cpus())
-
-    svm.train()
-
-    return (svm, kernel, feats_train, preproc)
-
-def train_and_test(trainex,trainlab,testex,C,kname,kparam, seq_source, nuc_con):
-    """Trains a SVM with the given kernel, and predict on the test examples"""
-
-    (svm, kernel, feats_train, preproc) = train(trainex,trainlab,C,kname,kparam,seq_source,nuc_con)
-    (feats_test, preproc) = create_features(kname, testex, kparam, False, preproc, seq_source, nuc_con)
-    if kname == 'spec2' or kname == 'cumspec2':
-        for feats in feats_train.values():
-            feats.io.disable_progress()
-        for feats in feats_test.values():
-            feats.io.disable_progress()
-        kernel.init(feats_train['combined'], feats_test['combined'])
-    else:
-        feats_train.io.disable_progress()
-        feats_test.io.disable_progress()
-        kernel.init(feats_train, feats_test)
-
-    kernel.set_optimization_type(SLOWBUTMEMEFFICIENT)
-    output = svm.apply().get_labels()
-
-    return output
-
-def crossvalidation(cv, kname, kparam, C, all_examples, all_labels, seq_source, nuc_con):
-    """Perform cross validation using an SVM
-
-    cv -- the number of folds
-    kernel -- the kernel used
-    data -- the dataset, assumed to be compatible to kernel, label is in the first column
-
-    """
-    print 'Using %i-fold crossvalidation' % cv
-    partitions = getPartitionedSet(len(all_labels), cv)
-    error = []
-    sum_accuracy = 0.0
-    sum_roc = 0.0
-    all_outputs=[0.0] * len(all_labels)
-    all_split=[-1] * len(all_labels)
-
-    for repetition in xrange(cv):
-        XT, LT, XTE, LTE = getCurrentSplit(repetition, partitions, all_labels, all_examples)
-        numpos = len(where(array(LTE)>0)[0])
-        svmout = train_and_test(XT, LT, XTE, C, kname, kparam, seq_source, nuc_con)
-
-        for i in xrange(len(svmout)):
-            all_outputs[partitions[repetition][i]] = svmout[i]
-            all_split[partitions[repetition][i]] = repetition ;
-
-    return (all_outputs, all_split)
-
-def evaluate(predictions, splitassignments, labels, roc_fname=None, prc_fname=None):
-    """Evaluate prediction results
-    """
-
-    res_str = ""
-
-    cv = 1
-    if splitassignments!=None:
-        for split in splitassignments:
-            if split+1>cv:
-                cv=int(split+1)
-    if cv>1:
-        res_str = "Evaluating on %i examples in %i splits\n" % (len(labels),cv)
-    else:
-        res_str = "Evaluating on %i examples\n" % len(labels)
-
-    output_splits = cv* [[]]
-    label_splits = cv* [[]]
-    for i in xrange(cv):
-        label_splits[i]=[]
-        output_splits[i]=[]
-
-    for i in xrange(0,len(labels)):
-        if cv>1:
-            split=int(splitassignments[i])
-        else:
-            split=0
-        output_splits[split].append(predictions[i])
-        label_splits[split].append(labels[i])
-
-    error = []
-    sum_accuracy = 0.0
-    sum_roc = 0.0
-    sum_prc = 0.0
-
-    for split in xrange(cv):
-        res_str += 'Split %d\n' % (split+1)
-
-        LTE = label_splits[split] ;
-        svmout = output_splits[split]
-
-        numpos=0
-        for l in LTE:
-            if l==1:
-                numpos+=1
-        istwoclass = numpos>0 and numpos<len(LTE)
-        res_str += '   number of positive examples = %i\n' % numpos
-        res_str += '   number of negative examples = %i\n' % (len(LTE)-numpos)
-        if istwoclass:
-            auROC = calcroc(svmout,LTE)
-            res_str += '   Area under ROC curve        = %2.1f %%\n' % (100.0*auROC)
-            sum_roc += auROC
-            if roc_fname!=None:
-                if split!=cv-1:
-                    plots.plotroc(svmout, LTE, split==cv-1, None, "ROC curve of SVM, split %i" % (split+1))
-                else:
-                    plots.plotroc(svmout, LTE, split==cv-1, roc_fname, "ROC curve of SVM, split %i" % (split+1))
-            auPRC = calcprc(svmout,LTE)
-            res_str += '   Area under PRC curve        = %2.1f %%\n' % (100.0*auPRC)
-            sum_prc += auPRC
-            if prc_fname!=None:
-                if split!=cv-1:
-                    plots.plotprc(svmout, LTE, None, "PRC curve of SVM, split %i" % (split+1))
-                else:
-                    plots.plotprc(svmout, LTE, prc_fname, "PRC curve of SVM, split %i" % (split+1))
-
-        acc = accuracy(svmout, LTE)
-        res_str += '   accuracy (at threshold 0)   = %2.1f %% \n' % (100.0*acc)
-        sum_accuracy += acc
-
-    numpos=0
-    for l in labels:
-        if l==1:
-            numpos+=1
-
-    mean_roc = sum_roc/cv
-    mean_prc = sum_prc/cv
-    mean_acc = sum_accuracy/cv
-
-    res_str += 'Averages\n'
-    res_str += '   number of positive examples = %i\n' % round(numpos/cv)
-    res_str += '   number of negative examples = %i\n' % round((len(labels)-numpos)/cv)
-    res_str += '   Area under ROC curve        = %2.1f %%\n' % (100.0*mean_roc)
-    res_str += '   Area under PRC curve        = %2.1f %%\n' % (100.0*mean_prc)
-    res_str += '   accuracy (at threshold 0)   = %2.1f %% \n' % (100.0*mean_acc)
-
-    return (res_str,mean_roc,mean_prc,mean_acc)
-
-
-def svm_cv(argv):
-    """A top level script to parse input parameters and run cross validation"""
-
-    assert(argv[1]=='cv')
-    if len(argv)<5:sys.stderr.write("usage: %s cv repeat C kernelname [kernelparameters] [arff|fasta] inputfiles outputfile [dna|protein] non(nucleotide|amino)converter \n" % argv[0]);sys.exit(-1)
-
-    # parse input parameters
-    cv = int(argv[2])
-    C = float(argv[3])
-    (kernelname,kparam,argv_rest) = parse.parse_kernel_param(argv[4:],False)
-    (examples,labels,argv_rest) = parse.parse_input_file_train(kernelname, argv_rest)
-
-    (seq_source, nuc_con) = ('', '')
-    if kernelname == 'spec' or kernelname == 'wd':
-        if len(argv_rest)<1:sys.stderr.write("outputfile [dna|protein] non(nucleotide|amino)converter are missing\n");sys.exit(-1)
-        if len(argv_rest)<2:sys.stderr.write("[dna|protein] non(nucleotide|amino)converter are missing\n");sys.exit(-1)
-        if len(argv_rest)<3:
-            if argv_rest[-1] == 'dna':
-                sys.stderr.write("non-nucleotide converter like [A|T|C|G|R|Y|N] is missing. Cannot continue.\n")
-                sys.exit(-1)
-            elif argv_rest[-1] == 'protein':
-                sys.stderr.write("non-amino acid converter like [G|P|A|V|L|I|M|C|F|Y|W|H|K|R|Q|N|E|D|S|T|random] is missing. Cannot continue.\n")
-                sys.exit(-1)
-            else:
-                sys.stderr.write("Here expect FASTA sequence type as [dna|protein] instead of -"+ argv_rest[-1] +"- Cannot continue.\n")
-                sys.exit(-1)
-        if len(argv_rest)>3:sys.stderr.write("Too many arguments\n");sys.exit(-1)
-        seq_source = argv_rest[1]
-        nuc_con = argv_rest[2]
-
-    if kernelname == 'linear' or kernelname == 'gauss' or kernelname == 'poly':
-        if len(argv_rest)<1:sys.stderr.write("outputfile misssing\n");sys.exit(-1)
-        if len(argv_rest)>1:sys.stderr.write("Too many arguments\n");sys.exit(-1)
-    outfilename = argv_rest[0]
-
-    utils.check_params(kparam, C, len(examples[0]))
-
-    # run cross-validation
-    (all_outputs, all_split) = crossvalidation(cv, kernelname, kparam, C, examples, labels, seq_source, nuc_con)
-    try:
-        f = open(outfilename, 'w+')
-    except:
-        sys.stderr.write('Fails to open the outputfile at ' + outfilename + ' Cannot continue.\n')
-        sys.exit(-1)
-    res_str = '#example\toutput\tsplit\n'
-    f.write(res_str)
-    for ix in xrange(len(all_outputs)):
-	res_str = '%d\t%2.7f\t%d\n' % (ix,all_outputs[ix],all_split[ix])
-        f.write(res_str)
-    f.close()
-
-def svm_modelsel(argv):
-    """A top level script to parse input parameters and run model selection"""
-
-    assert(argv[1]=='modelsel')
-    if len(argv)<5:sys.stderr.write("usage: %s modelsel repeat Cs kernelname [kernelparameters] [arff|fasta] inputfiles  outputfile [dna|protein] non(nucleotide|amino)converter\n" % argv[0]);sys.exit(-1)
-
-    # parse input parameters
-    cv = int(argv[2])
-    Cs = parse.parse_float_list(argv[3])
-    (kernelname,kparam,argv_rest) = parse.parse_kernel_param(argv[4:], True)
-    (examples,labels,argv_rest) = parse.parse_input_file_train(kernelname, argv_rest)
-
-    (seq_source, nuc_con) = ('', '')
-    if kernelname == 'spec' or kernelname == 'wd':
-        if len(argv_rest)<1:sys.stderr.write("outputfile [dna|protein] non(nucleotide|amino)converter are missing\n");sys.exit(-1)
-        if len(argv_rest)<2:sys.stderr.write("[dna|protein] non(nucleotide|amino)converter are missing\n");sys.exit(-1)
-        if len(argv_rest)<3:
-            if argv_rest[-1] == 'dna':
-                sys.stderr.write("non-nucleotide converter like [A|T|C|G|R|Y|N] is missing. Cannot continue.\n")
-                sys.exit(-1)
-            elif argv_rest[-1] == 'protein':
-                sys.stderr.write("non-amino acid converter like [G|P|A|V|L|I|M|C|F|Y|W|H|K|R|Q|N|E|D|S|T|random] is missing. Cannot continue.\n")
-                sys.exit(-1)
-            else:
-                sys.stderr.write("Here expect FASTA sequence type as [dna|protein] instead of -"+ argv_rest[-1] +"- Cannot continue.\n")
-                sys.exit(-1)
-        if len(argv_rest)>3:sys.stderr.write("Too many arguments\n");sys.exit(-1)
-        seq_source = argv_rest[1]
-        nuc_con = argv_rest[2]
-
-    if kernelname == 'linear' or kernelname == 'gauss' or kernelname== 'poly':
-        if len(argv_rest)<1:sys.stderr.write("outputfile missing\n");sys.exit(-1)
-        if len(argv_rest)>1:sys.stderr.write("Too many arguments\n");sys.exit(-1)
-
-    outfilename = argv_rest[0]
-
-    # run cross-validation
-    mean_rocs=[] ;
-    mean_prcs=[] ;
-    mean_accs=[] ;
-    all_Cs = [] ;
-    all_kparam=[] ;
-
-    if kparam["modelsel_name"]==None:
-        for C in Cs:
-            utils.check_params(kparam, C, len(examples[0]))
-
-            (all_outputs, all_split) = crossvalidation(cv, kernelname, kparam, C, examples, labels, seq_source, nuc_con)
-            (res_str, mean_roc, mean_prc, mean_acc) = evaluate(all_outputs, all_split, labels)
-            mean_rocs.append(mean_roc)
-            mean_prcs.append(mean_prc)
-            mean_accs.append(mean_acc)
-            all_Cs.append(C)
-            all_kparam.append(None)
-    else: # also optimize one kernel parameter
-        for C in Cs:
-            for kp in kparam["modelsel_params"]:
-                kparam[kparam["modelsel_name"]] = kp
-                utils.check_params(kparam, C, len(examples[0]))
-
-                (all_outputs, all_split) = crossvalidation(cv, kernelname, kparam, C, examples, labels, seq_source, nuc_con)
-                (res_str, mean_roc, mean_prc, mean_acc) = evaluate(all_outputs, all_split, labels)
-                mean_rocs.append(mean_roc)
-                mean_prcs.append(mean_prc)
-                mean_accs.append(mean_acc)
-                all_Cs.append(C)
-                all_kparam.append(kp)
-
-    max_roc=numpy.max(numpy.array(mean_rocs))
-    max_prc=numpy.max(numpy.array(mean_prcs))
-    max_acc=numpy.max(numpy.array(mean_accs))
-    try:
-        f = open(outfilename, 'w+')
-    except:
-        sys.stderr.write('Fails to open the outputfile at ' + outfilename + ' Cannot continue.\n')
-        sys.exit(-1)
-    if kparam["modelsel_name"]==None or len(kparam["modelsel_params"])==1:
-        detail_str = "\tC\tROC\tPRC\tAccuracy (at threshold 0)\n"
-    else:
-        detail_str = "\tC\t%s\tROC\tPRC\tAccuracy (at threshold 0)\n" % kparam["modelsel_name"]
-
-    best_roc_str=''
-    best_prc_str=''
-    best_acc_str=''
-    for i in xrange(len(all_Cs)):
-        # determine the best parameter combinations
-        if mean_rocs[i]==max_roc:
-            rocsym='+'
-            best_roc_str+=model2str(kparam, all_Cs[i], all_kparam[i])+'\n'
-        else:
-            rocsym=' '
-        if mean_prcs[i]==max_prc:
-            prcsym='+'
-            best_prc_str+=model2str(kparam, all_Cs[i], all_kparam[i])+'\n'
-        else:
-            prcsym=' '
-        if mean_accs[i]==max_acc:
-            accsym='+'
-            best_acc_str+=model2str(kparam, all_Cs[i], all_kparam[i])+'\n'
-        else:
-            accsym=' '
-
-        detail_str+=model2str(kparam, all_Cs[i], all_kparam[i], False)+'\t'
-        if kparam["modelsel_name"]==None or len(kparam["modelsel_params"])==1:
-            detail_str += '%c%2.1f%%\t%c%2.1f%%\t%c%2.1f%%\n' % (rocsym, 100*mean_rocs[i], prcsym, 100*mean_prcs[i], accsym, 100*mean_accs[i])
-        else:
-            detail_str += '%c%2.1f%%\t%c%2.1f%%\t%c%2.1f%%\n' % (rocsym, 100*mean_rocs[i], prcsym, 100*mean_prcs[i], accsym, 100*mean_accs[i])
-
-    f.write('Best model(s) according to ROC measure:\n%s' % best_roc_str)
-    f.write('\nBest model(s) according to PRC measure:\n%s' % best_prc_str)
-    f.write('\nBest model(s) according to accuracy measure:\n%s' % best_acc_str)
-
-    f.write('\nDetailed results:\n')
-    f.write(detail_str)
-    f.close()
-
-def svm_pred(argv):
-    """A top level script to parse input parameters and train and predict"""
-
-    assert(argv[1]=='pred')
-    if len(argv)<6:sys.stderr.write("usage: %s pred C kernelname kernelparameters [arff|fasta] inputfiles  outputfile [dna|protein] non(nucleotide|amino)converter\n" % argv[0]);sys.exit(-1)
-
-    # parse input parameters
-    C = float(argv[2])
-    (kernelname,kparam,argv_rest) = parse.parse_kernel_param(argv[3:],False)
-    (trainex, trainlab, testex, argv_rest) = parse.parse_input_file_train_test(kernelname, argv_rest)
-
-    (seq_source, nuc_con) = ('', '')
-    if kernelname == 'spec' or kernelname == 'wd':
-        if len(argv_rest)<1:sys.stderr.write("outputfile [dna|protein] non(nucleotide|amino)converter are missing\n");sys.exit(-1)
-        if len(argv_rest)<2:sys.stderr.write("[dna|protein] non(nucleotide|amino)converter are missing\n");sys.exit(-1)
-        if len(argv_rest)<3:
-            if argv_rest[-1] == 'dna':
-                sys.stderr.write("non-nucleotide converter like [A|T|C|G|R|Y|N] is missing. Cannot continue.\n")
-                sys.exit(-1)
-            elif argv_rest[-1] == 'protein':
-                sys.stderr.write("non-amino acid converter like [G|P|A|V|L|I|M|C|F|Y|W|H|K|R|Q|N|E|D|S|T|random] is missing. Cannot continue.\n")
-                sys.exit(-1)
-            else:
-                sys.stderr.write("Here expect FASTA sequence type as [dna|protein] instead of -"+ argv_rest[-1] +"- Cannot continue.\n")
-                sys.exit(-1)
-        if len(argv_rest)>3:sys.stderr.write("Too many arguments\n");sys.exit(-1)
-        seq_source = argv_rest[1]
-        nuc_con = argv_rest[2]
-
-    if kernelname == 'linear' or kernelname== 'poly' or kernelname == 'gauss':
-        if len(argv_rest)<1:sys.stderr.write("outputfile missing\n");sys.exit(-1)
-        if len(argv_rest)>1:sys.stderr.write("Too many arguments\n");sys.exit(-1)
-
-    outfilename = argv_rest[0]
-
-    utils.check_params(kparam, C, len(trainex[0]))
-
-    # run training and testing
-    svmout = train_and_test(trainex, trainlab, testex, C, kernelname, kparam, seq_source, nuc_con)
-
-    # write output file
-    try:
-        f = open(outfilename,'w')
-    except:
-        sys.stderr.write('Fails to open the outputfile at ' + outfilename + ' Cannot continue.\n')
-        sys.exit(-1)
-
-    res_str = '#example\toutput\n'
-    f.write(res_str)
-    for ix in xrange(len(svmout)):
-        res_str = str(ix)+'\t'+str(svmout[ix])+'\n'
-        f.write(res_str)
-    f.close()
-
-def svm_eval(argv):
-    """A top level script to parse input parameters and evaluate"""
-
-    assert(argv[1]=='eval')
-    if len(argv)<6:sys.stderr.write("usage: %s eval predictionfile [arff|fasta] inputfiles outputfile [roc|prc figure.png]\n" % argv[0]);sys.exit(-1)
-
-    # parse input parameters
-    (predictions, splitassignments) = parse.parse_prediction_file(argv[2])
-    (trainex, trainlab, argv_rest) = parse.parse_input_file_train(None, argv[3:])
-    if len(argv_rest)<1:sys.stderr.write("Output file missing\n");sys.exit(-1)
-    if len(argv_rest)>3:sys.stderr.write("Too many arguments\n");sys.exit(-1)
-    outfilename = argv_rest[0]
-    roc_fname = None
-    prc_fname = None
-
-    if len(argv_rest)>2:
-        if argv_rest[1]=='roc':
-	    roc_fname=argv_rest[2]
-	elif argv_rest[1]=='prc':
-            prc_fname=argv_rest[2]
-	else:
-	    sys.stderr.write('Usage: [roc|prc]')
-	    sys.exit(-1)
-
-    # run training and testing
-    (res_str,mean_roc,mean_prc,mean_acc) = evaluate(predictions, splitassignments, trainlab, roc_fname, prc_fname)
-
-    # write output file
-    try:
-        f = open(outfilename,'w')
-    except:
-        sys.stderr.write('Fails to open the outputfile at ' + outfilename + ' Cannot continue.\n')
-        sys.exit(-1)
-
-    f.write(res_str)
-    f.close()
-
-
-def svm_poim(argv):
-    """A top level script to parse input parameters and plot poims"""
-
-    assert(argv[1]=='poim')
-    if len(argv)<7:sys.stderr.write("usage: %s poim C poimdegree wd [kernelparameters] [arff|fasta] inputfiles  poim.png [dna|protein] non(nucleotide|amino)converter\n" % argv[0]);sys.exit(-1)
-
-    # parse input parameters
-    C = float(argv[2])
-    poimdegree = int(argv[3])
-    (kernelname,kparam,argv_rest) = parse.parse_kernel_param(argv[4:], False)
-    (examples,labels,argv_rest) = parse.parse_input_file_train(kernelname, argv_rest)
-
-    if len(argv_rest)<1:sys.stderr.write("poim.png [dna|protein] non(nucleotide|amino)converter are missing\n");sys.exit(-1)
-    if len(argv_rest)<2:sys.stderr.write("[dna|protein] non(nucleotide|amino)converter are missing\n");sys.exit(-1)
-    if len(argv_rest)<3:
-        if argv_rest[-1] == 'dna':
-            sys.stderr.write("non-nucleotide converter like [A|T|C|G|R|Y|N] is missing. Cannot continue.\n")
-            sys.exit(-1)
-        elif argv_rest[-1] == 'protein':
-            sys.stderr.write("non-amino acid converter like [G|P|A|V|L|I|M|C|F|Y|W|H|K|R|Q|N|E|D|S|T|random] is missing. Cannot continue.\n")
-            sys.exit(-1)
-        else:
-            sys.stderr.write("Here expect FASTA sequence type as [dna|protein] instead of -"+ argv_rest[-1] +"- Cannot continue.\n")
-            sys.exit(-1)
-    if len(argv_rest)>3:sys.stderr.write("Too many arguments\n");sys.exit(-1)
-    poimfilename = argv_rest[0]
-    seq_source = argv_rest[1]
-    nuc_con = argv_rest[2]
-
-    utils.check_params(kparam, C, len(examples[0]))
-
-    # train svm and compute POIMs
-    (svm, kernel, feats_train, preproc) = train(examples,labels,C,kernelname,kparam,seq_source,nuc_con)
-    (poim, max_poim, diff_poim, poim_totalmass) = compute_poims(svm, kernel, poimdegree, len(examples[0]))
-
-    # plot poims
-    plots.plot_poims(poimfilename, poim, max_poim, diff_poim, poim_totalmass, poimdegree, len(examples[0]))
-
diff --git a/applications/easysvm/esvm/mldata.py b/applications/easysvm/esvm/mldata.py
deleted file mode 100644
index 0e75a1ddde6..00000000000
--- a/applications/easysvm/esvm/mldata.py
+++ /dev/null
@@ -1,300 +0,0 @@
-#!/usr/bin/env python
-
-"""Classes to encapsulate the idea of a dataset in machine learning,
-   including file access. Currently this focuses on reading and writing
-   transparently to different file formats.
-
-   A dataset is modeled as an (example,label) tuple, each of which is an array.
-   The base class doesn't know how to split, so just returns one array.
-
-   The three classes currently implemented use three
-   different ways of iterating through files:
-   - CSV uses the python module csv's iterator
-   - ARFF always reads the whole file, and does a slice
-   - FASTA uses a hand crafted while loop that behaves like a generator
-
-   The class DatasetFileARFF is in mldata-arff.py.
-"""
-
-
-#############################################################################################
-#                                                                                           #
-#    This program is free software; you can redistribute it and/or modify                   #
-#    it under the terms of the GNU General Public License as published by                   #
-#    the Free Software Foundation; either version 3 of the License, or                      #
-#    (at your option) any later version.                                                    #
-#                                                                                           #
-#    This program is distributed in the hope that it will be useful,                        #
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of                         #
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                           #
-#    GNU General Public License for more details.                                           #
-#                                                                                           #
-#    You should have received a copy of the GNU General Public License                      #
-#    along with this program; if not, see http://www.gnu.org/licenses                       #
-#    or write to the Free Software Foundation, Inc., 51 Franklin Street,                    #
-#    Fifth Floor, Boston, MA 02110-1301  USA                                                #
-#                                                                                           #
-#############################################################################################
-
-import sys
-from numpy import array, concatenate
-import csv
-import re
-
-try:
-    import arff
-    have_arff = True
-except ImportError:
-    have_arff = False
-
-
-class DatasetFileBase(file):
-    """A Base class defining barebones and common behaviour
-    """
-
-    def __init__(self,filename,extype):
-        """Just the normal file __init__,
-        followed by the specific class corresponding to the file extension.
-
-        """
-        self.extype = extype
-        self.filename = filename
-
-
-    def readlines(self,idx=None):
-        """Read the lines defined by idx (a numpy array).
-        Default is read all lines.
-
-        """
-        if idx is None:
-            data = self.readlines()
-        else:
-            data = self.readlines()[idx]
-            #itertools.islice(open('tempx.txt'), 11, 12).next()
-            #file("filename").readlines()[11]
-            #linecache.getline(  filename, lineno[, module_globals])
-        return data
-
-    def writelines(self,data,idx=None):
-        """Write the lines defined by idx (a numpy array).
-        Default is write all lines.
-
-        data is assumed to be a numpy array.
-
-        """
-        if idx is None:
-            self.writelines(data)
-        else:
-            self.writelines(data[idx])
-
-
-
-class DatasetFileCSV(DatasetFileBase):
-    """Comma Seperated Values file.
-
-    Labels are in the first column.
-
-    """
-    def __init__(self,filename,extype):
-        DatasetFileBase.__init__(self,filename,extype)
-
-    def readlines(self,idx=None):
-        """Read from file and split data into examples and labels"""
-        reader = csv.reader(open(self.filename,'r'), delimiter=',', quoting=csv.QUOTE_NONE)
-        labels = []
-        examples = []
-        for ix,line in enumerate(reader):
-            if idx is None or ix in idx:
-                labels.append(float(line[0]))
-                if self.extype == 'vec':
-                    examples.append(array(map(float,line[1:])))
-                elif self.extype == 'seq':
-                    examples.append(line[1:][0])
-                elif self.extype == 'mseq':
-                    examples.append(array(line[1:]))
-
-        if self.extype == 'vec':
-            examples = array(examples).T
-            print '%d features, %d examples' % examples.shape
-        elif self.extype == 'seq':
-            print 'sequence length = %d, %d examples' % (len(examples[0]),len(examples))
-        elif self.extype == 'mseq':
-            printstr = 'sequence lengths = '
-            for seq in examples[0]:
-                printstr += '%d, ' % len(seq)
-            printstr += '%d examples' % len(examples)
-            print printstr
-
-        return (examples,array(labels))
-
-
-    def writelines(self,examples,labels,idx=None):
-        """Merge the examples and labels and write to file"""
-        if idx==None:
-            idx = range(len(labels))
-        if self.extype == 'seq':
-            data = zip(labels[idx],list(array(examples)[idx]))
-        if self.extype == 'mseq':
-            data = []
-            for ix,curlab in enumerate(labels):
-                data.append([curlab]+list(examples[ix]))
-        elif self.extype == 'vec':
-            data = []
-            for ix,curlab in enumerate(labels):
-                data.append(concatenate((array([curlab]),examples[:,ix].T)))
-
-        fp = open(self.filename,'w')
-        writer = csv.writer(fp,delimiter=',',quoting=csv.QUOTE_NONE)
-        for ix in idx:
-            writer.writerow(data[ix])
-        fp.close()
-
-
-
-
-class DatasetFileFASTA(DatasetFileBase):
-    """Fasta format file, labels are in the comment after keyword 'label'.
-    label=1
-    label=-1
-
-    """
-    def __init__(self,filename,extype):
-        if extype != 'seq':
-            print 'Can only write fasta file for sequences!'
-            raise IOError
-        DatasetFileBase.__init__(self,filename,extype)
-        self.fp = None
-
-    def readlines(self,idx=None):
-        """Read from file and split data into examples and labels"""
-        self.fp = open(self.filename,'r')
-        line = self.fp.readline()
-
-        examples = []
-        labels = []
-        ix = 0
-        while True:
-            if not line : break
-            (ex,lab,line) = self.readline(line)
-            if idx is None or ix in idx:
-                examples.append(ex)
-                labels.append(lab)
-            ix += 1
-
-        print 'sequence length = %d, %d examples' % (len(examples[0]),len(examples))
-        return (examples,array(labels))
-
-    def writelines(self,examples,labels,idx=None,linelen=60):
-        """Write the examples and labels and write to file"""
-        if idx==None:
-            idx = range(len(labels))
-
-        fp = open(self.filename,'w')
-        for ix in idx:
-            fp.write('> %d label=%d\n'%(ix,round(labels[ix])))
-            for lineidx in xrange(0, len(examples[ix]), linelen):
-                fp.write(examples[ix][lineidx:lineidx+linelen] + '\n')
-        fp.close()
-
-
-    def readline(self,line):
-        """Reads a fasta entry and returns the label and the sequence"""
-        if line[0] == '' : return
-
-        assert(line[0] == '>')
-        # Use list comprehension to get the integer that comes after label=
-        a = line.split()
-        label = float([b.split('=')[1] for b in a if b.split('=')[0]=='label'][0])
-
-        lines = []
-        line = self.fp.readline()
-        while True:
-            if not line : break
-            if line[0] == ">": break
-            #Remove trailing whitespace, and any internal spaces
-            lines.append(line.rstrip().replace(" ",""))
-            line = self.fp.readline()
-
-        return (''.join(lines),label,line)
-
-
-def init_datasetfile(filename,extype):
-    """A factory that returns the appropriate class based on the file extension.
-
-    recognised file extensions
-    - .csv  : Comma Separated Values
-    - .arff : Attribute-Relation File Format (weka)
-    - .fa   : Fasta file format (seq only)
-    - .fasta: same as above.
-
-    Since the file type does not determine what type of data is actually being used,
-    the user has to supply the example type.
-
-    extype can be ('vec','seq','mseq')
-    vec - array of floats
-    seq - single sequence
-    mseq - multiple sequences
-
-    """
-    allowedtypes = ('vec','seq','mseq')
-    assert(extype in allowedtypes)
-    # map the file extensions to the relevant classes
-    _format2dataset = {'csv'   : DatasetFileCSV,
-                       'fa'    : DatasetFileFASTA,
-                       'fasta' : DatasetFileFASTA,
-                       }
-    if have_arff:
-        from esvm.mldata_arff import DatasetFileARFF
-        _format2dataset['arff'] = DatasetFileARFF
-
-    extension = detect_extension(filename)
-    return _format2dataset[extension](filename,extype)
-
-
-def detect_extension(filename):
-    """Get the file extension"""
-    if filename.count('.') > 1:
-        print 'WARNING: %s has more than one . using last one' % filename
-    detect_ext = filename.split('.')[-1]
-    if have_arff:
-        known_ext = ('csv','arff','fasta','fa')
-    else:
-        known_ext = ('csv','fasta','fa')
-
-    if detect_ext not in known_ext:
-        print 'WARNING: %s is an unknown file extension, defaulting to csv' % detect_ext
-        detect_ext = 'csv'
-
-    if detect_ext == 'csv':
-        fasta_flag = 0
-        arff_flag = 0
-        run_c = 0
-        f = open(filename,'r')
-        for line in f:
-           line = line.strip()
-           if re.match(r'^>',line):
-               fasta_flag = 1
-               break
-           if re.match(r'^@',line):
-               arff_flag = 1
-               break
-           if run_c == 5:
-               break
-        f.close()
-        if fasta_flag == 1:
-           detect_ext = 'fasta'
-        elif arff_flag == 1:
-           detect_ext = 'arff'
-        else:
-           detect_ext = 'csv'
-
-    return detect_ext
-
-
-def convert(infile,outfile,extype):
-    """Copy data from infile to outfile, possibly converting the file format."""
-    fp1 = init_datasetfile(infile,extype)
-    (examples,labels) = fp1.readlines()
-    fp2 = init_datasetfile(outfile,extype)
-    fp2.writelines(examples,labels)
-
diff --git a/applications/easysvm/esvm/mldata_arff.py b/applications/easysvm/esvm/mldata_arff.py
deleted file mode 100644
index ce92a560357..00000000000
--- a/applications/easysvm/esvm/mldata_arff.py
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/usr/bin/env python
-
-"""Classes to encapsulate the idea of a dataset in machine learning,
-   including file access.
-
-   This file contains the ARFF class for people who have arff installed.
-"""
-
-
-#############################################################################################
-#                                                                                           #
-#    This program is free software; you can redistribute it and/or modify                   #
-#    it under the terms of the GNU General Public License as published by                   #
-#    the Free Software Foundation; either version 3 of the License, or                      #
-#    (at your option) any later version.                                                    #
-#                                                                                           #
-#    This program is distributed in the hope that it will be useful,                        #
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of                         #
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                           #
-#    GNU General Public License for more details.                                           #
-#                                                                                           #
-#    You should have received a copy of the GNU General Public License                      #
-#    along with this program; if not, see http://www.gnu.org/licenses                       #
-#    or write to the Free Software Foundation, Inc., 51 Franklin Street,                    #
-#    Fifth Floor, Boston, MA 02110-1301  USA                                                #
-#                                                                                           #
-#############################################################################################
-
-try:
-    import arff
-    have_arff = True
-except ImportError:
-    have_arff = False
-
-
-import sys
-from numpy import array, concatenate
-import csv
-from esvm.mldata import DatasetFileBase
-
-class DatasetFileARFF(DatasetFileBase):
-    """Attribute-Relation File Format file, uses module arff.
-
-    Labels are in the first column.
-    """
-    def __init__(self,filename,extype,dataname='ARFFdata',comment=''):
-        """Do the base class init, then add some arff specific metadata"""
-        if not have_arff:
-            print 'import arff failed, currently cannot support ARFF file format'
-            return
-        DatasetFileBase.__init__(self,filename,extype)
-        self.dataname = dataname
-        self.comment = comment
-
-    def readlines(self,idx=None):
-        """Read from file and split data into examples and labels"""
-        fp = open(self.filename,'r')
-        (dataname,issparse,alist,data) = arff.arffread(fp)
-        fp.close()
-        self.dataname = dataname
-
-        #if (alist[0][0]!='label'):
-        #    sys.stderr.write('First column of ARFF file needs to be the label\n')
-        #    sys.exit(-1)
-
-        if idx is None:
-            idx = range(len(data))
-
-        labels = [data[ix][0] for ix in idx]
-        labels = array(labels)
-        if self.extype == 'vec':
-            examples = [data[ix][1:] for ix in idx]
-            examples = array(examples).T
-            print '%d features, %d examples' % examples.shape
-        elif self.extype == 'seq':
-            examples = [data[ix][1] for ix in idx]
-            print 'sequence length = %d, %d examples' % (len(examples[0]),len(examples))
-        elif self.extype == 'mseq':
-            examples = [data[ix][1:] for ix in idx]
-            printstr = 'sequence lengths = '
-            for seq in examples[0]:
-                printstr += '%d, ' % len(seq)
-            printstr += '%d examples' % len(examples)
-            print printstr
-
-        return (examples, labels)
-
-    def writelines(self,examples,labels,idx=None):
-        """Merge the examples and labels and write to file"""
-        alist = [('label',1,[])]
-
-        if idx is not None:
-            examples = examples[idx]
-            labels = labels[idx]
-
-        if self.extype == 'vec':
-            data = list(concatenate((labels.reshape(len(labels),1),examples.T),axis=1))
-            for ix in xrange(examples.shape[0]):
-                attname = 'att%d' % ix
-                alist.append((attname,1,[]))
-        elif self.extype == 'seq':
-            data = zip(labels,examples)
-            alist.append(('sequence',0,[]))
-        elif self.extype == 'mseq':
-            data = []
-            for ix,curlab in enumerate(labels):
-                data.append([curlab]+list(examples[ix]))
-            alist.append(('upstream sequence',0,[]))
-            alist.append(('downstream sequence',0,[]))
-
-        fp = open(self.filename,'w')
-        arff.arffwrite(fp,alist,data,name=self.dataname,comment=self.comment)
-        fp.close()
-
-
diff --git a/applications/easysvm/esvm/parse.py b/applications/easysvm/esvm/parse.py
deleted file mode 100644
index 34823255001..00000000000
--- a/applications/easysvm/esvm/parse.py
+++ /dev/null
@@ -1,232 +0,0 @@
-"""
-This module contains code to parse the input arguments to the command line:
-- easysvm.py
-- datagen.py
-"""
-
-#############################################################################################
-#                                                                                           #
-#    This program is free software; you can redistribute it and/or modify                   #
-#    it under the terms of the GNU General Public License as published by                   #
-#    the Free Software Foundation; either version 3 of the License, or                      #
-#    (at your option) any later version.                                                    #
-#                                                                                           #
-#    This program is distributed in the hope that it will be useful,                        #
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of                         #
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                           #
-#    GNU General Public License for more details.                                           #
-#                                                                                           #
-#    You should have received a copy of the GNU General Public License                      #
-#    along with this program; if not, see http://www.gnu.org/licenses                       #
-#    or write to the Free Software Foundation, Inc., 51 Franklin Street,                    #
-#    Fifth Floor, Boston, MA 02110-1301  USA                                                #
-#                                                                                           #
-#############################################################################################
-
-import datafuncs
-import sys
-
-################################################################################
-# basic types
-
-def parse_range(str):
-    list=str.split("-")
-
-    if len(list)==1:
-        return (int(list[0]), int(list[0]))
-    if len(list)==2:
-        return (int(list[0]), int(list[1]))
-    sys.stderr.write("Cannot parse range '%s'\n" %str)
-    sys.exit(-1)
-
-
-def parse_float_list(str):
-    list=str.split(",")
-    float_list=[] ;
-    for elem in list:
-        float_list.append(float(elem))
-    return float_list
-
-
-def parse_int_list(str):
-    list=str.split(",")
-    int_list=[] ;
-    for elem in list:
-        int_list.append(int(elem))
-    return int_list
-
-
-################################################################################
-# input files
-
-def parse_input_file_train(kernelname, argv):
-    """Parse the input and output file names"""
-
-    if len(argv)<2 or (argv[0]=="fasta" and len(argv)<3) or (argv[0]!='fasta' and argv[0]!='arff'):
-        sys.stderr.write("data usage: arff <train.arff>\n        or: fasta <train_pos.fa> <train_neg.fa>\n")
-        sys.exit(-1)
-
-    if argv[0] == 'fasta':
-        datafilenamepos = argv[1]
-        datafilenameneg = argv[2]
-        (examples, labels) = datafuncs.fastaread(datafilenamepos, datafilenameneg)
-        argv_rest=argv[3:]
-    elif argv[0] == 'arff':
-        datafilename = argv[1]
-        (examples, labels) = datafuncs.arffread(kernelname, datafilename)
-        argv_rest=argv[2:]
-    else:
-        print 'Error in parse_input_file'
-
-    return (examples,labels,argv_rest)
-
-
-def parse_input_file_train_test(kernelname, argv):
-    """Parse the input and output file names"""
-
-    if len(argv)<3 or (argv[0]=="fasta" and len(argv)<4) or (argv[0]!='fasta' and argv[0]!='arff'):
-        sys.stderr.write("data usage: arff <train.arff> <test.arff>\n        or: fasta <train_pos.fa> <train_neg.fa> <test.fa>\n")
-        sys.exit(-1)
-
-    if argv[0] == 'fasta':
-        datafilenamepos = argv[1]
-        datafilenameneg = argv[2]
-        datafilenametest = argv[3]
-        (trainex, trainlab) = datafuncs.fastaread(datafilenamepos, datafilenameneg)
-        (testex, testlab) = datafuncs.fastaread(datafilenametest)
-        argv_rest=argv[4:]
-    elif argv[0] == 'arff':
-        datafilename = argv[1]
-        datafilenametest = argv[2]
-        (trainex, trainlab) = datafuncs.arffread(kernelname, datafilename)
-        (testex, testlab) = datafuncs.arffread(kernelname, datafilenametest)
-        argv_rest=argv[3:]
-    else:
-        print 'Error in parse_input_file'
-
-    return (trainex,trainlab,testex,argv_rest)
-
-################################################################################
-# prediction file
-
-def parse_prediction_file(fname):
-    outputs=[]
-    splitassignments=[]
-
-    f = open(fname)
-    str=f.read()
-    lines = str.split('\n')
-    num=0
-    for line in lines:
-        if len(line)>0 and line[0] != '#':
-            elems=line.split('\t')
-            assert(len(elems)>1)
-            assert(int(elems[0]) == num)
-            num+=1
-            if len(elems)==2:
-                outputs.append(float(elems[1]))
-            else:
-                assert(len(elems)==3)
-                outputs.append(float(elems[1]))
-                splitassignments.append(float(elems[2]))
-    f.close()
-    if len(splitassignments)==0:
-        splitassignments = None
-
-    return (outputs, splitassignments)
-
-################################################################################
-# kernel parameters
-
-def parse_kernel_param(argv, allow_modelsel_params):
-    """Parse the arguments for a particular kernel"""
-
-    if len(argv)<1:
-        sys.stderr.write("kernel usage: <kernelname> [<parameters>]\n")
-        sys.exit(-1)
-
-    kernelname = argv[0]
-    kparam = {}
-    kparam["name"]=kernelname
-    kparam["modelsel_name"]=None
-    kparam["modelsel_params"]=None
-
-    if kernelname == 'gauss':
-        if len(argv)<2:
-            sys.stderr.write("kernel usage: gauss <width>\n")
-            sys.exit(-1)
-        if allow_modelsel_params:
-            kparam['width'] = None
-            kparam["modelsel_name"]="width"
-            kparam["modelsel_params"]=parse_float_list(argv[1])
-        else:
-            kparam['width'] = float(argv[1])
-        argv_rest=argv[2:]
-    elif kernelname == 'linear':
-	kparam['scale']=1
-        # no parameters
-        argv_rest=argv[1:]
-    elif kernelname == 'poly':
-        if len(argv)<4:
-            sys.stderr.write("kernel usage: poly <degree> <true|false> <true|false>\n")
-            sys.exit(-1)
-        if allow_modelsel_params:
-            kparam['degree'] = None
-            kparam["modelsel_name"]="degree"
-            kparam["modelsel_params"]=parse_int_list(argv[1])
-        else:
-            kparam['degree'] = int(argv[1])
-        kparam['inhomogene'] = (argv[2] == 'true')
-        kparam['normal'] = (argv[3] == 'true')
-        argv_rest=argv[4:]
-    elif kernelname == 'wd':
-        if len(argv)<3:
-            sys.stderr.write("kernel usage: wd <degree> <shift>\n")
-            sys.exit(-1)
-        if allow_modelsel_params:
-            kparam['degree'] = None
-            kparam["modelsel_name"]="degree"
-            kparam["modelsel_params"]=parse_int_list(argv[1])
-        else:
-            kparam['degree'] = int(argv[1])
-        if allow_modelsel_params and len(kparam["modelsel_params"])==1:
-            kparam['degree'] = kparam["modelsel_params"][0]
-            kparam['shift'] = None
-            kparam["modelsel_name"] = "shift"
-            kparam["modelsel_params"]=parse_int_list(argv[2])
-        else:
-            kparam['shift'] = int(argv[2])
-        argv_rest=argv[3:]
-    elif kernelname == 'spec':
-        if len(argv)<2:
-            sys.stderr.write("kernel usage: spec <degree>\n")
-            sys.exit(-1)
-        if allow_modelsel_params:
-            kparam['degree'] = None
-            kparam["modelsel_name"]="degree"
-            kparam["modelsel_params"]=parse_int_list(argv[1])
-        else:
-            kparam['degree'] = int(argv[1])
-        argv_rest=argv[2:]
-    elif kernelname == 'localalign':
-        # no parameters
-        argv_rest=argv[1:]
-    elif kernelname == 'localimprove':
-        if len(argv)<4:
-            sys.stderr.write("kernel usage: localimprove <length> <indegree> <outdegree>\n")
-            sys.exit(-1)
-        kparam['length'] = int(argv[1])
-        if allow_modelsel_params:
-            kparam['width'] = None
-            kparam["modelsel_name"]="indeg"
-            kparam["modelsel_params"]=parse_int_list(argv[2])
-        else:
-            kparam['indeg'] = int(argv[2])
-        kparam['outdeg'] = int(argv[3])
-        argv_rest=argv[4:]
-    else:
-        sys.stderr.write( 'Unknown kernel name %s in parse_kernel_param\n' % kernelname )
-        sys.exit(-1)
-
-    return kernelname,kparam,argv_rest
-
diff --git a/applications/easysvm/esvm/plots.py b/applications/easysvm/esvm/plots.py
deleted file mode 100644
index 98e7322e234..00000000000
--- a/applications/easysvm/esvm/plots.py
+++ /dev/null
@@ -1,226 +0,0 @@
-"""
-This module contains code for commonly used plots
-"""
-
-#############################################################################################
-#                                                                                           #
-#    This program is free software; you can redistribute it and/or modify                   #
-#    it under the terms of the GNU General Public License as published by                   #
-#    the Free Software Foundation; either version 3 of the License, or                      #
-#    (at your option) any later version.                                                    #
-#                                                                                           #
-#    This program is distributed in the hope that it will be useful,                        #
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of                         #
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                           #
-#    GNU General Public License for more details.                                           #
-#                                                                                           #
-#    You should have received a copy of the GNU General Public License                      #
-#    along with this program; if not, see http://www.gnu.org/licenses                       #
-#    or write to the Free Software Foundation, Inc., 51 Franklin Street,                    #
-#    Fifth Floor, Boston, MA 02110-1301  USA                                                #
-#                                                                                           #
-#############################################################################################
-
-import sys
-import random
-import numpy
-import warnings
-import shutil
-
-from shogun import Labels
-from shogun import *
-
-def plotroc(output, LTE, draw_random=False, figure_fname="", roc_label='ROC'):
-    """Plot the receiver operating characteristic curve"""
-    import pylab
-    import matplotlib
-
-    pylab.figure(1,dpi=150,figsize=(4,4))
-    fontdict=dict(family="cursive",weight="bold",size=7,y=1.05) ;
-
-    pm=PerformanceMeasures(Labels(numpy.array(LTE)), Labels(numpy.array(output)))
-
-    points=pm.get_ROC()
-    points=numpy.array(points).T # for pylab.plot
-    pylab.plot(points[0], points[1], 'b-', label=roc_label)
-    if draw_random:
-        pylab.plot([0, 1], [0, 1], 'r-', label='random guessing')
-    pylab.axis([0, 1, 0, 1])
-    ticks=numpy.arange(0., 1., .1, dtype=numpy.float64)
-    pylab.xticks(ticks,size=10)
-    pylab.yticks(ticks,size=10)
-    pylab.xlabel('1 - specificity (false positive rate)',size=10)
-    pylab.ylabel('sensitivity (true positive rate)',size=10)
-    pylab.legend(loc='lower right', prop = matplotlib.font_manager.FontProperties('tiny'))
-
-    if figure_fname!=None:
-        warnings.filterwarnings('ignore','Could not match*')
-        tempfname = figure_fname + '.png'
-	pylab.savefig(tempfname)
-	shutil.move(tempfname,figure_fname)
-
-    auROC=pm.get_auROC()
-    return auROC ;
-
-def plotprc(output, LTE, figure_fname="", prc_label='PRC'):
-    """Plot the precision recall curve"""
-    import pylab
-    import matplotlib
-
-    pylab.figure(2,dpi=150,figsize=(4,4))
-
-    pm=PerformanceMeasures(Labels(numpy.array(LTE)), Labels(numpy.array(output)))
-
-    points=pm.get_PRC()
-    points=numpy.array(points).T # for pylab.plot
-    pylab.plot(points[0], points[1], 'b-', label=prc_label)
-    pylab.axis([0, 1, 0, 1])
-    ticks=numpy.arange(0., 1., .1, dtype=numpy.float64)
-    pylab.xticks(ticks,size=10)
-    pylab.yticks(ticks,size=10)
-    pylab.xlabel('sensitivity (true positive rate)',size=10)
-    pylab.ylabel('precision (1 - false discovery rate)',size=10)
-    pylab.legend(loc='lower right')
-
-    if figure_fname!=None:
-        warnings.filterwarnings('ignore','Could not match*')
-        tempfname = figure_fname + '.png'
-	pylab.savefig(tempfname)
-	shutil.move(tempfname,figure_fname)
-
-    auPRC=pm.get_auPRC()
-    return auPRC ;
-
-def plotcloud(cloud, figure_fname="", label='cloud'):
-    """Plot the cloud of points (the first two dimensions only)"""
-    import pylab
-    import matplotlib
-
-    pylab.figure(1,dpi=150,figsize=(4,4))
-
-    pos = []
-    neg = []
-    for i in xrange(len(cloud)):
-        if cloud[i][0]==1:
-            pos.append(cloud[i][1:])
-        elif cloud[i][0]==-1:
-            neg.append(cloud[i][1:])
-
-    fontdict=dict(family="cursive",weight="bold",size=10,y=1.05) ;
-    pylab.title(label, fontdict)
-    points=numpy.array(pos).T # for pylab.plot
-    pylab.plot(points[0], points[1], 'b+', label='positive')
-    points=numpy.array(neg).T # for pylab.plot
-    pylab.plot(points[0], points[1], 'rx', label='negative')
-    #pylab.axis([0, 1, 0, 1])
-    #ticks=numpy.arange(0., 1., .1, dtype=numpy.float64)
-    #pylab.xticks(ticks,size=10)
-    #pylab.yticks(ticks,size=10)
-    pylab.xlabel('dimension 1',size=10)
-    pylab.ylabel('dimension 2',size=10)
-    pylab.legend(loc='lower right')
-
-    if figure_fname!=None:
-        warnings.filterwarnings('ignore','Could not match*')
-        tempfname = figure_fname + '.png'
-	pylab.savefig(tempfname)
-	shutil.move(tempfname,figure_fname)
-
-def plot_poims(poimfilename, poim, max_poim, diff_poim, poim_totalmass, poimdegree, max_len):
-    """Plot a summary of the information in poims"""
-    import pylab
-    import matplotlib
-
-    pylab.figure(3, dpi=150, figsize=(4,5))
-
-    # summary figures
-    fontdict=dict(family="cursive",weight="bold",size=7,y=1.05) ;
-    pylab.subplot(3,2,1)
-    pylab.title('Total POIM Mass', fontdict)
-    pylab.plot(poim_totalmass) ;
-    pylab.ylabel('weight mass', size=5)
-
-    pylab.subplot(3,2,3)
-    pylab.title('POIMs', fontdict)
-    pylab.pcolor(max_poim, shading='flat') ;
-
-    pylab.subplot(3,2,5)
-    pylab.title('Differential POIMs', fontdict)
-    pylab.pcolor(diff_poim, shading='flat') ;
-
-    for plot in [3, 5]:
-        pylab.subplot(3,2,plot)
-        ticks=numpy.arange(1., poimdegree+1, 1, dtype=numpy.float64)
-        ticks_str = []
-        for i in xrange(0, poimdegree):
-            ticks_str.append("%i" % (i+1))
-            ticks[i] = i + 0.5
-        pylab.yticks(ticks, ticks_str)
-        pylab.ylabel('degree', size=5)
-
-    # per k-mer figures
-    fontdict=dict(family="cursive",weight="bold",size=7,y=1.04) ;
-
-    # 1-mers
-    pylab.subplot(3,2,2)
-    pylab.title('1-mer Positional Importance', fontdict)
-    pylab.pcolor(poim[0], shading='flat') ;
-    ticks_str = ['A', 'C', 'G', 'T']
-    ticks = [0.5, 1.5, 2.5, 3.5]
-    pylab.yticks(ticks, ticks_str, size=5)
-    pylab.axis([0, max_len, 0, 4])
-
-    # 2-mers
-    pylab.subplot(3,2,4)
-    pylab.title('2-mer Positional Importance', fontdict)
-    pylab.pcolor(poim[1], shading='flat') ;
-    i=0 ;
-    ticks=[] ;
-    ticks_str=[] ;
-    for l1 in ['A', 'C', 'G', 'T']:
-        for l2 in ['A', 'C', 'G', 'T']:
-            ticks_str.append(l1+l2)
-            ticks.append(0.5+i) ;
-            i+=1 ;
-    pylab.yticks(ticks, ticks_str, fontsize=5)
-    pylab.axis([0, max_len, 0, 16])
-
-    # 3-mers
-    pylab.subplot(3,2,6)
-    pylab.title('3-mer Positional Importance', fontdict)
-    pylab.pcolor(poim[2], shading='flat') ;
-    i=0 ;
-    ticks=[] ;
-    ticks_str=[] ;
-    for l1 in ['A', 'C', 'G', 'T']:
-        for l2 in ['A', 'C', 'G', 'T']:
-            for l3 in ['A', 'C', 'G', 'T']:
-                if numpy.mod(i,4)==0:
-                    ticks_str.append(l1+l2+l3)
-                    ticks.append(0.5+i) ;
-                i+=1 ;
-    pylab.yticks(ticks, ticks_str, fontsize=5)
-    pylab.axis([0, max_len, 0, 64])
-
-    # x-axis on last two figures
-    for plot in [5, 6]:
-        pylab.subplot(3,2,plot)
-        pylab.xlabel('sequence position', size=5)
-
-
-    # finishing up
-    for plot in xrange(0,6):
-        pylab.subplot(3,2,plot+1)
-        pylab.xticks(fontsize=5)
-
-    for plot in [1,3,5]:
-        pylab.subplot(3,2,plot)
-        pylab.yticks(fontsize=5)
-
-    pylab.subplots_adjust(hspace=0.35) ;
-
-    # write to file
-    warnings.filterwarnings('ignore','Could not match*')
-    pylab.savefig('/tmp/temppylabfig.png')
-    shutil.move('/tmp/temppylabfig.png',poimfilename)
-
diff --git a/applications/easysvm/esvm/poim.py b/applications/easysvm/esvm/poim.py
deleted file mode 100644
index f50fd6a426d..00000000000
--- a/applications/easysvm/esvm/poim.py
+++ /dev/null
@@ -1,128 +0,0 @@
-"""
-This module contains code for computing
-Position Oligomer Importance Matrices
-"""
-
-#############################################################################################
-#                                                                                           #
-#    This program is free software; you can redistribute it and/or modify                   #
-#    it under the terms of the GNU General Public License as published by                   #
-#    the Free Software Foundation; either version 3 of the License, or                      #
-#    (at your option) any later version.                                                    #
-#                                                                                           #
-#    This program is distributed in the hope that it will be useful,                        #
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of                         #
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                           #
-#    GNU General Public License for more details.                                           #
-#                                                                                           #
-#    You should have received a copy of the GNU General Public License                      #
-#    along with this program; if not, see http://www.gnu.org/licenses                       #
-#    or write to the Free Software Foundation, Inc., 51 Franklin Street,                    #
-#    Fifth Floor, Boston, MA 02110-1301  USA                                                #
-#                                                                                           #
-#############################################################################################
-
-import numpy
-from numpy import ones
-
-
-def compute_poims(svm, kernel, poimdegree, max_len):
-    """For a trained SVM, compute Position Oligomer Importance Matrices"""
-
-    distr = ones((max_len,4))/4 ;
-    kernel.prepare_POIM2(distr)
-
-    kernel.compute_POIM2(poimdegree, svm) ;
-    poim = kernel.get_POIM2()
-    kernel.cleanup_POIM2()
-
-    (poim, max_poim, diff_poim) = reshape_normalize_contribs(poim, poimdegree, max_len)
-    (poim_weightmass, poim_totalmass) = compute_weight_mass(poim, poimdegree, max_len)
-
-    poim_totalmass=poim_totalmass/numpy.sum(poim_totalmass)
-
-    return (poim, max_poim, diff_poim, poim_totalmass)
-
-
-def compute_weight_mass(C, maxOrder, seqLen):
-
-    mass=numpy.zeros((maxOrder, seqLen), numpy.double);
-    total=numpy.zeros((1, seqLen), numpy.double);
-    for i in xrange(0,maxOrder):
-        mass[i,:] = sum(numpy.abs(C[i]))
-    total = sum(mass);
-
-    return (mass,total)
-
-def getstringprobsMC(maxOrder,distrib,length, abcSize):
-
-    pmatrix = []
-    for k in xrange(0,maxOrder):
-	pmatrix.append(ones(4^k,len))
-
-        for l in xrange(0,len):
-            for sigma in xrange(0, abcSize):
-                prob = distrib(sigma,l);
-                for k in xrange(0, maxOrder):
-                    for relpos in xrange(0, min(k,l)):
-                        vi = genindexvector_spos(k,sigma-1,relpos,abcSize);
-                        pmatrix[k][vi,l-relpos+1] = pmatrix[k][vi,l-relpos+1]*prob;
-
-    return pmatrix
-
-def getV2_poimMC(u, strprobs, abcSize):
-  VV = [];
-  for k in xrange(0, len(u)):
-      m = abcSize^k;
-      VV.append( numpy.ones(4**(k+1),1)*mean(u[k]*strprobs[k] ) )
-
-  return VV
-
-def reshape_normalize_contribs(C, maxOrder, seqLen, opts={}):
-
-    alphabetSize = 4;
-    Contribs = [] ;
-    l=0;
-    for i in xrange(0, maxOrder):
-        L = l + (alphabetSize**(i+1)) * seqLen;
-        vec=C[l:L].copy() ;
-        Contribs.append(vec.reshape( seqLen, alphabetSize**(i+1) ).T) ;
-        l = L;
-
-    assert( l == len(C) );
-
-    if opts.has_key("distribution"):
-        strprobs = getstringprobsMC(length(Contribs), opts["distribution"], seqLen, 4);
-        MyV2 = getV2_poimMC(Contribs, strprobs, seqLen, 4);
-
-        for i in xrange(0, maxOrder ):
-            Contribs[i] = Contribs[i] -MyV2[i];
-
-    if opts.has_key("background"):
-        for i in xrange(0, maxOrder ):
-            Contribs[i] = Contribs[i]*(opts["background"][i]!=0);
-
-    maxContribs = numpy.zeros( (maxOrder, seqLen), numpy.double );
-    maxp_org = numpy.zeros( (maxOrder, seqLen), numpy.double );
-    maxp_str= numpy.zeros( (maxOrder, seqLen), numpy.int );
-    for i in xrange(0, maxOrder ):
-        con=numpy.abs(Contribs[i]) ;
-	maxContribs[i,:] = numpy.max(con, axis=0)
-	maxp_str[i,:] = numpy.argmax(con, axis=0)
-
-    diffmaxContribs = numpy.zeros( (maxOrder, seqLen), numpy.double );
-
-    for k in xrange(1, maxOrder ):
-	numsy=4**(k+1);
-	for l in  xrange(0, seqLen-k):
-            km=maxp_str[k,l] ;
-            A=numpy.abs(Contribs[k-1][numpy.floor(km/4),l]);
-            B=numpy.abs(Contribs[k-1][numpy.mod(km,numsy/4),l+1]);
-            #zA=numpy.mod(km,4)+1;
-            #zB=numpy.floor(km/(numsy/4))+1;
-            #correction=sum([A/distribution(zA, l+k-1), B/distribution(zB, l)]);
-            correction=numpy.max([A, B]);
-            diffmaxContribs[k,l] = maxContribs[k,l] - correction;
-
-    return (Contribs, maxContribs, diffmaxContribs)
-
diff --git a/applications/easysvm/esvm/utils.py b/applications/easysvm/esvm/utils.py
deleted file mode 100644
index dfdefa24456..00000000000
--- a/applications/easysvm/esvm/utils.py
+++ /dev/null
@@ -1,180 +0,0 @@
-#############################################################################################
-#                                                                                           #
-#    This program is free software; you can redistribute it and/or modify                   #
-#    it under the terms of the GNU General Public License as published by                   #
-#    the Free Software Foundation; either version 3 of the License, or                      #
-#    (at your option) any later version.                                                    #
-#                                                                                           #
-#    This program is distributed in the hope that it will be useful,                        #
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of                         #
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                           #
-#    GNU General Public License for more details.                                           #
-#                                                                                           #
-#    You should have received a copy of the GNU General Public License                      #
-#    along with this program; if not, see http://www.gnu.org/licenses                       #
-#    or write to the Free Software Foundation, Inc., 51 Franklin Street,                    #
-#    Fifth Floor, Boston, MA 02110-1301  USA                                                #
-#                                                                                           #
-#############################################################################################
-
-import sys
-import random
-import numpy
-import warnings
-import shutil
-
-from shogun import Labels
-from shogun import *
-
-################################################################################
-# evaluation functions
-
-def confusionMatrix(labels_test, labels_predicted):
-    """Compute the matrix of predictions versus labels"""
-    if len(labels_test) != len(labels_predicted):
-        return 0
-    TP = 0; FP = 0; TN = 0; FN = 0
-    for i in range(0, len(labels_test)):
-        if labels_test[i] == 0 or labels_predicted[i] == 0:
-            return 0
-        if labels_test[i] > 0:
-            if labels_predicted[i] > 0: TP += 1
-            else: FN +=1
-        else:
-            if labels_predicted[i] > 0: FP += 1
-            else: TN += 1
-    return (TP, TN, FP, FN)
-
-def accuracy(output, labels_test):
-    """How many correct predictions?"""
-    TP, TN, FP, FN = confusionMatrix(labels_test, numpy.sign(output))
-    return float(TP + TN) / (TP + TN + FP + FN)
-
-def calcroc(output, LTE):
-    """The area under the receiver operating characteristic curve"""
-    pm=ROCEvaluation()
-    pm.evaluate(Labels(numpy.array(output)), Labels(numpy.array(LTE)))
-
-    auROC=pm.get_auROC()
-    return auROC
-
-def calcprc(output, LTE):
-    """The area under the precision recall curve"""
-    pm=PRCEvaluation()
-    pm.evaluate(Labels(numpy.array(output)), Labels(numpy.array(LTE)))
-
-    auPRC=pm.get_auPRC()
-    return auPRC
-
-
-def calcperf(output, LTE, perflist):
-    """Compute all the performance measures in perflist"""
-    resperf = []
-    for perf in perflist:
-        resperf.append(apply(perf,(output,LTE)))
-
-    return resperf
-
-
-################################################################################
-# splitting functions
-
-def getPartitionedSet(total, crossval_repeat, seed=None):
-    """Generate a list of indices, splitting the dataset"""
-    if seed==None:
-        random.seed(123456789)
-    else:
-        random.seed(seed)
-
-    size = int(total / crossval_repeat)
-    mod = total % crossval_repeat
-
-    splits = []
-    for i in range(0, crossval_repeat):
-        if i < mod:
-            splits.append(size + 1)
-        else:
-            splits.append(size)
-
-    ipartition = random.sample(xrange(0,total), total) # random sampling
-
-    index = 0
-    partitions = []
-
-    for size in splits:
-        partitions.append(ipartition[index:index+size])
-        index += size
-
-    return partitions
-
-
-def getCurrentSplit(repetition, partitions, labels, seqs):
-    """Split the data into training and test sets"""
-    X = []; Y = []; XT = []; YT = []
-    for i in range(0, len(partitions)):
-        if type(seqs) == type(list([])):
-            for j in range(0, len(partitions[i])):
-                if repetition != i:
-                    X.append(seqs[partitions[i][j]])
-                    Y.append(labels[partitions[i][j]])
-                else:
-                    XT.append(seqs[partitions[i][j]])
-                    YT.append(labels[partitions[i][j]])
-        else:
-            if repetition != i:
-                if len(X) == 0:
-                    X = seqs.take(partitions[i],axis=1)
-                    Y = labels.take(partitions[i])
-                else:
-                    X = numpy.concatenate((X,seqs.take(partitions[i],axis=1)),axis=1)
-                    Y = numpy.concatenate((Y,labels.take(partitions[i])))
-            else:
-                XT = seqs.take(partitions[i],axis=1)
-                YT = labels.take(partitions[i])
-
-    return X, Y, XT, YT
-
-################################################################################
-
-def check_params(params, C, max_len):
-    """Check for validity of parameters"""
-    if (C<=0):
-        sys.stderr.write( "\nerror: the parameter 'C' has to be larger than 0\n" )
-        assert(C>0)
-
-    if params.has_key("degree"):
-        if (params["degree"]<=0):
-            sys.stderr.write( "\nerror: the parameter 'degree' has to be larger than 0\n" )
-            assert(params["degree"]>0)
-
-    if params.has_key("width"):
-	print params["width"]
-        if (params["width"]<=0):
-            sys.stderr.write( "\nerror: the parameter 'width' has to be larger than 0\n" )
-            assert(params["width"]>0)
-
-    if params.has_key("shift"):
-        if (params["shift"]<0) or (params["shift"]>max_len):
-            sys.stderr.write( "\nerror: the parameter 'shift' has to be larger than 0 and smaller than %i\n" % max_len )
-            assert((params["shift"]>=0) and (params["shift"]<=max_len))
-
-    if params.has_key("poim_degree"):
-        if params["poim_degree"]>8:
-            sys.stderr.write( "\nerror: the parameter 'poim_degree' has to be smaller than 8\n" )
-            assert(params["poim_degree"]<=8)
-
-    if params.has_key("crossval_repeat"):
-        if params["crossval_repeat"]<1:
-            sys.stderr.write( "\nerror: number of cross-validation repeats has to be larger than one\n" )
-            assert(params["crossval_repeat"]>1)
-
-    if params.has_key("inhomogene"):
-        if params["inhomogene"]!=True and params["inhomogene"]!=False:
-            sys.stderr.write( "\nerror: the parameter 'inhomogene' has to be True or False\n" )
-            assert(params["inhomogene"]==True or params["inhomogene"]==False)
-
-    if params.has_key("normal"):
-        if params["normal"]!=True and params["normal"]!=False:
-            sys.stderr.write( "\nerror: the parameter 'normal' has to be True or False\n" )
-            assert(params["normal"]==True or params["normal"]==False)
-
diff --git a/applications/easysvm/galaxy/CloudGen.xml b/applications/easysvm/galaxy/CloudGen.xml
deleted file mode 100644
index fe064b132fe..00000000000
--- a/applications/easysvm/galaxy/CloudGen.xml
+++ /dev/null
@@ -1,53 +0,0 @@
-<tool id="cloudgen" name="CloudGen (ARFF)">
-  <description>Generation of a toy data set</description>
-  <command interpreter="python2.5">datagen.py cloud $number_of_examples
-  $number_of_features $fraction_positive $spread $outfile_arff $outfile_png
-  </command>
-  <inputs>
-    <param name="number_of_examples" type="integer" value="500"
-    help="Number of examples that should be generated (typically between 10 and 10,000 when used for learning)">
-      <label>Number of examples</label>
-    </param>
-    <param name="number_of_features" type="integer" value="2"
-    help="Length of the feature vector for each example (typically between 1 and 20).">
-      <label>Number of dimensions</label>
-    </param>
-    <param name="fraction_positive" type="float" value="0.5"
-    help="The fraction of examples which are positive (typically between 0.1 and 0.9).">
-      <label>Fraction of positive examples</label>
-    </param>
-    <param name="spread" type="float" value="1"
-    help="The spread of points around the center of each cloud">
-      <label>Spread</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="arff" name="outfile_arff" />
-    <data format="png" name="outfile_png" />
-  </outputs>
-  <help>
-
-      This tool is part of the MLB Galaxy package, adding some machine
-      learning functionality to PSU's Galaxy framework.  Copyright (C)
-      2008 Sebastian J. Schultheiss (sebi@umich.edu), Gunnar Raetsch
-      (raetsch@tuebingen.mpg.de) and Cheng Soon Ong (chengsoon.ong@tuebingen.mpg.de)
-
-      This program is free software; you can redistribute it and/or modify
-      it under the terms of the GNU General Public License as published by
-      the Free Software Foundation; either version 3 of the License, or
-      (at your option) any later version.
-
-      This program is distributed in the hope that it will be useful,
-      but WITHOUT ANY WARRANTY; without even the implied warranty of
-      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-      GNU General Public License for more details.
-
-      You should have received a copy of the GNU General Public License
-      along with this program; if not, see http://www.gnu.org/licenses
-      or write to the Free Software Foundation, Inc., 51 Franklin Street,
-      Fifth Floor, Boston, MA 02110-1301  USA
-
-
-  </help>
-</tool>
-
diff --git a/applications/easysvm/galaxy/FastaGen.xml b/applications/easysvm/galaxy/FastaGen.xml
deleted file mode 100644
index 8189e31cc82..00000000000
--- a/applications/easysvm/galaxy/FastaGen.xml
+++ /dev/null
@@ -1,52 +0,0 @@
-<tool id="motifgen-fasta" name="MotifGen (FASTA)">
-  <description>Generation of a sequence toy data set for motif discovery in FASTA format.</description>
-  <command interpreter="python">datagen.py motif fasta $p_motif
-  $p_number_of_sequences $p_length $p_position
-  $p_mutation_rate $outfile_fasta
-  </command>
-  <inputs>
-    <param name="p_motif" type="text" value='GATTACA' help="The motif that should be generated into random sequences. The Motif is typically 4 to 10 nucleotides long and should only contain the letters A,C,G, and T.">
-      <label>The Positive Motif</label>
-    </param>
-    <param name="p_number_of_sequences" type="integer" value="100" help="Number of sequences that should be generated--equal to the number of examples (typically between 10 and 10,000 sequences when used for learning)">
-      <label>Number of sequences</label>
-    </param>
-    <param name="p_length" type="text" value="50" help="Length of the generated sequences (typically between 10 and 1000 nucleotides). This can also be a range of lengths, e.g. 30-50.">
-      <label>Sequence length (range)</label>
-    </param>
-    <param name="p_position" type="text" value="10" help="Position at which the motif should be placed (should be between zero and the sequence length). This can also be a range, e.g. 10-20.">
-      <label>Motif position (range)</label>
-    </param>
-    <param name="p_mutation_rate" type="float" value="0.1" help="Probability that a letter in the motif is replaced by a random letter">
-      <label>Motif mutation rate</label>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="outfile_fasta" />
-  </outputs>
-  <help>
-
-      This tool is part of the MLB Galaxy package, adding some machine
-      learning functionality to PSU's Galaxy framework.  Copyright (C)
-      2008 Sebastian J. Schultheiss (sebi@umich.edu), Gunnar Raetsch
-      (raetsch@tuebingen.mpg.de) and Cheng Soon Ong (chengsoon.ong@tuebingen.mpg.de)
-
-      This program is free software; you can redistribute it and/or modify
-      it under the terms of the GNU General Public License as published by
-      the Free Software Foundation; either version 3 of the License, or
-      (at your option) any later version.
-
-      This program is distributed in the hope that it will be useful,
-      but WITHOUT ANY WARRANTY; without even the implied warranty of
-      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-      GNU General Public License for more details.
-
-      You should have received a copy of the GNU General Public License
-      along with this program; if not, see http://www.gnu.org/licenses
-      or write to the Free Software Foundation, Inc., 51 Franklin Street,
-      Fifth Floor, Boston, MA 02110-1301  USA
-
-
-  </help>
-</tool>
-
diff --git a/applications/easysvm/galaxy/MotifGen.xml b/applications/easysvm/galaxy/MotifGen.xml
deleted file mode 100644
index 43934ed4a8e..00000000000
--- a/applications/easysvm/galaxy/MotifGen.xml
+++ /dev/null
@@ -1,91 +0,0 @@
-<tool id="motifgen-arff" name="MotifGen (ARFF)">
-  <description>Generation of a sequence toy data set for motif discovery in ARFF format.</description>
-  <command interpreter="python">datagen.py motif arff $p_motif
-  $p_number_of_sequences $p_length $p_position
-  $p_mutation_rate
-  #if $negative_model.negseq=="1"
-    T $negative_model.n_number_of_sequences $negative_model.n_length 1 1
-  #else
-    $negative_model.n_motif
-    $negative_model.n_number_of_sequences $negative_model.n_length $negative_model.n_position
-    $negative_model.n_mutation_rate
-  #end if
-  $outfile_arff
-  </command>
-  <inputs>
-    <param name="p_motif" type="text" value='GATTACA' help="The motif that should be generated into random sequences. A realistic Motif is typically 4 to 10 nucleotides long and should only contain the letters A, C, G, and T.">
-      <label>The Positive Motif</label>
-    </param>
-    <param name="p_number_of_sequences" type="integer" value="100" help="Number of sequences that should be generated--equal to the number of examples (typically between 10 and 10,000 sequences when used for learning)">
-      <label>Number of sequences</label>
-    </param>
-    <param name="p_length" type="text" value="50" help="Length of the generated sequences (typically between 10 and 1000 nucleotides). This can also be a range of lengths, e.g. 30-50.">
-      <label>Sequence length (range)</label>
-    </param>
-    <param name="p_position" type="text" value="10" help="Position at which the motif should be placed (should be between zero and the sequence length). This can also be a range, e.g. 10-20.">
-      <label>Motif position (range)</label>
-    </param>
-    <param name="p_mutation_rate" type="float" value="0.1" help="Probability that a letter in the motif is replaced by a random letter">
-      <label>Motif mutation rate</label>
-    </param>
-    <conditional name="negative_model">
-    <param name="negseq" type="select" label="Negative Sequences" help="Either use completely random sequences or another sequence motif as negative class.">
-        <option value="1">Random sequences</option>
-	<option value="2">Sequence Motif</option>
-      </param>
-    <when value="1">
-    <param name="n_number_of_sequences" type="integer" value="500" help="Number of sequences that should be generated--equal to the number of examples (typically between 10 and 10,000 sequences when used for learning)">
-      <label>Number of sequences</label>
-    </param>
-    <param name="n_length" type="text" value="50" help="Length of the generated sequences (typically between 10 and 1000 nucleotides). This can also be a range of lengths, e.g. 30-50.">
-      <label>Sequence length (range)</label>
-    </param>
-    </when>
-    <when value="2">
-    <param name="n_motif" type="text" value='TTTT' help="The motif that should be generated into random sequences. A realistic motif is typically 4 to 10 nucleotides long and should only contain the letters A, C, G, and T.">
-      <label>The Negative Motif</label>
-    </param>
-    <param name="n_number_of_sequences" type="integer" value="500" help="Number of sequences that should be generated--equal to the number of examples (typically between 10 and 10,000 sequences when used for learning)">
-      <label>Number of sequences</label>
-    </param>
-    <param name="n_length" type="text" value="50" help="Length of the generated sequences (typically between 10 and 1000 nucleotides). This can also be a range of lengths, e.g. 30-50.">
-      <label>Sequence length (range)</label>
-    </param>
-    <param name="n_position" type="text" value="15" help="Position at which the motif should be placed (should be between zero and the sequence length). This can also be a range, e.g. 10-20.">
-      <label>Motif position (range)</label>
-    </param>
-    <param name="n_mutation_rate" type="float" value="0.1" help="Probability that a letter in the motif is replaced by a random letter">
-      <label>Motif mutation rate</label>
-    </param>
-   </when>
-  </conditional>
-  </inputs>
-  <outputs>
-    <data format="arff" name="outfile_arff" />
-  </outputs>
-  <help>
-
-      This tool is part of the MLB Galaxy package, adding some machine
-      learning functionality to PSU's Galaxy framework.  Copyright (C)
-      2008 Sebastian J. Schultheiss (sebi@umich.edu), Gunnar Raetsch
-      (raetsch@tuebingen.mpg.de) and Cheng Soon Ong (chengsoon.ong@tuebingen.mpg.de)
-
-      This program is free software; you can redistribute it and/or modify
-      it under the terms of the GNU General Public License as published by
-      the Free Software Foundation; either version 3 of the License, or
-      (at your option) any later version.
-
-      This program is distributed in the hope that it will be useful,
-      but WITHOUT ANY WARRANTY; without even the implied warranty of
-      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-      GNU General Public License for more details.
-
-      You should have received a copy of the GNU General Public License
-      along with this program; if not, see http://www.gnu.org/licenses
-      or write to the Free Software Foundation, Inc., 51 Franklin Street,
-      Fifth Floor, Boston, MA 02110-1301  USA
-
-
-  </help>
-</tool>
-
diff --git a/applications/easysvm/galaxy/README b/applications/easysvm/galaxy/README
deleted file mode 100644
index 1e7b2566082..00000000000
--- a/applications/easysvm/galaxy/README
+++ /dev/null
@@ -1,3 +0,0 @@
-The files in this directory are a copy of
-svn/projects/galaxy/tools/agr.  If you edit them, make sure the
-changes are also integrated into the main version.
diff --git a/applications/easysvm/galaxy/easysvm.xml b/applications/easysvm/galaxy/easysvm.xml
deleted file mode 100644
index cf147c8e711..00000000000
--- a/applications/easysvm/galaxy/easysvm.xml
+++ /dev/null
@@ -1,316 +0,0 @@
-<?xml version="1.0"?>
-<tool id="easysvm" name="Train and Test SVMs">
-<description>Train SVMs and predict for given hyper-parameters</description>
-  <command interpreter="python">easysvm.py
-	#if $protocol.expt_type=="1"
-		cv $protocol.crossval
-	#elif $protocol.expt_type=="2"
-		modelsel $protocol.crossval
-	#elif $protocol.expt_type=="3"
-		pred
-	#end if
-        $C
-	#if $kernel.kname == "1"
-	    linear
-        #elif $kernel.kname == "2"
-	    poly ${kernel.degree} ${kernel.inhomogene} true
-        #elif $kernel.kname == "3"
-	    gauss ${kernel.width}
-        #elif $kernel.kname == "4"
-	    spec ${kernel.degree}
-        #elif $kernel.kname == "5"
-	    wd ${kernel.degree} ${kernel.shift}
-        #end if
-	#if $protocol.expt_type=="1"
-	    #if $protocol.datatype.inputdatatype=="1"
-	        fasta $protocol.datatype.dataset_pos $protocol.datatype.dataset_neg
-	    #else
-	        arff $protocol.datatype.dataset
-            #end if
-	#elif $protocol.expt_type=="2"
-	    #if $protocol.datatype.inputdatatype=="1"
-	        fasta $protocol.datatype.dataset_pos $protocol.datatype.dataset_neg
-	    #else
-	        arff $protocol.datatype.dataset
-            #end if
-	#else
-	    #if $protocol.datatype.inputdatatype=="1"
-	        fasta $protocol.datatype.dataset_pos $protocol.datatype.dataset_neg
-	    #else
-	        arff $protocol.datatype.dataset
-            #end if
-	    $protocol.datatype.dataset_test
-	#end if
-	$outfile
-        #if $kernel.kname=="4" or $kernel.kname=="5"
-            #if $kernel.seq.stype=="dna"
-                dna $kernel.seq.con
-            #elif $kernel.seq.stype=="protein"
-                protein $kernel.seq.con
-            #end if
-        #end if
-  </command>
-  <inputs>
-	  <conditional name="protocol">
-	  <param name="expt_type" type="select" label="Experimental
-	  Protocol" help="In 'Cross Validation' mode, the training set is
-			split into N subsets. (N-1) subsets are used for training
-            and the remaining subset is used for testing. This is repeated
-            until every subset has been used for testing once.
-            In 'Prediction' mode, one trains on a
-	        training set and then predicts on a separate testing set.">
-        <option value="1">Train using cross validation</option>
-	<option value="3">Train and predict</option>
-      </param>
-      <when value="1">
-	  <conditional name="datatype">
-	  <param name="inputdatatype" type="select" label="Input data type" help="There are two available formats: FASTA for sequences and the more general ARFF format for sequences or numerical attributes.">
-	  <option value="1">FASTA</option>
-	  <option value="2">ARFF</option>
-	  </param>
-	  <when value="1">
-		  <param name="dataset_pos" type="data" format="fasta">
-		  <label >A data file with positive sequence for training</label>
-		  </param>
-		  <param name="dataset_neg" type="data" format="fasta">
-		  <label >A data file with negative sequence for training</label>
-		  </param>
-	  </when>
-	  <when value="2">
-		  <param name="dataset" type="data" format="arff">
-		  <label >A data file with labeled examples for training</label>
-		  </param>
-	  </when>
-	  </conditional>
-	  <param name="crossval" type="integer" size="4" value="3"
-	  help="Use the cross-validation parameter to determine the
-	  number of cross validation repeats. Typical values are between
-	  3 and 10.">
-	  <label>Number of cross validation repeats</label>
-	  </param>
-      </when>
-      <when value="3">
-	  <conditional name="datatype">
-	  <param name="inputdatatype" type="select" label="Input data type" help="">
-	  <option value="1">FASTA</option>
-	  <option value="2">ARFF</option>
-	  </param>
-	  <when value="1">
-		  <param name="dataset_pos" type="data" format="fasta">
-		  <label >A data file with positive sequence for training</label>
-		  </param>
-		  <param name="dataset_neg" type="data" format="fasta">
-		  <label >A data file with negaitive sequence for training</label>
-		  </param>
-		  <param name="dataset_test" type="data" format="fasta">
-		  <label>A data file with sequences for prediction</label>
-		  </param>
-	  </when>
-	  <when value="2">
-	         <param name="dataset" type="data" format="arff">
-		 <label >A data file with labeled examples for training</label>
-		 </param>
-		 <param name="dataset_test" type="data" format="arff">
-		 <label>A data file with examples for prediction</label>
-		 </param>
-	  </when>
-	  </conditional>
-      </when>
-    </conditional>
-    <param name="C" type="float" size="10" value="10" help="Increase C if you trust your training set. ">
-      <label>SVM regularization parameter C
-	  </label>
-    </param>
-    <conditional name="kernel">
-      <param name="kname" type="select" label="Kernel" help="The kernel
-	  determines how the 'similarity' between two examples is
-	  computed. It has to fit to the type and properties of the data: Use the Linear,
-	  Polynomial or Gaussian kernel for real-valued data and the
-	  Spectrum and Weighted Degree kernel for sequences.">
-	<option value="1">Linear</option>
-	<option value="2">Polynomial</option>
-	<option value="3">Gaussian (RBF)</option>
-	<option value="4" selected="true">Spectrum</option>
-	<option value="5">Weighted Degree</option>
-      </param>
-      <when value="1">
-      </when>
-      <when value="2">
-        <param name="degree" type="integer" size="4" value="3" help="The
-	  degree of the polynomial kernel determines the largest number of
-	  input features which are combined. ">
-	  <label>The degree of the Polynomial Kernel</label>
-	</param>
-        <param name="inhomogene" type="boolean" truevalue="true" falsevalue="false" checked="true" help="Inhomogenous polynomial kernels can model more complex features." />
-	<!--<param name="normalised" type="select" help="It is recommended to set this to yes">
-	  <option value="true">Yes</option>
-	  <option value="false">No</option>
-	</param>-->
-      </when>
-      <when value="3">
-        <param name="width" type="float" size="10" value="1" help="The
-	  kernel width controls the breadth of the Gaussian
-	  around each point. ">
-	  <label>The width of the Gaussian Kernel</label>
-	</param>
-      </when>
-      <when value="4">
-        <param name="degree" type="integer" size="10" value="6"
-	  help="Degree controls the maximal length of substrings considered in the Spectrum kernel. ">
-	  <label>The degree of the Spectrum Kernel</label>
-        </param>
-
-        <conditional name="seq">
-               <param name="stype" type="select" label="Input sequence type" help="Mention about the input data file sequence type DNA/Protein.">
-                     <option value="dna">DNA</option>
-                     <option value="protein">Protein</option>
-               </param>
-               <when value="dna">
-		     <param name="con" type="select" label="Non Nucleotide converter" help="If any Non Nucleotide character present in input sequence, replace with anyone of the following">
-                           <option value="A">Adenine[A]</option>
-                           <option value="T">Thymine[T]</option>
-                           <option value="C">Cytosine[C]</option>
-                           <option value="G">Guanine[G]</option>
-                           <option value="R">Purine[A|G]</option>
-                           <option value="Y">Pyrimidine[T|C]</option>
-                           <option value="N">Nucleotide[A|T|C|G]</option>
-                     </param>
-               </when>
-               <when value="protein">
-                    <param name="con" type="select" label="Non Amino acid converter" help="If any Non Amino acid character present in input sequence, replace with anyone of the following">
-                           <option value="G">Glycine</option>
-                           <option value="P">Proline</option>
-                           <option value="A">Alanine</option>
-                           <option value="V">Valine</option>
-                           <option value="L">Leucine</option>
-                           <option value="I">Isoleucine</option>
-                           <option value="M">Methionine</option>
-                           <option value="C">Cysteine</option>
-                           <option value="F">Phenylalanine</option>
-                           <option value="Y">Tyrosine</option>
-                           <option value="W">Tryptophan</option>
-                           <option value="H">Histidine</option>
-                           <option value="K">Lysine</option>
-                           <option value="R">Arginine</option>
-                           <option value="Q">Glutamine</option>
-                           <option value="N">Asparagine</option>
-                           <option value="E">Glutamic Acid</option>
-                           <option value="D">Aspartic Acid</option>
-                           <option value="S">Serine</option>
-                           <option value="T">Threonine</option>
-                           <option value="random">Random</option>
-                    </param>
-               </when>
-        </conditional>
-
-      </when>
-      <when value="5">
-        <param name="degree" type="integer" size="10" value="10"
-	  help="Degree controls the maximal length of substrings considered in the WD kernel. ">
-	  <label>The degree of the WD Kernel</label>
-        </param>
-        <param name="shift" type="integer" size="10" value="2"
-	  help="The shift parameter controls how many nucleotides a sequence elements can vary in position. ">
-          <label>The shift parameter of the WD Kernel to allow positional variations</label>
-        </param>
-
-        <conditional name="seq">
-               <param name="stype" type="select" label="Input sequence type" help="Mention about the input data file sequence type DNA/Protein.">
-                     <option value="dna">DNA</option>
-                     <option value="protein">Protein</option>
-               </param>
-               <when value="dna">
-		     <param name="con" type="select" label="Non Nucleotide converter" help="If any Non Nucleotide character present in input sequence, replace with anyone of the following">
-                           <option value="A">Adenine[A]</option>
-                           <option value="T">Thymine[T]</option>
-                           <option value="C">Cytosine[C]</option>
-                           <option value="G">Guanine[G]</option>
-                           <option value="R">Purine[A|G]</option>
-                           <option value="Y">Pyrimidine[T|C]</option>
-                           <option value="N">Nucleotide[A|T|C|G]</option>
-                     </param>
-               </when>
-               <when value="protein">
-                    <param name="con" type="select" label="Non Amino acid converter" help="If any Non Amino acid character present in input sequence, replace with anyone of the following">
-                           <option value="G">Glycine</option>
-                           <option value="P">Proline</option>
-                           <option value="A">Alanine</option>
-                           <option value="V">Valine</option>
-                           <option value="L">Leucine</option>
-                           <option value="I">Isoleucine</option>
-                           <option value="M">Methionine</option>
-                           <option value="C">Cysteine</option>
-                           <option value="F">Phenylalanine</option>
-                           <option value="Y">Tyrosine</option>
-                           <option value="W">Tryptophan</option>
-                           <option value="H">Histidine</option>
-                           <option value="K">Lysine</option>
-                           <option value="R">Arginine</option>
-                           <option value="Q">Glutamine</option>
-                           <option value="N">Asparagine</option>
-                           <option value="E">Glutamic Acid</option>
-                           <option value="D">Aspartic Acid</option>
-                           <option value="S">Serine</option>
-                           <option value="T">Threonine</option>
-                           <option value="random">Random</option>
-                    </param>
-               </when>
-        </conditional>
-
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="outfile" />
-  </outputs>
-  <help>
-.. class:: infomark
-
-    The SVM will classify examples according to their features and
-    report the classes to which they belong to for every entry in the
-    training set or the test set. The cross validation (CV) procedure
-    splits the data in a number of (approximately) equal sized sets
-    (controlled by the CV repeat parameter). Then, it holds out each
-    of those sets in turn for validation (prediction with the SVM),
-    while using all the other sets for training.
-
-.. class:: warningmark
-
-    Please note that the
-    total computation time is proportional to the number of
-    cross-validation rounds.
-
-.. class:: infomark
-
-    **TIP:** The result table with the individual classification for every
-    example.  To find optimal settings for C and the kernel
-    parameters, check the prediction performance with *SVM Toolbox->Evaluate Predictions*
-    and adjust in either direction to find a good setting. Repeat
-    several times if necessary. Alternatively, use *SVM Toolbox->Model Selection*.
-
------
-
-    This tool is part of the MLB Galaxy package, adding some machine
-    learning functionality to PSU's Galaxy framework.  Copyright (C)
-    2008 Sebastian J. Schultheiss (sebi@umich.edu), Gunnar Raetsch
-    (raetsch@tuebingen.mpg.de) and Cheng Soon Ong (chengsoon.ong@tuebingen.mpg.de)
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, see http://www.gnu.org/licenses
-    or write to the Free Software Foundation, Inc., 51 Franklin Street,
-    Fifth Floor, Boston, MA 02110-1301  USA
-  </help>
-</tool>
-
-
-
diff --git a/applications/easysvm/galaxy/eval.xml b/applications/easysvm/galaxy/eval.xml
deleted file mode 100644
index b7d7d8bc0e1..00000000000
--- a/applications/easysvm/galaxy/eval.xml
+++ /dev/null
@@ -1,85 +0,0 @@
-<?xml version="1.0"?>
-<tool id="eval" name="Evaluate Predictions">
-<description>Use predictions and labeled examples to measure prediction performance</description>
-  <command interpreter="python">easysvm.py
-        eval
-	$predfile
-	#if $datatype.inputdatatype=="1"
-	    fasta $datatype.dataset_pos $datatype.dataset_neg
-	#else
-	    arff $datatype.dataset
-        #end if
-	$outfile $figuretype $figurefile
-  </command>
-  <inputs>
-          <param type="data" format="tabular" name="predfile" help="Select a prediction file." label="Predictions" />
-	  <conditional name="datatype">
-	  <param name="inputdatatype" type="select" label="Input data type" help="">
-	  <option value="1">FASTA</option>
-	  <option value="2">ARFF</option>
-	  </param>
-	  <when value="1">
-		  <param name="dataset_pos" type="data" format="fasta">
-		  <label >A data file with positive sequence for training</label>
-		  </param>
-		  <param name="dataset_neg" type="data" format="fasta">
-		  <label >A data file with negaitive sequence for training</label>
-		  </param>
-	  </when>
-	  <when value="2">
-		  <param name="dataset" type="data" format="arff">
-		  <label >A data file with labeled examples for training</label>
-		  </param>
-	  </when>
-	  </conditional>
-	  <param name="figuretype" type="select" label="Graphical evaluation type" help="">
-	  <option value="roc">Receiver Operator Curve (ROC)</option>
-	  <option value="prc">Precision Recall Curve (PRC)</option>
-	  </param>
-  </inputs>
-  <outputs>
-    <data format="txt" name="outfile" />
-    <data format="png" name="figurefile" />
-  </outputs>
-  <help>
-.. class:: infomark
-
-    This tool takes a set of predictions and the a labeled dataset as input
-    and computes several performance measures. Only the labels are taken
-    into account for evaluation. Two output files are generated: a text
-    summary and either an ROC or PRC curve.
-
-.. class:: warningmark
-
-    Make sure the prediction file matches the dataset.
-
-.. class:: infomark
-
-    **TIP:** To optimize the SVM hyper-parameters, you may also use *SVM Toolbox->Model Selection*.
-
-----
-
-    This tool is part of the MLB Galaxy package, adding some machine
-    learning functionality to PSU's Galaxy framework.  Copyright (C)
-    2008 Sebastian J. Schultheiss (sebi@umich.edu), Gunnar Raetsch
-    (raetsch@tuebingen.mpg.de) and Cheng Soon Ong (chengsoon.ong@tuebingen.mpg.de)
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, see http://www.gnu.org/licenses
-    or write to the Free Software Foundation, Inc., 51 Franklin Street,
-    Fifth Floor, Boston, MA 02110-1301  USA
-  </help>
-</tool>
-
-
-
diff --git a/applications/easysvm/galaxy/modelsel.xml b/applications/easysvm/galaxy/modelsel.xml
deleted file mode 100644
index 96b07a3963e..00000000000
--- a/applications/easysvm/galaxy/modelsel.xml
+++ /dev/null
@@ -1,251 +0,0 @@
-<?xml version="1.0"?>
-<tool id="easymodelsel" name="SVM Model Selection">
-<description>Find the best combination of SVM hyper-parameters</description>
-  <command interpreter="python">easysvm.py
-        modelsel $crossval
-        $C
-	#if $kernel.kname == "1"
-	    linear
-        #elif $kernel.kname == "2"
-	    poly ${kernel.degree} ${kernel.inhomogene} true
-        #elif $kernel.kname == "3"
-	    gauss ${kernel.width}
-        #elif $kernel.kname == "4"
-	    spec ${kernel.degree}
-        #elif $kernel.kname == "5"
-	    wd ${kernel.degree} ${kernel.shift}
-        #end if
-	#if $datatype.inputdatatype=="1"
-	    fasta $datatype.dataset_pos $datatype.dataset_neg
-	#else
-	    arff $datatype.dataset
-        #end if
-	$outfile
-        #if $kernel.kname=="4" or $kernel.kname=="5"
-            #if $kernel.seq.stype=="dna"
-                dna $kernel.seq.con
-            #elif $kernel.seq.stype=="protein"
-                protein $kernel.seq.con
-            #end if
-        #end if
-  </command>
-  <inputs>
-	  <conditional name="datatype">
-	  <param name="inputdatatype" type="select" label="Input data type" help="There are two available formats: FASTA for sequences and the more general ARFF format for sequences or numerical attributes.">
-	  <option value="1">FASTA</option>
-	  <option value="2">ARFF</option>
-	  </param>
-	  <when value="1">
-		  <param name="dataset_pos" type="data" format="fasta">
-		  <label >A data file with positive sequence for training</label>
-		  </param>
-		  <param name="dataset_neg" type="data" format="fasta">
-		  <label >A data file with negaitive sequence for training</label>
-		  </param>
-	  </when>
-	  <when value="2">
-		  <param name="dataset" type="data" format="arff">
-		  <label >A data file with labeled examples for training</label>
-		  </param>
-	  </when>
-	  </conditional>
-	  <param name="crossval" type="integer" size="4" value="3"
-	  help="Use the cross-validation parameter to determine the
-	  number of cross valudation repeats. Typical values are between
-	  3 and 10.">
-	  <label>Number of cross validation repeats</label>
-	  </param>
-    <param name="C" type="text" size="10" value="0.1,1,10"
-		   help="List of regularization parameter values considered for
-		   model selection.">
-      <label>SVM regularization parameter C
-	  </label>
-    </param>
-    <conditional name="kernel">
-      <param name="kname" type="select" label="Kernel" help="The kernel
-	  determines how the 'similarity' between two examples is
-	  computed. It has to fit to the type and properties of the data: Use the Linear,
-	  Polynomial or Gaussian kernel for real-valued data and the
-	  Spectrum and Weighted Degree kernel for sequences.">
-	<option value="1">Linear</option>
-	<option value="2">Polynomial</option>
-	<option value="3">Gaussian (RBF)</option>
-	<option value="4" selected="true">Spectrum</option>
-	<option value="5">Weighted Degree</option>
-      </param>
-      <when value="1">
-      </when>
-      <when value="2">
-        <param name="degree" type="text" size="4" value="2,3,4" help="The
-	  degree of the polynomial kernel determines the largest number of
-	  input features which are combined. This can be a comma-separated
-	  list of values to be considered for model selection.">
-	  <label>The degree of the Polynomial Kernel</label>
-	</param>
-        <param name="inhomogene" type="boolean" truevalue="true" falsevalue="false" checked="true" help="Inhomogenous polynomial kernels can model more complex features." />
-	<!--<param name="normalised" type="select" help="It is recommended to set this to yes">
-	  <option value="true">Yes</option>
-	  <option value="false">No</option>
-	</param>-->
-      </when>
-      <when value="3">
-        <param name="width" type="text" size="10" value="0.1,1,10" help="The
-	  kernel width controls the breadth of the Gaussian
-	  around each point. This can be a comma-separated list of values  to be considered for model selection.">
-	  <label>The width of the Gaussian Kernel</label>
-	</param>
-      </when>
-      <when value="4">
-        <param name="degree" type="text" size="10" value="4,5,6"
-	  help="Degree controls the maximal length of substrings considered
-	  in the Spectrum kernel. This can be a comma-separated list of
-	  values  to be considered for model selection.">
-	  <label>The degree of the Spectrum Kernel</label>
-        </param>
-
-        <conditional name="seq">
-               <param name="stype" type="select" label="Input sequence type" help="Mention about the input data file sequence type DNA/Protein.">
-                     <option value="dna">DNA</option>
-                     <option value="protein">Protein</option>
-               </param>
-               <when value="dna">
-		     <param name="con" type="select" label="Non Nucleotide converter" help="If any Non Nucleotide character present in input sequence, replace with anyone of the following">
-                           <option value="A">Adenine[A]</option>
-                           <option value="T">Thymine[T]</option>
-                           <option value="C">Cytosine[C]</option>
-                           <option value="G">Guanine[G]</option>
-                           <option value="R">Purine[A|G]</option>
-                           <option value="Y">Pyrimidine[T|C]</option>
-                           <option value="N">Nucleotide[A|T|C|G]</option>
-                     </param>
-               </when>
-               <when value="protein">
-                    <param name="con" type="select" label="Non Amino acid converter" help="If any Non Amino acid character present in input sequence, replace with anyone of the following">
-                           <option value="G">Glycine</option>
-                           <option value="P">Proline</option>
-                           <option value="A">Alanine</option>
-                           <option value="V">Valine</option>
-                           <option value="L">Leucine</option>
-                           <option value="I">Isoleucine</option>
-                           <option value="M">Methionine</option>
-                           <option value="C">Cysteine</option>
-                           <option value="F">Phenylalanine</option>
-                           <option value="Y">Tyrosine</option>
-                           <option value="W">Tryptophan</option>
-                           <option value="H">Histidine</option>
-                           <option value="K">Lysine</option>
-                           <option value="R">Arginine</option>
-                           <option value="Q">Glutamine</option>
-                           <option value="N">Asparagine</option>
-                           <option value="E">Glutamic Acid</option>
-                           <option value="D">Aspartic Acid</option>
-                           <option value="S">Serine</option>
-                           <option value="T">Threonine</option>
-                           <option value="random">Random</option>
-                    </param>
-               </when>
-        </conditional>
-
-      </when>
-      <when value="5">
-        <param name="degree" type="text" size="10" value="5,10,15"
-	  help="Degree controls the maximal length of substrings considered
-	  in the WD kernel. This can be a comma-separated list of values  to
-	  be considered for model selection.">
-	  <label>The degree of the WD Kernel</label>
-        </param>
-        <param name="shift" type="text" size="10" value="0"
-	  help="The shift parameter controls how many nucleotides a sequence
-	  elements can vary in position. This can be a comma-separated list
-	  of values to be considered for model selection.">
-          <label>The shift parameter of the WD Kernel to allow positional variations</label>
-        </param>
-
-        <conditional name="seq">
-               <param name="stype" type="select" label="Input sequence type" help="Mention about the input data file sequence type DNA/Protein.">
-                     <option value="dna">DNA</option>
-                     <option value="protein">Protein</option>
-               </param>
-               <when value="dna">
-		     <param name="con" type="select" label="Non Nucleotide converter" help="If any Non Nucleotide character present in input sequence, replace with anyone of the following">
-                           <option value="A">Adenine[A]</option>
-                           <option value="T">Thymine[T]</option>
-                           <option value="C">Cytosine[C]</option>
-                           <option value="G">Guanine[G]</option>
-                           <option value="R">Purine[A|G]</option>
-                           <option value="Y">Pyrimidine[T|C]</option>
-                           <option value="N">Nucleotide[A|T|C|G]</option>
-                     </param>
-               </when>
-               <when value="protein">
-                    <param name="con" type="select" label="Non Amino acid converter" help="If any Non Amino acid character present in input sequence, replace with anyone of the following">
-                           <option value="G">Glycine</option>
-                           <option value="P">Proline</option>
-                           <option value="A">Alanine</option>
-                           <option value="V">Valine</option>
-                           <option value="L">Leucine</option>
-                           <option value="I">Isoleucine</option>
-                           <option value="M">Methionine</option>
-                           <option value="C">Cysteine</option>
-                           <option value="F">Phenylalanine</option>
-                           <option value="Y">Tyrosine</option>
-                           <option value="W">Tryptophan</option>
-                           <option value="H">Histidine</option>
-                           <option value="K">Lysine</option>
-                           <option value="R">Arginine</option>
-                           <option value="Q">Glutamine</option>
-                           <option value="N">Asparagine</option>
-                           <option value="E">Glutamic Acid</option>
-                           <option value="D">Aspartic Acid</option>
-                           <option value="S">Serine</option>
-                           <option value="T">Threonine</option>
-                           <option value="random">Random</option>
-                    </param>
-               </when>
-        </conditional>
-
-      </when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="txt" name="outfile" />
-  </outputs>
-  <help>
-.. class:: infomark
-
-    For every parameter combination of C and kernel parameters
-    the performance is estimated using cross validation. The result is a list of
-    optimal parameter combinations for several performance measures.
-
-.. class:: warningmark
-
-    Please note that the
-    total computation time scales with number of
-    cross-validation rounds and parameter combinations.
-
-----
-
-    This tool is part of the MLB Galaxy package, adding some machine
-    learning functionality to PSU's Galaxy framework.  Copyright (C)
-    2008 Sebastian J. Schultheiss (sebi@umich.edu), Gunnar Raetsch
-    (raetsch@tuebingen.mpg.de) and Cheng Soon Ong (chengsoon.ong@tuebingen.mpg.de)
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, see http://www.gnu.org/licenses
-    or write to the Free Software Foundation, Inc., 51 Franklin Street,
-    Fifth Floor, Boston, MA 02110-1301  USA
-  </help>
-</tool>
-
-
-
diff --git a/applications/easysvm/galaxy/poim.xml b/applications/easysvm/galaxy/poim.xml
deleted file mode 100644
index 840af860ad0..00000000000
--- a/applications/easysvm/galaxy/poim.xml
+++ /dev/null
@@ -1,106 +0,0 @@
-<?xml version="1.0"?>
-<tool id="easypoim" name="Positional Oligomer Matrices">
-<description>Visualize SVMs with WD kernel</description>
-  <command interpreter="python">easysvm.py
-        poim
-        $C
-	${poimdegree}
-	wd ${degree} ${shift}
-	#if $datatype.inputdatatype=="1"
-	    fasta $datatype.dataset_pos $datatype.dataset_neg
-	#else
-	    arff $datatype.dataset
-        #end if
-	$poimfile
-        #if $seq.stype=="dna"
-            dna $seq.con
-        #end if
-  </command>
-  <inputs>
-	  <conditional name="datatype">
-	  <param name="inputdatatype" type="select" label="Input data type" help="There are two available formats: FASTA for sequences and the more general ARFF format for sequences or numerical attributes.">
-	  <option value="1">FASTA</option>
-	  <option value="2">ARFF</option>
-	  </param>
-	  <when value="1">
-		  <param name="dataset_pos" type="data" format="fasta">
-		  <label >A data file with positive sequence for training</label>
-		  </param>
-		  <param name="dataset_neg" type="data" format="fasta">
-		  <label >A data file with negaitive sequence for training</label>
-		  </param>
-	  </when>
-	  <when value="2">
-		  <param name="dataset" type="data" format="arff">
-		  <label >A data file with labeled examples for training</label>
-		  </param>
-	  </when>
-	  </conditional>
-    <param name="C" type="float" size="10" value="1" help="Increase C if you trust your training set.">
-      <label>SVM regularization parameter C</label>
-    </param>
-        <param name="degree" type="integer" size="10" value="8"
-	  help="Degree controls the maximal length of substrings considered in the WD kernel. ">
-	  <label>The degree of the WD Kernel</label>
-        </param>
-        <param name="shift" type="integer" size="10" value="0"
-	  help="The shift parameter controls how many nucleotides a sequence elements can vary in position. ">
-          <label>The shift parameter of the WD Kernel to allow positional variations</label>
-        </param>
-        <param name="poimdegree" type="integer" size="10" value="6" help="">
-          <label>Poim degree</label>
-    </param>
-        <conditional name="seq">
-               <param name="stype" type="select" label="Input sequence type" help="Mention about the input data file sequence type DNA/Protein.">
-                     <option value="dna">DNA</option>
-               </param>
-               <when value="dna">
-		     <param name="con" type="select" label="Non Nucleotide converter" help="If any Non Nucleotide character present in input sequence, replace with anyone of the following">
-                           <option value="A">Adenine[A]</option>
-                           <option value="T">Thymine[T]</option>
-                           <option value="C">Cytosine[C]</option>
-                           <option value="G">Guanine[G]</option>
-                           <option value="R">Purine[A|G]</option>
-                           <option value="Y">Pyrimidine[T|C]</option>
-                           <option value="N">Nucleotide[A|T|C|G]</option>
-                     </param>
-               </when>
-        </conditional>
-  </inputs>
-  <outputs>
-    <data format="png" name="poimfile" />
-  </outputs>
-  <help>
-
-.. class:: infomark
-
-    This tool trains an SVM with Weighted Degree kernel and computes
-    the so-called Positional Oligomer Importance Matrices that can be used
-    visualize features describing the decision boundary of the learned classifier.
-
-----
-
-    This tool is part of the MLB Galaxy package, adding some machine
-    learning functionality to PSU's Galaxy framework.  Copyright (C)
-    2008 Sebastian J. Schultheiss (sebi@umich.edu), Gunnar Raetsch
-    (raetsch@tuebingen.mpg.de) and Cheng Soon Ong (chengsoon.ong@tuebingen.mpg.de)
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, see http://www.gnu.org/licenses
-    or write to the Free Software Foundation, Inc., 51 Franklin Street,
-    Fifth Floor, Boston, MA 02110-1301  USA
-  </help>
-</tool>
-
-
-
diff --git a/applications/easysvm/scripts/datagen.py b/applications/easysvm/scripts/datagen.py
deleted file mode 100644
index d65c6257ad0..00000000000
--- a/applications/easysvm/scripts/datagen.py
+++ /dev/null
@@ -1,103 +0,0 @@
-#!/usr/bin/env python
-
-#############################################################################################
-#                                                                                           #
-#    This program is free software; you can redistribute it and/or modify                   #
-#    it under the terms of the GNU General Public License as published by                   #
-#    the Free Software Foundation; either version 3 of the License, or                      #
-#    (at your option) any later version.                                                    #
-#                                                                                           #
-#    This program is distributed in the hope that it will be useful,                        #
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of                         #
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                           #
-#    GNU General Public License for more details.                                           #
-#                                                                                           #
-#    You should have received a copy of the GNU General Public License                      #
-#    along with this program; if not, see http://www.gnu.org/licenses                       #
-#    or write to the Free Software Foundation, Inc., 51 Franklin Street,                    #
-#    Fifth Floor, Boston, MA 02110-1301  USA                                                #
-#                                                                                           #
-#############################################################################################
-
-import sys
-import random
-from numpy import array
-import esvm.parse
-import esvm.plots
-from esvm.datafuncs import MotifDataDef, fastawrite_sequence, arffwrite_sequence, arffwrite_real
-from esvm.mldata import init_datasetfile
-
-if __name__ == '__main__':
-
-    if len(sys.argv)<3 or (sys.argv[1]=='motif' and sys.argv[2]!='arff' and sys.argv[2]!='fasta') \
-           or (sys.argv[1]=='motif' and sys.argv[2]=='fasta' and len(sys.argv)<9) \
-           or (sys.argv[1]=='motif' and sys.argv[2]=='arff' and len(sys.argv)<14) \
-           or (sys.argv[1]=='cloud' and len(sys.argv)<7) or (sys.argv[1]!='motif') \
-           and (sys.argv[1]!='cloud'):
-        sys.stderr.write( "usage: %s motif fasta MOTIF numSeq seqLenRange"+\
-                          "positionRange mutationRate output.fa\n"+\
-                          "or: %s motif arff MOTIFPOS numSeq-pos seqLenRange-pos "+\
-                          "positionRange-pos mutationRate-pos \\\n"+\
-                          "motif-neg numSeq-neg seqLenRange-neg positionRange-neg "+\
-                          "mutationRange-neg output.arff\n"+\
-                          "or: %s cloud numpoints dimensions fractionOfPositives "+\
-                          "cloudWidth output.arff\n" % (sys.argv[0],sys.argv[0],sys.argv[0]) )
-        sys.exit(-1)
-
-    random.seed()
-
-    if sys.argv[1] == 'motif':
-        if sys.argv[2]=='fasta':
-            # generate sequences in FASTA format
-            p = MotifDataDef()
-            p.motif = sys.argv[3]
-            p.numseq = int(sys.argv[4])
-            (p.seqlenmin,p.seqlenmax) = esvm.parse.parse_range(sys.argv[5])
-            (p.posstart,p.posend) = esvm.parse.parse_range(sys.argv[6])
-            p.mutrate = float(sys.argv[7])
-
-            filename = sys.argv[8]
-            fastawrite_sequence(filename, p)
-
-        else:
-            # generate sequences in ARFF format
-            assert(sys.argv[2]=='arff')
-            p = MotifDataDef()
-            p.motif = sys.argv[3]
-            p.numseq = int(sys.argv[4])
-            (p.seqlenmin,p.seqlenmax) = esvm.parse.parse_range(sys.argv[5])
-            (p.posstart,p.posend) = esvm.parse.parse_range(sys.argv[6])
-            p.mutrate = float(sys.argv[7])
-
-            n = MotifDataDef()
-            n.motif = sys.argv[8]
-            n.numseq = int(sys.argv[9])
-            (n.seqlenmin,n.seqlenmax) = esvm.parse.parse_range(sys.argv[10])
-            (n.posstart,n.posend) = esvm.parse.parse_range(sys.argv[11])
-            n.mutrate = float(sys.argv[12])
-
-            filename = sys.argv[13]
-            arffwrite_sequence(filename, p, n)
-
-    elif sys.argv[1] == 'cloud':
-        # generate a data cloud in ARFF format
-        numpoint = int(sys.argv[2])
-        numfeat = int(sys.argv[3])
-        fracpos = float(sys.argv[4])
-        width = float(sys.argv[5])
-
-        filename = sys.argv[6]
-        arffwrite_real(filename, numpoint, numfeat, fracpos, width)
-        if len(sys.argv)>=8:
-            fp = init_datasetfile(filename,'vec')
-            (examples,labels) = fp.readlines()
-            pointcloud = []
-            for ix in xrange(numpoint):
-                pointcloud.append(array([labels[ix],examples[0,ix],examples[1,ix]]))
-            esvm.plots.plotcloud(pointcloud,sys.argv[7],'Pointcloud')
-
-	#(examples,labels,metadata)=arffwrite_real(filename, numpoint, numfeat, fracpos, width)
-	#if len(sys.argv)>=8:
-	#	plots.plotcloud(pointcloud,sys.argv[7],metadata)
-    else:
-        print 'Unknown option %s\n' % sys.argv[1]
diff --git a/applications/easysvm/scripts/easysvm.py b/applications/easysvm/scripts/easysvm.py
deleted file mode 100644
index 7566201d125..00000000000
--- a/applications/easysvm/scripts/easysvm.py
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-
-#############################################################################################
-#                                                                                           #
-#    This program is free software; you can redistribute it and/or modify                   #
-#    it under the terms of the GNU General Public License as published by                   #
-#    the Free Software Foundation; either version 3 of the License, or                      #
-#    (at your option) any later version.                                                    #
-#                                                                                           #
-#    This program is distributed in the hope that it will be useful,                        #
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of                         #
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                           #
-#    GNU General Public License for more details.                                           #
-#                                                                                           #
-#    You should have received a copy of the GNU General Public License                      #
-#    along with this program; if not, see http://www.gnu.org/licenses                       #
-#    or write to the Free Software Foundation, Inc., 51 Franklin Street,                    #
-#    Fifth Floor, Boston, MA 02110-1301  USA                                                #
-#                                                                                           #
-#############################################################################################
-
-import sys
-import random
-from esvm.experiment import svm_cv, svm_pred, svm_poim, svm_eval, svm_modelsel
-
-if __name__ == '__main__':
-
-    if len(sys.argv)<2:
-        sys.stderr.write("usage: %s [cv|pred|modelsel|eval|poim] parameters\n" % sys.argv[0])
-        sys.exit(-1)
-
-    random.seed()
-
-    topmode = sys.argv[1]
-
-    if topmode == 'cv':
-        svm_cv(sys.argv)
-    elif topmode == 'pred':
-        svm_pred(sys.argv)
-    elif topmode == 'poim':
-        svm_poim(sys.argv)
-    elif topmode == 'eval':
-        svm_eval(sys.argv)
-    elif topmode == 'modelsel':
-        svm_modelsel(sys.argv)
-    else:
-        sys.stderr.write( "unknown mode %s (use: cv, pred, poim, eval)\n" % topmode)
-        sys.exit(-1)
-
-    sys.exit(0)
-
diff --git a/applications/easysvm/setup.py b/applications/easysvm/setup.py
deleted file mode 100755
index 774c111707b..00000000000
--- a/applications/easysvm/setup.py
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/usr/bin/env python
-
-#############################################################################################
-#                                                                                           #
-#    This program is free software; you can redistribute it and/or modify                   #
-#    it under the terms of the GNU General Public License as published by                   #
-#    the Free Software Foundation; either version 3 of the License, or                      #
-#    (at your option) any later version.                                                    #
-#                                                                                           #
-#    This program is distributed in the hope that it will be useful,                        #
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of                         #
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                           #
-#    GNU General Public License for more details.                                           #
-#                                                                                           #
-#    You should have received a copy of the GNU General Public License                      #
-#    along with this program; if not, see http://www.gnu.org/licenses                       #
-#    or write to the Free Software Foundation, Inc., 51 Franklin Street,                    #
-#    Fifth Floor, Boston, MA 02110-1301  USA                                                #
-#                                                                                           #
-#############################################################################################
-
-import sys
-from distutils.core import setup, Extension
-
-
-sys.stdout.write('-------------------------------------------\n')
-sys.stdout.write('easysvm - A front end to the shogun toolbox\n')
-sys.stdout.write('-------------------------------------------\n\n')
-
-
-try:
-    import numpy
-except:
-    sys.stderr.write("WARNING: did not find 'numpy'\n")
-
-try:
-    import pylab
-except:
-    sys.stderr.write("WARNING: did not find 'pylab'\n")
-
-try:
-    import shogun
-except:
-    sys.stderr.write("WARNING: did not find 'shogun'\n")
-    sys.stderr.write("         shogun is a required back end.\n")
-    sys.stderr.write("         See shogun website: http://www.shogun-toolbox.org)\n")
-
-try:
-    import arff
-except:
-    sys.stderr.write("WARNING: did not find 'arff'\n")
-    sys.stderr.write("         arff is required for reading and writing ARFF data files\n")
-    sys.stderr.write("         See arff website: http://www.mit.edu/~sav/arff/\n\n")
-    sys.stderr.write("         All other functionality should be ok.\n\n")
-
-
-
-setup (name = 'easysvm',
-       version = '0.3.3',
-       description = 'easysvm - A front end to the shogun toolbox',
-       author = ['Cheng Soon Ong', 'Gunnar Raetsch' ],
-       author_email = ['chengsoon.ong@tuebingen.mpg.de','gunnar.raetsch@tuebingen.mpg.de'],
-       license='GPLv3',
-       url = 'http://www.fml.tuebingen.mpg.de/raetsch/projects/easysvm',
-       py_modules=['esvm.datafuncs','esvm.mldata','esvm.mldata_arff','esvm.experiment','esvm.parse',\
-                   'esvm.plots','esvm.poim','esvm.utils','splicesites.utils'],
-       packages=['esvm','splicesites'],
-       scripts=['scripts/easysvm.py','scripts/datagen.py'],
-       long_description="""
-       easysvm is a front end to the shogun toolbox. It aims to be a
-       'quick start' tutorial for users interested in shogun. As such,
-       it covers only the absolute basics of machine learning.
-       More advanced users should directly use the interfaces provided
-       by shogun.
-
-       This setup.py installs two modules:
-       - esvm (which contains some basic tools for machine learning with SVMs)
-       - splicesites (which contains tools for splice site prediction)
-
-       It also installs two scripts (easysvm.py and datagen.py) in the relevant binary directory.
-       The two scripts provide a command line interface to the toolkit.
-       """
-       )
-
diff --git a/applications/easysvm/splicesites/__init__.py b/applications/easysvm/splicesites/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/applications/easysvm/splicesites/test_gc.py b/applications/easysvm/splicesites/test_gc.py
deleted file mode 100644
index 2f8a23efa09..00000000000
--- a/applications/easysvm/splicesites/test_gc.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python
-
-import bz2
-import time
-import sys
-import numpy
-import numpy.matlib
-from splicesites.utils import create_dataset
-from esvm.utils import calcroc
-from esvm.experiment import crossvalidation
-from esvm.mldata import init_datasetfile
-
-def test_gc(gcfilename):
-    """
-    Check the gc content files for conflicting labels
-    """
-    fp = init_datasetfile(gcfilename,'vec')
-    (examples,labels) = fp.readlines()
-    print '%d positive and %d negative examples' % (sum(labels>0.0),sum(labels<0.0))
-
-    distance = sqr_dist(numpy.matrix(examples),numpy.matrix(examples))
-    labdist = numpy.matrix(labels).T*numpy.matrix(labels)
-    #difflab = numpy.where(labdist.A<0,distance,numpy.matlib.ones((len(labels),len(labels))))
-    contracount = 0
-    for ix in xrange(len(labels)):
-        for iy in xrange(ix+1,len(labels)):
-            if labdist[ix,iy]<0 and distance[ix,iy]<0.01:
-                contracount += 1
-    print distance.shape, labdist.shape
-    #print '%d identical examples with opposing labels' %len(numpy.unique(numpy.where(difflab==0)[0]))
-    print '%d identical examples with opposing labels' % contracount
-
-
-def sqr_dist(a,b):
-    """Compute the square distance between vectors"""
-    dot_a = numpy.sum(numpy.multiply(a,a),axis=0).T
-    dot_b = numpy.sum(numpy.multiply(b,b),axis=0).T
-    unitvec = numpy.matlib.ones(dot_a.shape)
-    D = 2.0*a.T*b
-
-    for ix,bval in enumerate(dot_b):
-        D[:,ix] = dot_a - D[:,ix] + numpy.kron(bval,unitvec)
-
-    return D
-
-
-if __name__ == '__main__':
-    test_gc('C_elegans_don_freq.csv')
-    test_gc('C_elegans_acc_freq.csv')
-
diff --git a/applications/easysvm/splicesites/tutorial_example.py b/applications/easysvm/splicesites/tutorial_example.py
deleted file mode 100644
index 8cb7aa9e103..00000000000
--- a/applications/easysvm/splicesites/tutorial_example.py
+++ /dev/null
@@ -1,242 +0,0 @@
-#!/usr/bin/env python
-
-#############################################################################################
-#                                                                                           #
-#    This program is free software; you can redistribute it and/or modify                   #
-#    it under the terms of the GNU General Public License as published by                   #
-#    the Free Software Foundation; either version 3 of the License, or                      #
-#    (at your option) any later version.                                                    #
-#                                                                                           #
-#    This program is distributed in the hope that it will be useful,                        #
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of                         #
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                           #
-#    GNU General Public License for more details.                                           #
-#                                                                                           #
-#    You should have received a copy of the GNU General Public License                      #
-#    along with this program; if not, see http://www.gnu.org/licenses                       #
-#    or write to the Free Software Foundation, Inc., 51 Franklin Street,                    #
-#    Fifth Floor, Boston, MA 02110-1301  USA                                                #
-#                                                                                           #
-#############################################################################################
-
-import bz2
-import time
-import sys
-from splicesites.utils import create_dataset, create_modsel
-from esvm.utils import calcroc
-from esvm.experiment import crossvalidation
-from esvm.mldata import init_datasetfile
-from numpy.linalg import norm
-import numpy
-
-def write_results(f, results):
-    """
-    Write out results
-    """
-
-    f.write('Kernel\tParameters\tC\tauROC\n');
-    for i in xrange(len(results)):
-        C=results[i][0]
-        k_param=results[i][1]
-        param_name=k_param[1]['name']
-        kernel=k_param[0]
-        if kernel.endswith('2'):
-            kernel=kernel[:-1]
-        kernel_parameters= param_name + '=' + `k_param[1][param_name]`
-        perf = 100*results[i][2]
-
-        f.write('%s\t' % kernel)
-        f.write('%s\t' % kernel_parameters)
-        f.write('C=%2.2f\t' % C)
-        f.write('%2.1f%%\n' % perf)
-
-def normalize(examples, subtract_mean=False, divide_std=False, rescale=False, norm_one=False):
-    """
-    Scale GC data to ... (be on a ball? just const? 0 mean, std 1?)
-    """
-
-    if subtract_mean:
-        # mean = 0.0
-        mean=numpy.mean(examples, axis=1)
-        for i in xrange(examples.shape[1]):
-            examples[:,i]-=mean
-
-    if divide_std:
-        # std = 1.0
-        std=numpy.std(examples, axis=1)
-        for i in xrange(examples.shape[1]):
-            examples[:,i]/=(std+1e-10)
-
-    if rescale:
-        # scale to have on average 1 on linear kernel diagonal
-        scale=numpy.sqrt(numpy.mean(numpy.diag(numpy.mat(examples).T*numpy.mat(examples))))
-        examples/=scale
-
-    if norm_one:
-        # ball/circle
-        for i in xrange(examples.shape[1]):
-            examples[:,i]/=norm(examples[:,i])
-
-    return examples ;
-
-def run_single_experiment(results, num_fold_cv, kernelname, kparam, C, examples, labels):
-    """
-    Run a single experiment, i.e. for a fixed kernel and parameters
-    do num_fold cross-validation
-    """
-
-    param_name=kparam['name']
-    print 'Running C =', C, kernelname.title(), 'Kernel with', param_name, '=', kparam[param_name]
-    (all_outputs, all_split) = crossvalidation(num_fold_cv, kernelname, kparam, C, examples, labels, 'dna', 'A')
-    results.append( (C, (kernelname, kparam), calcroc(all_outputs,labels)) )
-
-def splice_example(Cs, gcfilename,seqfilename,seq2filename, plot=False):
-    """
-    For the data files, apply the set of kernels
-    """
-    # hyperparameters
-    num_fold_cv = 5
-
-    # The area under the receiver operating characteristic
-    results=[]
-
-    # Read datasets
-
-    # GC features
-    fp = init_datasetfile(gcfilename,'vec')
-    (gc_examples,gc_labels) = fp.readlines()
-    gc_examples = normalize(gc_examples, subtract_mean=True)
-
-    if plot:
-        from pylab import scatter,show
-        color=['b','r']
-        scatter(gc_examples[0,], gc_examples[1,], s=400*(gc_labels+2), c=''.join([ color[(int(i)+1)/2] for i in gc_labels]), alpha=0.1)
-        show()
-
-    # 2 sequence features
-    fp = init_datasetfile(seq2filename,'mseq')
-    (dna2_examples,dna2_labels) = fp.readlines()
-
-    # DNA sequences
-    fp = init_datasetfile(seqfilename,'seq')
-    (dna_examples,dna_labels) = fp.readlines()
-
-
-    #Define experiments to carry out
-
-    experiments=(
-    # Linear kernel on GC content
-    ('linear', {'scale':1.0, 'name':'scale'}, (gc_examples, gc_labels)),
-
-    # Polynomial kernel on GC content
-    ( 'poly', {'degree':3, 'name':'degree', 'inhomogene':True, 'normal':True}, (gc_examples, gc_labels)),
-    ( 'poly', {'degree':5, 'name':'degree', 'inhomogene':True, 'normal':True}, (gc_examples, gc_labels)),
-
-    # Gaussian kernel on GC content
-    ('gauss', {'width':100.0, 'name':'width'}, (gc_examples, gc_labels)),
-    ('gauss', {'width':1.0, 'name':'width'}, (gc_examples, gc_labels)),
-    ('gauss', {'width':0.01, 'name':'width'}, (gc_examples, gc_labels)),
-
-    # Spectrum kernel on 2 dna sequences
-    ('spec2', {'degree':1, 'name':'degree'}, (dna2_examples, dna2_labels)),
-    ('spec2', {'degree':3, 'name':'degree'}, (dna2_examples, dna2_labels)),
-    ('spec2', {'degree':5, 'name':'degree'}, (dna2_examples, dna2_labels)),
-
-    # Cumulative Spectrum kernel on 2 dna sequences
-    ('cumspec2', {'degree':1, 'name':'degree'}, (dna2_examples, dna2_labels)),
-    ('cumspec2', {'degree':3, 'name':'degree'}, (dna2_examples, dna2_labels)),
-    ('cumspec2', {'degree':5, 'name':'degree'}, (dna2_examples, dna2_labels)),
-
-    # Weighted degree kernel on dna sequences
-    ('wd', {'degree':1,'shift':0, 'name':'degree'}, (dna_examples, dna_labels)),
-    ('wd', {'degree':3,'shift':0, 'name':'degree'}, (dna_examples, dna_labels)),
-    ('wd', {'degree':5,'shift':0, 'name':'degree'}, (dna_examples, dna_labels))
-    )
-
-
-    if Cs is None:
-        for C in (0.01, 0.1, 1, 2, 5, 10):
-            for e in experiments:
-                run_single_experiment(results, num_fold_cv, e[0], e[1], C, e[2][0], e[2][1])
-    else:
-        for i in xrange(len(experiments)):
-            e=experiments[i]
-            run_single_experiment(results, num_fold_cv, e[0], e[1], Cs[i], e[2][0], e[2][1])
-
-    return results
-
-def get_best_results(results):
-    methods=('linear', 'poly', 'gauss', 'spec2', 'cumspec2', 'wd')
-    best_result=[]
-    for m in methods:
-        params=set()
-        for r in results:
-            if r[1][0]==m:
-                params.add(tuple(zip(r[1][1].keys(),r[1][1].values())))
-
-        for p in params:
-            m_result=0.0
-            m_best=None
-            for r in results:
-                if r[1][0]==m and r[1][1]==dict(p) and r[2]>m_result:
-                    m_result=r[2]
-                    m_best=r
-            best_result.append(m_best)
-    return best_result
-
-
-if __name__ == '__main__':
-    if len(sys.argv) > 1:
-        if sys.argv[1] == 'create_data':
-            create_dataset()
-            sys.exit(0)
-        elif sys.argv[1] == 'mselect':
-            results = splice_example(None, 'C_elegans_acc_modsel_gc.csv','C_elegans_acc_modsel_seq.csv','C_elegans_acc_modsel_seq2.csv')
-            #results = splice_example(None, 'C_elegans_acc_gc.csv','C_elegans_acc_seq.csv','C_elegans_acc_seq2.csv')
-            import pickle
-            pickle.dump(results, file('mselect_result.pickle','w'))
-            sys.exit(0)
-        elif sys.argv[1] == 'get_best':
-            import pickle
-            results=pickle.load(file('mselect_result.pickle'))
-            best_result=get_best_results(results)
-            write_results(sys.stdout, best_result)
-
-            #print 'Cs=[',
-            #for e in best_result:
-            #    print e[0], ",",
-            #print ']'
-            sys.exit(0)
-        else:
-            print "unknown argument"
-            sys.exit(1)
-
-    # without any argument
-    starttime = time.time()
-
-    Cs = [ 5, 10, 10, \
-           5, 0.01, 10, \
-           10, 10, 0.01, \
-           10, 10, 10, \
-           1, 1, 2 ]
-
-    # run the experiment
-    results = splice_example(Cs, 'C_elegans_acc_gc.csv','C_elegans_acc_seq.csv','C_elegans_acc_seq2.csv', False)
-
-    stoptime = time.time()
-    elapsedtime = time.strftime('Elapsed time (HH.MM:SS): %H.%M:%S',time.gmtime(stoptime-starttime))
-    print elapsedtime
-
-    write_results(file('results.txt','w'), results)
-    for curline in file('results.txt').readlines():
-        print curline.strip()
-
-if __name__ == '__main__':
-    if len(sys.argv) > 1:
-        if sys.argv[1] == 'create_data':
-            create_dataset()
-        elif sys.argv[1] == 'create_modsel':
-            create_modsel()
-            sys.exit()
-    #main()
-    print 'results in results.txt'
diff --git a/applications/easysvm/splicesites/utils.py b/applications/easysvm/splicesites/utils.py
deleted file mode 100644
index 78322522e6b..00000000000
--- a/applications/easysvm/splicesites/utils.py
+++ /dev/null
@@ -1,288 +0,0 @@
-import random
-import bz2
-import numpy
-from numpy import array, where, concatenate
-from numpy import kron, ones, sqrt, sum
-from os.path import exists
-from esvm.mldata import convert
-try:
-    import arff
-    have_arff = True
-except ImportError:
-    have_arff = False
-
-
-
-def create_dataset():
-    """Read the file with first 100k sequences from C. elegans
-    and generate some easier datasets.
-    """
-
-    if not have_arff:
-        print 'import arff failed, currently cannot create data'
-        return
-
-    # convert data to arff format
-    gen_arff('C_elegans_acc_100000.fasta.bz2','C_elegans_acc_gc.arff','C_elegans_acc_seq.arff',\
-             'C_elegans_acc_seq2.arff','C_elegans_acc_freq.arff',\
-             num_seqs=100000,subset=True,overwrite=True,normalise=False,\
-             max_pos=200,max_neg=2000)
-
-    print 'Convert from arff to csv and fasta'
-    convert('C_elegans_acc_gc.arff','C_elegans_acc_gc.csv','vec')
-    convert('C_elegans_acc_seq.arff','C_elegans_acc_seq.csv','seq')
-    convert('C_elegans_acc_freq.arff','C_elegans_acc_freq.csv','vec')
-    convert('C_elegans_acc_seq2.arff','C_elegans_acc_seq2.csv','mseq')
-    convert('C_elegans_acc_seq.arff','C_elegans_acc_seq.fa','seq')
-
-
-def create_modsel():
-    """Read the file with last 100k sequences from C. elegans
-    and generate some easier datasets.
-    """
-
-    if not have_arff:
-        print 'import arff failed, currently cannot create data'
-        return
-
-    # convert data to arff format
-    gen_arff('C_elegans_acc_modsel.fasta.bz2','C_elegans_acc_modsel_gc.arff','C_elegans_acc_modsel_seq.arff',\
-             'C_elegans_acc_modsel_seq2.arff','C_elegans_acc_modsel_freq.arff',\
-             num_seqs=100000,subset=True,overwrite=True,normalise=False,\
-             max_pos=200,max_neg=2000)
-
-    print 'Convert from arff to csv and fasta'
-    convert('C_elegans_acc_modsel_gc.arff','C_elegans_acc_modsel_gc.csv','vec')
-    convert('C_elegans_acc_modsel_seq.arff','C_elegans_acc_modsel_seq.csv','seq')
-    convert('C_elegans_acc_modsel_freq.arff','C_elegans_acc_modsel_freq.csv','vec')
-    convert('C_elegans_acc_modsel_seq2.arff','C_elegans_acc_modsel_seq2.csv','mseq')
-    convert('C_elegans_acc_modsel_seq.arff','C_elegans_acc_modsel_seq.fa','seq')
-
-
-def gen_arff(fastafilename,gcfilename,seqfilename,seq2filename,specfilename,\
-             num_seqs=100000,subset=False,max_pos=200,max_neg=2000,\
-             overwrite=False,normalise=True):
-    """If data not yet created, generate 2 arff files
-    - containing the two dimensional GC content before and after splice site
-    - containing the sequence around the splice site.
-    """
-    if (exists(gcfilename) and exists(seqfilename)) and not overwrite:
-        return
-
-    print 'Creating %s and %s from %s' % (gcfilename,seqfilename,fastafilename)
-
-    if fastafilename.find('acc')!= -1:
-        # acceptor, AG at [40:42]
-        window = (-40, 197, 42)
-    elif fastafilename.find('don')!= -1:
-        # donor, GT or GC at [40:42]
-        window = (-40, 200, 42)
-    else:
-        print "Error: Cannot determine whether donor or acceptor"
-
-    [strings, lab]=read_data(bz2.BZ2File(fastafilename), num_seqs, window)
-    # Only a subset of the examples are used.
-    if subset:
-        [strings, lab] = take_subset(strings, lab, max_pos, max_neg)
-
-    gcs=count_gs_and_cs(strings, (0, -window[0]), (-window[0]+2, -window[0]+2+window[2]))
-
-    seq_upstream = []
-    seq_downstream = []
-    for curstr in strings:
-        seq_upstream.append(curstr[0:-window[0]])
-        seq_downstream.append(curstr[(-window[0]+2):(-window[0]+2+window[2])])
-    seq_upstream = array(seq_upstream)
-    seq_downstream = array(seq_downstream)
-
-    spec_up = count_nt_freq(seq_upstream)
-    spec_down = count_nt_freq(seq_downstream)
-
-    if normalise:
-        gcs = normalise_features(gcs)
-        spec_up = normalise_features(spec_up)
-        spec_down = normalise_features(spec_down)
-
-    # sequence file
-    alist = [('label',1,[]),('sequence',0,[])]
-    f = open(seqfilename,'w')
-    arff.arffwrite(f,alist,zip(lab,strings),name=fastafilename,comment='Converted from '+fastafilename)
-    f.close()
-
-    # 2 sequence file
-    alist = [('label',1,[]),('upstream sequence',0,[]),('downstream sequence',0,[])]
-    f = open(seq2filename,'w')
-    arff.arffwrite(f,alist,zip(lab,seq_upstream,seq_downstream),\
-                   name=fastafilename,comment='Converted from '+fastafilename)
-    f.close()
-
-    # gc contents
-    alist = [('label',1,[]),('upstream',1,[]),('downstream',1,[])]
-    data = []
-    for ix,curlab in enumerate(lab):
-        data.append((curlab,gcs[0,ix],gcs[1,ix]))
-    f = open(gcfilename,'w')
-    arff.arffwrite(f,alist,data,name=fastafilename,comment='Converted from '+fastafilename)
-    f.close()
-
-    # spectrum
-    alist = [('label',1,[]),\
-             ('upA',1,[]),('upC',1,[]),('upG',1,[]),('upT',1,[]),\
-             ('downA',1,[]),('downC',1,[]),('downG',1,[]),('downT',1,[])]
-    data = []
-    for ix,curlab in enumerate(lab):
-        data.append((curlab,spec_up[0,ix],spec_up[1,ix],spec_up[2,ix],spec_up[3,ix],\
-                     spec_down[0,ix],spec_down[1,ix],spec_down[2,ix],spec_down[3,ix]))
-    if len(specfilename)>0:
-        f = open(specfilename,'w')
-        arff.arffwrite(f,alist,data,name=fastafilename,comment='Converted from '+fastafilename)
-        f.close()
-
-
-def take_subset(strings, lab, max_pos=200, max_neg=2000):
-    """Take a subset of the classes to the maximum numbers determined by
-    max_pos and max_neg
-    """
-    random.seed(123456789)
-
-    pos_idx = where(lab>0)[0]
-    neg_idx = where(lab<0)[0]
-    num_pos = len(pos_idx)
-    num_neg = len(neg_idx)
-
-    assert(num_pos < num_neg)
-    assert(max_pos < max_neg)
-
-    max_pos = min(max_pos,num_pos)
-    max_neg = min(max_neg,num_neg)
-
-    neg_sub_idx = array(random.sample(neg_idx,max_neg))
-    assert(all(lab[neg_sub_idx]<0))
-    pos_sub_idx = array(random.sample(pos_idx,max_pos))
-    assert(all(lab[pos_sub_idx]>0))
-
-    strings = concatenate((strings[pos_sub_idx],strings[neg_sub_idx]))
-    lab = concatenate((lab[pos_sub_idx],lab[neg_sub_idx]))
-
-    return (strings,lab)
-
-def balance_classes(strings, lab, max_examples=1200,ratio=5.0):
-    """Take a subset of negative examples such that
-    the number of examples in the negative class are limited to ratio.
-
-    Also limit the maximum number of examples.
-    """
-    random.seed(123456789)
-
-    pos_idx = where(lab>0)[0]
-    neg_idx = where(lab<0)[0]
-    num_pos = len(pos_idx)
-    num_neg = len(neg_idx)
-    assert(num_pos < num_neg)
-
-    max_pos = int(float(max_examples)/(ratio+1.0))
-
-    if num_pos < max_pos:
-        max_pos = num_pos
-
-    pos_idx = pos_idx[:max_pos]
-    num_pos = len(pos_idx)
-    max_neg = int(num_pos*ratio)
-    if num_neg < max_neg:
-        max_neg = num_neg
-
-    sub_idx = array(random.sample(neg_idx,max_neg))
-    assert(all(lab[sub_idx]<0))
-
-    strings = concatenate((strings[pos_idx],strings[sub_idx]))
-    lab = concatenate((lab[pos_idx],lab[sub_idx]))
-
-    return (strings,lab)
-
-def normalise_features(feats):
-    """Normalise each feature to zero mean and unit variance.
-    Assume features are column wise matrix.
-
-    """
-    (numdim,numex) = feats.shape
-
-    M = sum(feats,axis=1)/numex
-    M = M.reshape(numdim,1)
-
-    M2 = sum(feats**2,axis=1)/numex
-    M2 = M2.reshape(numdim,1)
-    SD = sqrt(M2-M**2)
-    onevec = ones((1,numex))
-    feats = (feats - kron(onevec,M))/(kron(onevec,SD))
-
-    return feats
-
-def read_data(f, num, window):
-    """Read the fasta file containing splice sites."""
-    labels=num*[0]
-    strings=num*[0]
-
-    l1 = f.readline()
-    l2 = f.readline()
-    line = 0
-    num_alt_consensus = 0
-    while l1 and l2 and line<num:
-        consensus = l2[:-1][window[1]:window[1]+2]
-        if (consensus == 'AG') or (consensus == 'GT'):
-            if 'label=-1' in l1:
-                labels[line]=-1
-            elif 'label=1' in l1:
-                labels[line]=+1
-            else:
-                print "error in line %d" % line
-                return
-
-            strings[line] = l2[:-1][window[1]+window[0] : window[1]+window[2]]
-            line+=1
-        else:
-            num_alt_consensus+=1
-            if consensus != 'GC':
-                print line, consensus
-
-        l1=f.readline()
-        l2=f.readline()
-
-    print "Number of GC consensus sites: %d" %num_alt_consensus
-    if line+num_alt_consensus!=num:
-        print "error reading file"
-        return
-    else:
-        strings = strings[:line+1]
-        labels = labels[:line+1]
-        return (array(strings), array(labels, dtype=numpy.double))
-
-def count_gs_and_cs(strings, range1, range2):
-    """Count the number of G and C in the two ranges."""
-    num=len(strings)
-    gc_count=num*[(0,0)]
-
-    for i in xrange(num):
-        x=float(strings[i].count('G', range1[0], range1[1]) +
-                        strings[i].count('C', range1[0], range1[1])) / abs(range1[1]-range1[0])
-        y=float(strings[i].count('G', range2[0], range2[1]) +
-                        strings[i].count('C', range2[0], range2[1])) / abs(range2[1]-range2[0])
-        gc_count[i]=(x,y)
-
-    return array(gc_count).T
-
-
-def count_nt_freq(strings):
-    """Count the nucleotide frequencies"""
-    num = len(strings)
-    strlen = len(strings[0])
-    ntfreq = num*[(0,0,0,0)]
-
-    for ix in xrange(num):
-        a=float(strings[ix].count('A')) / strlen
-        c=float(strings[ix].count('C')) / strlen
-        g=float(strings[ix].count('G')) / strlen
-        t=float(strings[ix].count('T')) / strlen
-        ntfreq[ix]=(a,c,g,t)
-
-    return array(ntfreq).T
-
diff --git a/applications/easysvm/tutpaper/data/effect_of_c.data b/applications/easysvm/tutpaper/data/effect_of_c.data
deleted file mode 100644
index a8b8b0e2269..00000000000
--- a/applications/easysvm/tutpaper/data/effect_of_c.data
+++ /dev/null
@@ -1,69 +0,0 @@
-#patternID,labels,0,1
-0,-1,-0.379032258065,0.635416666667
-1,-1,-0.79435483871,0.427083333333
-2,-1,-0.782258064516,0.130208333333
-3,-1,-0.479838709677,-0.4375
-4,-1,-0.322580645161,-0.166666666667
-5,-1,-0.334677419355,0.161458333333
-6,-1,-0.431451612903,0.21875
-7,-1,-0.645161290323,-0.0572916666667
-8,-1,-0.717741935484,-0.21875
-9,-1,-0.70564516129,-0.40625
-10,-1,-0.508064516129,-0.53125
-11,-1,-0.286290322581,-0.390625
-12,-1,-0.451612903226,0.0208333333333
-13,-1,-0.451612903226,0.0364583333333
-14,-1,-0.415322580645,0.380208333333
-15,-1,-0.516129032258,0.651041666667
-16,-1,-0.600806451613,0.432291666667
-17,-1,-0.701612903226,-0.0572916666667
-18,-1,-0.70564516129,-0.328125
-19,-1,-0.608870967742,-0.526041666667
-20,-1,-0.338709677419,-0.630208333333
-21,-1,-0.379032258065,-0.302083333333
-22,-1,-0.25,0.255208333333
-23,-1,-0.318548387097,0.489583333333
-24,-1,-0.576612903226,0.635416666667
-25,-1,-0.83064516129,0.015625
-26,-1,-0.766129032258,-0.416666666667
-27,-1,-0.681451612903,-0.427083333333
-28,-1,-0.552419354839,-0.09375
-29,-1,-0.516129032258,0.0625
-30,-1,-0.5,0.3125
-31,-1,-0.633064516129,0.244791666667
-32,-1,-0.709677419355,0.114583333333
-33,-1,-0.552419354839,0.369791666667
-34,-1,-0.443548387097,0.463541666667
-35,1,0.713709677419,0.755208333333
-36,1,0.556451612903,0.71875
-37,1,0.411290322581,0.479166666667
-38,1,0.407258064516,0.203125
-39,1,0.379032258065,-0.223958333333
-40,1,0.395161290323,-0.625
-41,1,0.637096774194,-0.53125
-42,1,0.927419354839,0.260416666667
-43,1,0.770161290323,0.765625
-44,1,0.560483870968,0.375
-45,1,0.58064516129,-0.0260416666667
-46,1,0.818548387097,-0.197916666667
-47,1,0.842741935484,-0.0104166666667
-48,1,0.754032258065,0.25
-49,1,0.685483870968,0.557291666667
-50,1,0.959677419355,0.822916666667
-51,1,0.826612903226,0.6875
-52,1,0.713709677419,0.302083333333
-53,1,0.403225806452,-0.328125
-54,1,0.463709677419,-0.515625
-55,1,0.560483870968,-0.416666666667
-56,1,0.576612903226,-0.192708333333
-57,1,0.423387096774,0.182291666667
-58,1,0.415322580645,0.421875
-59,1,0.387096774194,0.651041666667
-60,1,0.407258064516,0.802083333333
-61,1,0.383064516129,0.0104166666667
-62,1,0.282258064516,0.494791666667
-63,1,0.834677419355,0.447916666667
-64,-1,-0.0282258064516,0.09375
-65,1,0.0483870967742,0.213541666667
-66,1,0.0645161290323,0.171875
-67,-1,-0.0161290322581,0.0260416666667
diff --git a/applications/easysvm/tutpaper/data/nonlinear.data b/applications/easysvm/tutpaper/data/nonlinear.data
deleted file mode 100644
index 61e60f63373..00000000000
--- a/applications/easysvm/tutpaper/data/nonlinear.data
+++ /dev/null
@@ -1,71 +0,0 @@
-#patternID,labels,0,1
-0,-1,-0.685483870968,0.375
-1,-1,-0.79435483871,-0.130208333333
-2,-1,-0.459677419355,-0.21875
-3,-1,-0.495967741935,0.197916666667
-4,-1,-0.125,0.00520833333333
-5,-1,-0.0645161290323,0.411458333333
-6,-1,-0.318548387097,0.354166666667
-7,-1,-0.5,-0.302083333333
-8,-1,-0.221774193548,-0.557291666667
-9,-1,-0.0201612903226,-0.171875
-10,-1,-0.354838709677,0.03125
-11,-1,-0.290322580645,-0.359375
-12,-1,-0.576612903226,-0.536458333333
-13,-1,-0.665322580645,-0.119791666667
-14,-1,-0.548387096774,0.208333333333
-15,-1,-0.451612903226,0.421875
-16,-1,-0.225806451613,0.130208333333
-17,-1,-0.201612903226,-0.276041666667
-18,-1,-0.45564516129,-0.625
-19,1,-0.00806451612903,0.864583333333
-20,1,0.149193548387,0.755208333333
-21,1,0.270161290323,0.494791666667
-22,1,0.370967741935,0.25
-23,1,0.342741935484,-0.0885416666667
-24,1,0.282258064516,-0.375
-25,1,0.153225806452,-0.65625
-26,1,0.0362903225806,-0.828125
-27,1,-0.129032258065,-0.895833333333
-28,1,0.221774193548,0.770833333333
-29,1,0.395161290323,0.463541666667
-30,1,0.415322580645,0.0885416666667
-31,1,0.358870967742,-0.291666666667
-32,1,0.310483870968,-0.572916666667
-33,1,0.217741935484,-0.723958333333
-34,1,0.504032258065,0.520833333333
-35,1,0.592741935484,0.0729166666667
-36,1,0.479838709677,-0.296875
-37,1,0.403225806452,-0.546875
-38,1,0.366935483871,0.838541666667
-39,1,0.548387096774,0.161458333333
-40,1,0.411290322581,-0.385416666667
-41,1,0.318548387097,-0.567708333333
-42,1,0.229838709677,0.885416666667
-43,1,0.407258064516,-0.0364583333333
-44,1,0.399193548387,-0.447916666667
-45,1,0.209677419355,-0.833333333333
-46,-1,-0.29435483871,-0.0885416666667
-47,-1,-0.451612903226,-0.166666666667
-48,-1,-0.604838709677,0.0260416666667
-49,-1,-0.350806451613,0.239583333333
-50,-1,-0.572580645161,0.364583333333
-51,-1,-0.556451612903,0.515625
-52,-1,-0.258064516129,0.484375
-53,-1,-0.177419354839,0.265625
-54,-1,-0.435483870968,-0.463541666667
-55,-1,-0.677419354839,-0.333333333333
-56,-1,-0.5,-0.00520833333333
-57,-1,-0.415322580645,0.0989583333333
-58,-1,-0.443548387097,0.338541666667
-59,-1,-0.310483870968,0.161458333333
-60,-1,-0.362903225806,-0.244791666667
-61,-1,-0.548387096774,-0.40625
-62,-1,-0.592741935484,-0.140625
-63,-1,-0.395161290323,-0.046875
-64,-1,-0.435483870968,-0.354166666667
-65,-1,-0.431451612903,-0.442708333333
-66,-1,-0.5,-0.572916666667
-67,-1,-0.520161290323,-0.421875
-68,-1,-0.407258064516,0.192708333333
-69,-1,-0.637096774194,-0.0833333333333
diff --git a/applications/easysvm/tutpaper/data/small_gc_toy.data b/applications/easysvm/tutpaper/data/small_gc_toy.data
deleted file mode 100644
index 6c7ddf82f3a..00000000000
--- a/applications/easysvm/tutpaper/data/small_gc_toy.data
+++ /dev/null
@@ -1,15 +0,0 @@
-#patternID,labels,0,1
-0,-1,-0.379032258065,0.635416666667
-1,-1,-0.29435483871,0.427083333333
-2,-1,-0.782258064516,0.130208333333
-3,-1,-0.779838709677,-0.1375
-4,-1,-0.722580645161,+0.866666666667
-5,-1,-0.334677419355,0.161458333333
-6,-1,-0.131451612903,0.21875
-7,-1,-0.645161290323,-0.0572916666667
-8,-1,+0.531451612903,0.81875
-9,-1,+0.345161290323,0.3572916666667
-10,1,+0.593709677419,-0.555208333333
-11,1,-0.156451612903,-0.81875
-12,1,-0.211290322581,-0.579166666667
-13,1,+0.407258064516,-0.403125
diff --git a/applications/easysvm/tutpaper/data/small_gc_toy_outlier.data b/applications/easysvm/tutpaper/data/small_gc_toy_outlier.data
deleted file mode 100644
index 524643d7b6f..00000000000
--- a/applications/easysvm/tutpaper/data/small_gc_toy_outlier.data
+++ /dev/null
@@ -1,15 +0,0 @@
-#patternID,labels,0,1
-0,-1,-0.379032258065,0.635416666667
-1,-1,-0.29435483871,0.427083333333
-2,-1,-0.782258064516,0.130208333333
-3,-1,-0.779838709677,-0.9375
-4,-1,-0.722580645161,+0.866666666667
-5,-1,-0.334677419355,0.161458333333
-6,-1,-0.131451612903,0.21875
-7,-1,-0.645161290323,-0.0572916666667
-8,-1,+0.531451612903,0.81875
-9,-1,+0.345161290323,0.3572916666667
-10,1,+0.593709677419,-0.555208333333
-11,1,-0.156451612903,-0.81875
-12,1,-0.211290322581,-0.579166666667
-13,1,+0.407258064516,-0.403125
diff --git a/applications/easysvm/tutpaper/svm_params.py b/applications/easysvm/tutpaper/svm_params.py
deleted file mode 100644
index a0aa7afd553..00000000000
--- a/applications/easysvm/tutpaper/svm_params.py
+++ /dev/null
@@ -1,377 +0,0 @@
-
-#from matplotlib import rc
-#rc('text', usetex=True)
-
-fontsize = 16
-contourFontsize = 12
-showColorbar = False
-xmin = -1
-xmax = 1
-ymin = -1.05
-ymax = 1
-
-import sys,os
-
-import numpy
-import shogun
-from shogun import GaussianKernel, LinearKernel, PolyKernel
-from shogun import RealFeatures, BinaryLabels
-from shogun import LibSVM
-
-from numpy import arange
-import matplotlib
-from matplotlib import pylab
-pylab.rcParams['contour.negative_linestyle'] = 'solid'
-
-def features_from_file(fileName) :
-
-    fileHandle = open(fileName)
-    fileHandle.readline()
-    features = []
-    labels = []
-    for line in fileHandle :
-        tokens = line.split(',')
-        labels.append(float(tokens[1]))
-        features.append([float(token) for token in tokens[2:]])
-
-    return RealFeatures(numpy.transpose(numpy.array(features))), features, BinaryLabels(numpy.array(labels,numpy.float))
-
-def create_kernel(kname, features, kparam=None) :
-
-    if kname == 'gauss' :
-        kernel = GaussianKernel(features, features, kparam)
-    elif kname == 'linear':
-        kernel = LinearKernel(features, features)
-    elif kname == 'poly' :
-        kernel = PolyKernel(features, features, kparam, True, False)
-
-    return kernel
-
-
-def svm_train(kernel, labels, C1, C2=None):
-    """Trains a SVM with the given kernel"""
-
-    num_threads = 1
-
-    kernel.io.disable_progress()
-    svm = LibSVM(C1, kernel, labels)
-    if C2:
-        svm.set_C(C1, C2)
-    svm.parallel.set_num_threads(num_threads)
-    svm.io.disable_progress()
-    svm.train()
-
-    return svm
-
-def svm_test(svm, kernel, features_train, features_test) :
-    """predicts on the test examples"""
-
-    kernel.init(features_train, features_test)
-    output = svm.apply().get_labels()
-
-    return output
-
-
-def decision_boundary_plot(svm, features, vectors, labels, kernel, fileName = None, **args) :
-
-    title = None
-    if 'title' in args :
-        title = args['title']
-    xlabel = None
-    if 'xlabel' in args :
-        xlabel = args['xlabel']
-    ylabel = None
-    if 'ylabel' in args :
-        ylabel = args['ylabel']
-    fontsize = 'medium'
-    if 'fontsize' in args :
-        fontsize = args['fontsize']
-    contourFontsize = 10
-    if 'contourFontsize' in args :
-        contourFontsize = args['contourFontsize']
-    showColorbar = True
-    if 'showColorbar' in args :
-        showColorbar = args['showColorbar']
-    show = True
-    if fileName is not None :
-        show = False
-    if 'show' in args :
-        show = args['show']
-
-
-    # setting up the grid
-    delta = 0.005
-    x = arange(xmin, xmax, delta)
-    y = arange(ymin, ymax, delta)
-
-    Z = numpy.zeros((len(x), len(y)), numpy.float_)
-    gridX = numpy.zeros((len(x) *len(y), 2), numpy.float_)
-    n = 0
-    for i in range(len(x)) :
-        for j in range(len(y)) :
-            gridX[n][0] = x[i]
-            gridX[n][1] = y[j]
-            n += 1
-
-    if kernel.get_name() == 'Linear' and 'customwandb' in args:
-        kernel.init_optimization_svm(svm)
-        b=svm.get_bias()
-        w=kernel.get_w()
-        kernel.set_w(args['customwandb'][0])
-        svm.set_bias(args['customwandb'][1])
-
-    if kernel.get_name() == 'Linear' and 'drawarrow' in args:
-        kernel.init_optimization_svm(svm)
-        b=svm.get_bias()
-        w=kernel.get_w()
-        s=1.0/numpy.dot(w,w)/1.17
-        pylab.arrow(0,-b/w[1], w[0]*s,s*w[1], width=0.01, fc='#dddddd', ec='k')
-    grid_features = RealFeatures(numpy.transpose(gridX))
-    results = svm_test(svm, kernel, features, grid_features)
-
-    n = 0
-    for i in range(len(x)) :
-        for j in range(len(y)) :
-            Z[i][j] = results[n]
-            n += 1
-
-    cdict = {'red'  :((0.0, 0.6, 0.6),(0.5, 0.8, 0.8),(1.0, 1.0, 1.0)),
-             'green':((0.0, 0.6, 0.6),(0.5, 0.8, 0.8),(1.0, 1.0, 1.0)),
-             'blue' :((0.0, 0.6, 0.6),(0.5, 0.8, 0.8),(1.0, 1.0, 1.0)),
-             }
-    my_cmap = matplotlib.colors.LinearSegmentedColormap('lightgray',cdict,256)
-    im = pylab.imshow(numpy.transpose(Z),
-                      interpolation='bilinear', origin='lower',
-                      cmap=my_cmap, extent=(xmin,xmax,ymin,ymax) )
-
-    if 'decisionboundaryonly' in args:
-        C1 = pylab.contour(numpy.transpose(Z),
-                [0],
-                origin='lower',
-                linewidths=(3),
-                colors = ['k'],
-                extent=(xmin,xmax,ymin,ymax))
-    else:
-        C1 = pylab.contour(numpy.transpose(Z),
-                [-1,0,1],
-                origin='lower',
-                linewidths=(1,3,1),
-                colors = ['k','k'],
-                extent=(xmin,xmax,ymin,ymax))
-
-        pylab.clabel(C1,
-                inline=1,
-                fmt='%1.1f',
-                fontsize=contourFontsize)
-
-    # plot the data
-    lab=labels.get_labels()
-    vec=numpy.array(vectors)
-    idx=numpy.where(lab==-1)[0]
-    pylab.scatter(vec[idx,0], vec[idx,1], s=300, c='#4444ff', marker='o', alpha=0.8, zorder=100)
-    idx=numpy.where(lab==+1)[0]
-    pylab.scatter(vec[idx,0], vec[idx,1], s=500, c='#ff4444', marker='s', alpha=0.8, zorder=100)
-
-    # plot SVs
-    if not 'decisionboundaryonly' in args:
-        training_outputs = svm_test(svm, kernel, features, features)
-        sv_idx=numpy.where(abs(training_outputs)<=1.01)[0]
-        pylab.scatter(vec[sv_idx,0], vec[sv_idx,1], s=100, c='k', marker='o', alpha=0.8, zorder=100)
-
-    if 'showmovedpoint' in args:
-        x=-0.779838709677
-        y=-0.1375
-        pylab.scatter([x], [y], s=300, c='#4e4e61', marker='o', alpha=1, zorder=100, edgecolor='#454548')
-        pylab.arrow(x,y-0.1, 0, -0.8/1.5, width=0.01, fc='#dddddd', ec='k')
-        #pylab.show()
-
-
-    if title is not None :
-        pylab.title(title, fontsize=fontsize)
-    if ylabel:
-        pylab.ylabel(ylabel,fontsize=fontsize)
-    if xlabel:
-        pylab.xlabel(xlabel,fontsize=fontsize)
-    if showColorbar :
-        pylab.colorbar(im)
-
-    # colormap:
-    pylab.hot()
-    if fileName is not None :
-        pylab.savefig(fileName)
-    if show :
-        pylab.show()
-
-def add_percent_ticks():
-    ticks=pylab.getp(pylab.gca(),'xticks')
-    ticklabels=len(ticks)*['']
-    ticklabels[0]='0%'
-    ticklabels[-1]='100%'
-    pylab.setp(pylab.gca(), xticklabels=ticklabels)
-
-    pylab.setp(pylab.gca(), yticklabels=['0%','100%'])
-    ticks=pylab.getp(pylab.gca(),'yticks')
-    ticklabels=len(ticks)*['']
-    #ticklabels[0]='0%'
-    ticklabels[-1]='100%'
-    pylab.setp(pylab.gca(), yticklabels=ticklabels)
-
-    xticklabels = pylab.getp(pylab.gca(), 'xticklabels')
-    yticklabels = pylab.getp(pylab.gca(), 'yticklabels')
-    pylab.setp(xticklabels, fontsize=fontsize)
-    pylab.setp(yticklabels, fontsize=fontsize)
-
-
-
-def create_figures(extension = 'pdf', directory = '../../tex/figures') :
-
-    if extension[0] != '.' :
-        extension = '.' + extension
-
-    dpi=90
-
-    # data and linear decision boundary
-    features,vectors,labels = features_from_file('data/small_gc_toy.data')
-
-    kernel = create_kernel('linear', features)
-    svm = svm_train(kernel, labels, 0.7)
-
-    pylab.figure(figsize=(8,6), dpi=dpi)
-    decision_boundary_plot(svm, features, vectors, labels, kernel,
-            fontsize=fontsize, contourFontsize=contourFontsize,
-            title="Linear Separation", customwandb=(numpy.array([-0.05, -1.0]), -0.3),
-            ylabel="GC Content Before 'AG'",xlabel="GC Content After 'AG'",
-            show=False, showColorbar=showColorbar, decisionboundaryonly=True)
-    add_percent_ticks()
-
-    pylab.savefig(os.path.join(directory, 'data_and_linear_classifier' + extension))
-    pylab.close()
-#####################################################################################
-    # data and svm decision boundary
-    features,vectors,labels = features_from_file('data/small_gc_toy.data')
-
-    kernel = create_kernel('linear', features)
-    svm = svm_train(kernel, labels, 100)
-
-    pylab.figure(figsize=(8,6), dpi=dpi)
-    decision_boundary_plot(svm, features, vectors, labels, kernel,
-            fontsize=fontsize, contourFontsize=contourFontsize,
-            title="Maximum Margin Separation", drawarrow=True,
-            ylabel="GC Content Before 'AG'",xlabel="GC Content After 'AG'",
-            show=False, showColorbar=showColorbar)
-    add_percent_ticks()
-
-    pylab.savefig(os.path.join(directory, 'data_and_svm_classifier' + extension))
-    pylab.close()
-#####################################################################################
-
-
-    # the effect of C on the decision surface:
-    features,vectors,labels = features_from_file('data/small_gc_toy_outlier.data')
-
-    pylab.figure(figsize=(16,6), dpi=dpi)
-    pylab.subplot(121)
-    kernel = create_kernel('linear', features)
-    svm = svm_train(kernel, labels, 200)
-    decision_boundary_plot(svm, features, vectors, labels, kernel,
-            title = 'Soft-Margin with C=200', ylabel="GC Content Before 'AG'",
-            xlabel="GC Content After 'AG'", fontsize=fontsize,
-            contourFontsize=contourFontsize, show=False, showmovedpoint=True,
-            showColorbar=showColorbar)
-    add_percent_ticks()
-
-    pylab.subplot(122)
-    kernel = create_kernel('linear', features)
-    svm = svm_train(kernel, labels, 2)
-    decision_boundary_plot(svm, features, vectors, labels, kernel,
-            title = 'Soft-Margin with C=2',
-            ylabel="GC Content Before 'AG'",xlabel="GC Content After 'AG'",
-            fontsize=fontsize, contourFontsize=contourFontsize, show=False, showColorbar=showColorbar)
-    add_percent_ticks()
-    #pylab.subplots_adjust(bottom=0.05, top=0.95)
-
-    pylab.savefig(os.path.join(directory, 'effect_of_c' + extension))
-    pylab.close()
-####################################################################################
-
-    # playing with nonlinear data:
-    # the effect of kernel parameters
-
-    features,vectors,labels = features_from_file('data/small_gc_toy_outlier.data')
-    pylab.figure(figsize=(24,6), dpi=dpi)
-    pylab.subplot(131)
-    kernel = create_kernel('linear', features)
-    svm = svm_train(kernel, labels, 100)
-    decision_boundary_plot(svm, features, vectors, labels, kernel,
-            title = 'Linear Kernel',
-            ylabel="GC Content Before 'AG'",
-            fontsize=fontsize, contourFontsize=contourFontsize, show=False,showColorbar=showColorbar)
-    add_percent_ticks()
-
-    pylab.subplot(132)
-    kernel = create_kernel('poly', features, 2)
-    svm = svm_train(kernel, labels, 100)
-    decision_boundary_plot(svm, features, vectors, labels, kernel,
-            title='Polynomial Kernel d=2',
-            xlabel="GC Content After 'AG'",
-            fontsize=fontsize, contourFontsize=contourFontsize, show=False,showColorbar=showColorbar)
-    add_percent_ticks()
-
-    pylab.subplot(133)
-    kernel = create_kernel('poly', features, 5)
-    svm = svm_train(kernel, labels, 10)
-    decision_boundary_plot(svm, features, vectors, labels, kernel,
-            title='Polynomial Kernel d=5',
-            fontsize=fontsize, contourFontsize=contourFontsize, show=False,showColorbar=showColorbar)
-    add_percent_ticks()
-    #pylab.subplots_adjust(bottom=0.05, top=0.95)
-
-    pylab.savefig(os.path.join(directory, 'params_polynomial' + extension))
-    pylab.close()
-####################################################################################
-
-    #effects of sigma
-    pylab.figure(figsize=(24,6), dpi=dpi)
-    pylab.subplot(131)
-    gamma = 0.1
-    sigma = 20.0
-    kernel = create_kernel('gauss', features, sigma)
-    svm = svm_train(kernel, labels, 100)
-    decision_boundary_plot(svm, features, vectors, labels, kernel,
-            title='Gaussian Kernel Sigma=20',
-            ylabel="GC Content Before 'AG'",
-            fontsize=fontsize, contourFontsize=contourFontsize, show=False,showColorbar=showColorbar)
-    add_percent_ticks()
-
-    pylab.subplot(132)
-    sigma = 1.0
-    kernel = create_kernel('gauss', features, sigma)
-    svm = svm_train(kernel, labels, 100)
-    decision_boundary_plot(svm, features, vectors, labels, kernel,
-            title='Gaussian Kernel Sigma=1',
-            xlabel="GC Content After 'AG'",
-            fontsize=fontsize, contourFontsize=contourFontsize, show=False,showColorbar=showColorbar)
-    add_percent_ticks()
-
-    pylab.subplot(133)
-    sigma = 0.05
-    kernel = create_kernel('gauss', features, sigma)
-    svm = svm_train(kernel, labels, 100)
-    decision_boundary_plot(svm, features, vectors, labels, kernel,
-            title='Gaussian Kernel Sigma=0.05',
-            fontsize=fontsize, contourFontsize=contourFontsize, show=False,showColorbar=showColorbar)
-    add_percent_ticks()
-
-    #pylab.subplots_adjust(bottom=0.05, top=0.95)
-
-    pylab.savefig(os.path.join(directory, 'params_gaussian' + extension))
-    pylab.close()
-####################################################################################
-
-if __name__ == '__main__' :
-
-    extension = 'pdf'
-    if len(sys.argv) > 1 :
-        extension = sys.argv[1]
-    pylab.ioff()
-    create_figures(extension)
diff --git a/applications/msplicer/LICENSE b/applications/msplicer/LICENSE
deleted file mode 100644
index 5b6e7c66c27..00000000000
--- a/applications/msplicer/LICENSE
+++ /dev/null
@@ -1,340 +0,0 @@
-		    GNU GENERAL PUBLIC LICENSE
-		       Version 2, June 1991
-
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.
-                       59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-			    Preamble
-
-  The licenses for most software are designed to take away your
-freedom to share and change it.  By contrast, the GNU General Public
-License is intended to guarantee your freedom to share and change free
-software--to make sure the software is free for all its users.  This
-General Public License applies to most of the Free Software
-Foundation's software and to any other program whose authors commit to
-using it.  (Some other Free Software Foundation software is covered by
-the GNU Library General Public License instead.)  You can apply it to
-your programs, too.
-
-  When we speak of free software, we are referring to freedom, not
-price.  Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-this service if you wish), that you receive source code or can get it
-if you want it, that you can change the software or use pieces of it
-in new free programs; and that you know you can do these things.
-
-  To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if you
-distribute copies of the software, or if you modify it.
-
-  For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have.  You must make sure that they, too, receive or can get the
-source code.  And you must show them these terms so they know their
-rights.
-
-  We protect your rights with two steps: (1) copyright the software, and
-(2) offer you this license which gives you legal permission to copy,
-distribute and/or modify the software.
-
-  Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software.  If the software is modified by someone else and passed on, we
-want its recipients to know that what they have is not the original, so
-that any problems introduced by others will not reflect on the original
-authors' reputations.
-
-  Finally, any free program is threatened constantly by software
-patents.  We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary.  To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at all.
-
-  The precise terms and conditions for copying, distribution and
-modification follow.
-
-		    GNU GENERAL PUBLIC LICENSE
-   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
-  0. This License applies to any program or other work which contains
-a notice placed by the copyright holder saying it may be distributed
-under the terms of this General Public License.  The "Program", below,
-refers to any such program or work, and a "work based on the Program"
-means either the Program or any derivative work under copyright law:
-that is to say, a work containing the Program or a portion of it,
-either verbatim or with modifications and/or translated into another
-language.  (Hereinafter, translation is included without limitation in
-the term "modification".)  Each licensee is addressed as "you".
-
-Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope.  The act of
-running the Program is not restricted, and the output from the Program
-is covered only if its contents constitute a work based on the
-Program (independent of having been made by running the Program).
-Whether that is true depends on what the Program does.
-
-  1. You may copy and distribute verbatim copies of the Program's
-source code as you receive it, in any medium, provided that you
-conspicuously and appropriately publish on each copy an appropriate
-copyright notice and disclaimer of warranty; keep intact all the
-notices that refer to this License and to the absence of any warranty;
-and give any other recipients of the Program a copy of this License
-along with the Program.
-
-You may charge a fee for the physical act of transferring a copy, and
-you may at your option offer warranty protection in exchange for a fee.
-
-  2. You may modify your copy or copies of the Program or any portion
-of it, thus forming a work based on the Program, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
-    a) You must cause the modified files to carry prominent notices
-    stating that you changed the files and the date of any change.
-
-    b) You must cause any work that you distribute or publish, that in
-    whole or in part contains or is derived from the Program or any
-    part thereof, to be licensed as a whole at no charge to all third
-    parties under the terms of this License.
-
-    c) If the modified program normally reads commands interactively
-    when run, you must cause it, when started running for such
-    interactive use in the most ordinary way, to print or display an
-    announcement including an appropriate copyright notice and a
-    notice that there is no warranty (or else, saying that you provide
-    a warranty) and that users may redistribute the program under
-    these conditions, and telling the user how to view a copy of this
-    License.  (Exception: if the Program itself is interactive but
-    does not normally print such an announcement, your work based on
-    the Program is not required to print an announcement.)
-
-These requirements apply to the modified work as a whole.  If
-identifiable sections of that work are not derived from the Program,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works.  But when you
-distribute the same sections as part of a whole which is a work based
-on the Program, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Program.
-
-In addition, mere aggregation of another work not based on the Program
-with the Program (or with a work based on the Program) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
-  3. You may copy and distribute the Program (or a work based on it,
-under Section 2) in object code or executable form under the terms of
-Sections 1 and 2 above provided that you also do one of the following:
-
-    a) Accompany it with the complete corresponding machine-readable
-    source code, which must be distributed under the terms of Sections
-    1 and 2 above on a medium customarily used for software interchange; or,
-
-    b) Accompany it with a written offer, valid for at least three
-    years, to give any third party, for a charge no more than your
-    cost of physically performing source distribution, a complete
-    machine-readable copy of the corresponding source code, to be
-    distributed under the terms of Sections 1 and 2 above on a medium
-    customarily used for software interchange; or,
-
-    c) Accompany it with the information you received as to the offer
-    to distribute corresponding source code.  (This alternative is
-    allowed only for noncommercial distribution and only if you
-    received the program in object code or executable form with such
-    an offer, in accord with Subsection b above.)
-
-The source code for a work means the preferred form of the work for
-making modifications to it.  For an executable work, complete source
-code means all the source code for all modules it contains, plus any
-associated interface definition files, plus the scripts used to
-control compilation and installation of the executable.  However, as a
-special exception, the source code distributed need not include
-anything that is normally distributed (in either source or binary
-form) with the major components (compiler, kernel, and so on) of the
-operating system on which the executable runs, unless that component
-itself accompanies the executable.
-
-If distribution of executable or object code is made by offering
-access to copy from a designated place, then offering equivalent
-access to copy the source code from the same place counts as
-distribution of the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
-  4. You may not copy, modify, sublicense, or distribute the Program
-except as expressly provided under this License.  Any attempt
-otherwise to copy, modify, sublicense or distribute the Program is
-void, and will automatically terminate your rights under this License.
-However, parties who have received copies, or rights, from you under
-this License will not have their licenses terminated so long as such
-parties remain in full compliance.
-
-  5. You are not required to accept this License, since you have not
-signed it.  However, nothing else grants you permission to modify or
-distribute the Program or its derivative works.  These actions are
-prohibited by law if you do not accept this License.  Therefore, by
-modifying or distributing the Program (or any work based on the
-Program), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Program or works based on it.
-
-  6. Each time you redistribute the Program (or any work based on the
-Program), the recipient automatically receives a license from the
-original licensor to copy, distribute or modify the Program subject to
-these terms and conditions.  You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties to
-this License.
-
-  7. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Program at all.  For example, if a patent
-license would not permit royalty-free redistribution of the Program by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Program.
-
-If any portion of this section is held invalid or unenforceable under
-any particular circumstance, the balance of the section is intended to
-apply and the section as a whole is intended to apply in other
-circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system, which is
-implemented by public license practices.  Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
-  8. If the distribution and/or use of the Program is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Program under this License
-may add an explicit geographical distribution limitation excluding
-those countries, so that distribution is permitted only in or among
-countries not thus excluded.  In such case, this License incorporates
-the limitation as if written in the body of this License.
-
-  9. The Free Software Foundation may publish revised and/or new versions
-of the General Public License from time to time.  Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-Each version is given a distinguishing version number.  If the Program
-specifies a version number of this License which applies to it and "any
-later version", you have the option of following the terms and conditions
-either of that version or of any later version published by the Free
-Software Foundation.  If the Program does not specify a version number of
-this License, you may choose any version ever published by the Free Software
-Foundation.
-
-  10. If you wish to incorporate parts of the Program into other free
-programs whose distribution conditions are different, write to the author
-to ask for permission.  For software which is copyrighted by the Free
-Software Foundation, write to the Free Software Foundation; we sometimes
-make exceptions for this.  Our decision will be guided by the two goals
-of preserving the free status of all derivatives of our free software and
-of promoting the sharing and reuse of software generally.
-
-			    NO WARRANTY
-
-  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
-FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
-OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
-PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
-OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
-TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
-PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
-REPAIR OR CORRECTION.
-
-  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
-REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
-INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
-OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
-TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
-YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
-PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGES.
-
-		     END OF TERMS AND CONDITIONS
-
-	    How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-
-Also add information on how to contact you by electronic and paper mail.
-
-If the program is interactive, make it output a short notice like this
-when it starts in an interactive mode:
-
-    Gnomovision version 69, Copyright (C) year name of author
-    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-    This is free software, and you are welcome to redistribute it
-    under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License.  Of course, the commands you use may
-be called something other than `show w' and `show c'; they could even be
-mouse-clicks or menu items--whatever suits your program.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a "copyright disclaimer" for the program, if
-necessary.  Here is a sample; alter the names:
-
-  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
-  `Gnomovision' (which makes passes at compilers) written by James Hacker.
-
-  <signature of Ty Coon>, 1 April 1989
-  Ty Coon, President of Vice
-
-This General Public License does not permit incorporating your program into
-proprietary programs.  If your program is a subroutine library, you may
-consider it more useful to permit linking proprietary applications with the
-library.  If this is what you want to do, use the GNU Library General
-Public License instead of this License.
diff --git a/applications/msplicer/Makefile b/applications/msplicer/Makefile
deleted file mode 100644
index 91104e2868c..00000000000
--- a/applications/msplicer/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-release-dir := msplicer-0.3
-
-release:
-	( cd .. ; mkdir -p $(release-dir)/data ; \
-	cp python/msplicer python/LICENSE python/README python/NEWS python/*.py $(release-dir) ; \
-	tar cjvf $(release-dir).tar.bz2 $(release-dir) )
-clean:
-	rm -f *.pyc
diff --git a/applications/msplicer/NEWS b/applications/msplicer/NEWS
deleted file mode 100644
index be08d8d7af1..00000000000
--- a/applications/msplicer/NEWS
+++ /dev/null
@@ -1,6 +0,0 @@
-2008-05-18  Soeren Sonnenburg <Soeren.Sonnenburg@first.fraunhofer.de>
-
-    * mSplicer version 0.3
-		- Adjust code to work with newer shogun versions.
-		- Require shogun version at least 0.6.2 as splice site prediction in
-		the new version is now orders of magnitude faster.
diff --git a/applications/msplicer/README b/applications/msplicer/README
deleted file mode 100644
index c2757148a4c..00000000000
--- a/applications/msplicer/README
+++ /dev/null
@@ -1,110 +0,0 @@
-This is the mSplicer program accompanying the PLoS "Improving the C. elegans
-genome annotation using machine learning" submission.  by Gunnar Rätsch, Sören
-Sonnenburg, Jagan Srinivasan, Hanh Witte, Klaus-Robert Müller, Ralf Sommer and
-Bernhard Schölkopf. Published in PLoS Computational Biology, February, 2007.
-
-ABSTRACT:
-
-For modern biology, precise genome annotations are of prime importance as they
-allow the accurate definition of genic regions. We employ state of the art
-machine learning methods to assay and improve the accuracy of the genome
-annotation of the nematode Caenorhabditis elegans. The proposed machine
-learning system is trained to recognize exons and introns on the unspliced mRNA
-utilizing recent advances in support vector machines and label sequence
-learning. In 87% (coding and untranslated regions) and 95% (coding regions
-only) of all genes tested in several out-of-sample evaluations, our method
-correctly identified all exons and introns. Notably, only 37% and 50%,
-respectively, of the presently unconfirmed genes in the C. elegans genome
-annotation agree with our predictions, thus we hypothesize that a sizable
-fraction of those genes are not correctly annotated. A retrospective evaluation
-of the Wormbase WS120 annotation [1] of C. elegans reveals that splice form
-predictions on unconfirmed genes in WS120 are inaccurate in about 18% of the
-considered cases, while our predictions deviate from the truth only in 10 −
-13%. We experimentally analyzed 20 controversial genes on which our system and
-the annotation disagree, confirming the superiority of our predictions. While
-our method correctly predicted 75% of those cases, the standard annotation was
-never completely correct. The accuracy of our system is further corroborated by
-a comparison with two other recently proposed systems that can be used for
-splice form prediction: SNAP and ExonHunter. We conclude that the genome
-annotation of C. elegans and other organisms can be greatly enhanced using
-modern machine learning technology.  Availabibility:
-
-Training the mSplicer involves solving a relatively large linear optimization
-problem, which we have implemented in MATLAB using the CPLEX optimization
-package. Additionally we have developed a standalone tool for predicting the
-splice form for C. elegans sequences implemented in PYTHON and C++ available
-under the General Public License. It is based on python scripts that call
-methods implemented in C++ for predicting splice sites using Support Vector
-Machines [2] and Dynamic Programming for splice form prediction. These routines
-are part of the freely available Shogun toolbox for large scale kernel learning
-[3] which is available under http://www.shogun-toolbox.org.
-
-If you have questions regarding the results in [4], please consult
-http://www.msplicer.org or contact Gunnar Rätsch. In case you have difficulties
-using the provided software, please contact Sören Sonnenburg or Gunnar Rätsch.
-
-Following a statistical setup common in machine learning, we trained our
-system on 60% of the available cDNA sequences currently known for C. elegans
-(based on Wormbase [5], version WS120). The remaining 40% of the cDNA sequences
-were used to generate an independent set for out-of-sample testing.
-Additionally, we used available EST sequences (dbEST [6], as of 19/02/2004) to
-maximally extend the cDNA sequences at the 5’ and 3’ ends. For training, we did
-not use any EST sequences overlapping with the 40% of the cDNA sequences for
-out-of-sample prediction.
-
-MSPLICER PROGRAM REQUIREMENTS:
-
-The stand alone linux binary does not need further compilation/libraries and
-should run out of the box (tested on Debian sarge and Debian etch).
-
-For the python version you need a working python 2.4 installation with numpy
-(version 1.0 or later) and the shogun toolbox (version 0.6.2 or later)
-- which is available from http://www.shogun-toolbox.org for Linux, MacOSX,
-cygwin/win32. If you are running Debian GNU Linux, shogun 0.6.2 is available in
-debian unstable http://packages.debian.org/unstable/science/shogun-python-modular.
-
-MSPLICER PROGRAM RUNNING TIME AND MEMORY REQUIREMENTS:
-
-mSplicer requires about 100M of memory for short sequences. Memory requirements
-don't grow much (a additional linear term w.r.t. the length of the input
-sequence). On first run with a new model (see --model option below),
-msplicer will load and decompress the .bz2 compressed model file and store it
-as a python native pickle dump, which increases startup times a lot.
-Due to the optimizations in [3] splice form prediction (layer 1) times
-won't change much for many/long sequences. Otherwise mSplicer running times are
-dominated by computing the viterby path (layer 2). For example computing
-the output of the 708 sequences (2.3Mb) of elegans_WS160_mSplicer_val.fa takes
-on a 2GHz machine about 15 minutes and 170M of memory.
-
-MSPLICER PROGRAM USAGE:
-
-./msplicer fasta_file.fa
-
-This will read all entries in the .fa file and print a .gff file with the
-predictions for each of the entries to stdout. One may optionally specify the
-start and stop of the transcript via --start <basenum> / --stop <basenum> and
-the model via --model one of WS120, WS120gc, WS150, WS160, WS160gc. Note that
-<basenum> is zero based.
-
-
-REFERENCES:
-
-[1]	Harris T, Chen N, Cunningham F, et al. (2004) Wormbase, a multi-species
-	resource for nematode biology and genomics. Nucl Acids Res 32. D411-7.
-
-[2]	Cortes, C, Vapnik, VN. Support-vector networks. Machine Learning,
-	20(3):273--297, 1995.
-
-[3]	Sonnenburg, S, Rätsch, G, Schäfer, C, Schölkopf, B. Large Scale Multiple
-	Kernel Learning. Journal of Machine Learning Research,7:1531-1565,
-	July 2006, K.Bennett and E.P.-Hernandez Editors.
-
-[4]	Rätsch, G, Sonnenburg, S, Srinivasan, J, Witte, H, Müller, KR, Sommer, R,
-	and Schölkopf, B (2007). Improving the C. elegans genome annotation using
-	machine learning. PLoS Computational Biology 3(2):e20.
-
-[5]	Schwarz E, Antoshechkin I, Bastiani C, et al (2006) Wormbase, better
-	software, richer content. Nucleic Acids Res 34:D475–8.
-
-[6]	Boguski M, Tolstoshev TLC (1993). dbEST–database for expressed sequence
-	tags. Nat Genet 4,332–3.
diff --git a/applications/msplicer/content_sensors.py b/applications/msplicer/content_sensors.py
deleted file mode 100644
index ee0bc39ad7e..00000000000
--- a/applications/msplicer/content_sensors.py
+++ /dev/null
@@ -1,56 +0,0 @@
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Written (W) 2006-2007 Soeren Sonnenburg
-# Written (W) 2007 Gunnar Raetsch
-# Copyright (C) 2007-2008 Fraunhofer Institute FIRST and Max-Planck-Society
-#
-
-import numpy
-
-class content_sensors:
-	def __init__(self, model):
-		self.dict_weights_intron=numpy.array(model.dict_weights_intron, dtype=numpy.float64)
-		self.dict_weights_coding=numpy.array(model.dict_weights_coding, dtype=numpy.float64)
-
-		self.dicts=numpy.concatenate((self.dict_weights_coding,self.dict_weights_intron, self.dict_weights_coding, self.dict_weights_intron, self.dict_weights_coding,self.dict_weights_intron, self.dict_weights_coding, self.dict_weights_intron), axis=0)
-
-		self.dicts[0, 64:] = 0      # only order 3 info
-		self.dicts[1, 64:] = 0      # only order 3 info
-		self.dicts[2, 0:64] = 0     # only order 4 info
-		self.dicts[2, 320:] = 0
-		self.dicts[3, 0:64] = 0     # only order 4 info
-		self.dicts[3, 320:] = 0
-		self.dicts[4, 0:320] = 0    # only order 5 info
-		self.dicts[4, 1344:] = 0
-		self.dicts[5, 0:320] = 0    # only order 5 info
-		self.dicts[5, 1344:] = 0
-		self.dicts[6, 0:1344] = 0   # only order 6 info
-		self.dicts[7, 0:1344] = 0   # only order 6 info
-
-		self.model = model
-
-	def get_dict_weights(self):
-		return self.dicts.T
-
-	def initialize_content(self, dyn):
-		dyn.init_svm_arrays(len(self.model.word_degree), len(self.model.mod_words))
-
-		word_degree = numpy.array(self.model.word_degree, numpy.int32)
-		dyn.init_word_degree_array(word_degree)
-
-		mod_words = numpy.array(4**word_degree, numpy.int32)
-		dyn.init_num_words_array(mod_words)
-
-		cum_mod_words=numpy.zeros(len(mod_words)+1, numpy.int32)
-		cum_mod_words[1:] = numpy.cumsum(mod_words)
-		dyn.init_cum_num_words_array(cum_mod_words)
-
-		dyn.init_mod_words_array(numpy.array(self.model.mod_words, numpy.int32))
-		dyn.init_sign_words_array(numpy.array(self.model.sign_words, numpy.bool))
-		dyn.init_string_words_array(numpy.zeros(len(self.model.sign_words), numpy.int32))
-
-		assert(dyn.check_svm_arrays())
diff --git a/applications/msplicer/convert_mat.m b/applications/msplicer/convert_mat.m
deleted file mode 100644
index 6c30878e5d4..00000000000
--- a/applications/msplicer/convert_mat.m
+++ /dev/null
@@ -1,213 +0,0 @@
-function convert_mat()
-
-addpath ../matlab
-fnames={'../matlab/msplicer_elegansWS120_gc=0_orf=0.mat', '../matlab/msplicer_elegansWS120_gc=0_orf=1.mat', '../matlab/msplicer_elegansWS120_gc=1_orf=0.mat', '../matlab/msplicer_elegansWS150_gc=0_orf=0.mat', '../matlab/msplicer_elegansWS160_gc=0_orf=0.mat', '../matlab/msplicer_elegansWS160_gc=1_orf=0.mat', '../matlab/msplicer_elegansWS160_gc=1_orf=1.mat'};
-
-for i=1:length(fnames),
-	clear L;
-	L=load(fnames{i});
-	targetname=[ './data', fnames{i}(10:end-3), 'dat' ];
-
-	fid=fopen(targetname,'wb');
-
-	fprintf(fid, '%%msplicer definition file version: 1.0\n\n');
-	fprintf(fid, 'bins=%d\n', L.bins);
-	fprintf(fid, 'dict_weights_intron=');
-	write_mat(fid, L.dict_weights_train.intron);
-	fprintf(fid, 'dict_weights_coding=');
-	write_mat(fid, L.dict_weights_train.coding);
-	fprintf(fid,'\n');
-
-    % has to fit to the python code (order of array in plif.py)
-    penids.acceptor = 0 ;
-    penids.donor = 1 ;
-    penids.first_coding_len = 2 ;
-    penids.last_coding_len = 3 ;
-    penids.coding_len = 4 ;
-    penids.single_coding_len = 5 ;
-    penids.intron_len = 6 ;
-
-	if ~isempty(findstr(targetname,'_orf=1'))
-		make_a_trans_orf(fid, L, penids);
-	else
-		make_a_trans_noorf(fid, L, penids);
-	end
-
-	%penalties
-	fprintf(fid,'%%penalties\n');
-	write_penalty(fid, 'penalty_acceptor', L.penalty.acceptor);
-	write_penalty(fid, 'penalty_donor', L.penalty.donor);
-	write_penalty(fid, 'penalty_coding_len', L.penalty.coding_len);
-	write_penalty(fid, 'penalty_first_coding_len', L.penalty.first_coding_len);
-	write_penalty(fid, 'penalty_last_coding_len', L.penalty.last_coding_len);
-	write_penalty(fid, 'penalty_single_coding_len', L.penalty.single_coding_len);
-	write_penalty(fid, 'penalty_intron_len', L.penalty.intron_len);
-	write_penalty(fid, 'penalty_coding', L.penalty.coding);
-	write_penalty(fid, 'penalty_coding2', L.penalty.coding2);
-	write_penalty(fid, 'penalty_coding3', L.penalty.coding3);
-	write_penalty(fid, 'penalty_coding4', L.penalty.coding4);
-	write_penalty(fid, 'penalty_intron', L.penalty.intron);
-	write_penalty(fid, 'penalty_intron2', L.penalty.intron2);
-	write_penalty(fid, 'penalty_intron3', L.penalty.intron3);
-	write_penalty(fid, 'penalty_intron4', L.penalty.intron4);
-	write_penalty(fid, 'penalty_transitions', L.penalty.transitions);
-	fprintf(fid,'\n');
-
-	acc=load(L.accfname);
-	fprintf(fid,'%%acceptor splice\n');
-	fprintf(fid, 'acc_splice_b=%e\n', acc.b);
-	fprintf(fid, 'acc_splice_order=%d\n', acc.PAR.order);
-	fprintf(fid, 'acc_splice_window_left=%d\n', 60);
-	fprintf(fid, 'acc_splice_window_right=%d\n', 79);
-	fprintf(fid, 'acc_splice_alphas=');
-	write_mat(fid, acc.alphas);
-	fprintf(fid, 'acc_splice_svs=');
-	write_string(fid, acc.XT);
-	fprintf(fid,'\n');
-
-	don=load(L.donfname);
-	fprintf(fid,'%%donor splice\n');
-	fprintf(fid, 'don_splice_b=%e\n', don.b);
-	fprintf(fid, 'don_splice_use_gc=%d\n', don.PAR.use_gc);
-	fprintf(fid, 'don_splice_order=%d\n', don.PAR.order);
-	fprintf(fid, 'don_splice_window_left=%d\n', 80);
-	fprintf(fid, 'don_splice_window_right=%d\n', 59);
-	fprintf(fid, 'don_splice_alphas=');
-	write_mat(fid, don.alphas);
-	fprintf(fid, 'don_splice_svs=');
-	write_string(fid, don.XT);
-
-	fclose(fid);
-
-	system(sprintf('bzip2 -9 "%s"\n', targetname));
-end
-
-function make_a_trans_orf(fid, L, penids)
-	[A,p,q,info,penalties,orf_info]=gen_splice_model_orf(penids);
-	write_model(fid, L, A, p, q, info, penalties, orf_info)
-
-function make_a_trans_noorf(fid, L, penids)
-	[A, p, q, info, penalties, orf_info]=gen_splice_model_noorf(penids);
-	write_model(fid, L, A,p,q, info, penalties, orf_info)
-
-function write_model(fid, L, A,p,q, info, penalties, orf_info)
-	A(~isinf(A))=L.penalty.transitions.penalty;
-	%idx=[];
-	%fieldns=fieldnames(info);
-	%for i=1:length(fieldns)
-	%	if isequal(fieldns{i}, 'cnt')
-	%		continue
-	%	end
-	%	idx=[idx getfield(info, fieldns{i})];
-	%end
-	%A=A(idx,idx);
-
-	a_trans = zeros(3,sum(~isinf(A(:)))) ;
-	k=0 ;
-	for i=1:size(A,1)
-	  idx = find(~isinf(A(i,:))) ;
-	  val = A(i,idx) ;
-	  a_trans(1,k+1:k+length(idx))=i-1 ;
-	  a_trans(2,k+1:k+length(idx))=idx-1 ;
-	  a_trans(3,k+1:k+length(idx))=val ;
-	  k=k+length(idx) ;
-	end ;
-	a_trans=a_trans' ;
-	[tmp,idx]=sort(a_trans(:,2)) ;
-	a_trans = a_trans(idx,:)' ;
-
-	fprintf(fid, 'msplicer_a_trans=');
-	write_mat(fid, a_trans);
-	fprintf(fid, 'msplicer_p=');
-	p(isinf(p))=32768;
-	write_mat(fid, p(:));
-	fprintf(fid, 'msplicer_q=');
-	q(isinf(q))=32768;
-	write_mat(fid, q(:));
-	fprintf(fid,'\n');
-
-    % start-state: 0
-    % exon-start-state: 1
-    % donor-state: 2
-    % acceptor-state: 3
-    % exon-end-state: 4
-    % stop-state: 5
-    statedescr = zeros(1,info.cnt) ;
-    statedescr(info.start) = 0 ;
-    statedescr(info.atg) = 1 ;
-    statedescr(info.don) = 2 ;
-    statedescr(info.acc) = 3 ;
-    statedescr(info.stop) = 4 ;
-    statedescr(info.final) = 5 ;
-
-	fprintf(fid, 'statedescr=');
-	write_mat(fid, statedescr);
-	fprintf(fid,'\n');
-
-    plifidmat = penalties ;
-    plifidmat(plifidmat==0)=-1 ;
-
-	fprintf(fid, 'plifidmat=');
-	write_mat(fid, plifidmat);
-	fprintf(fid,'\n');
-
-	fprintf(fid, 'orf_info=');
-	write_mat(fid, orf_info);
-	fprintf(fid,'\n');
-
-    word_degree = [3,4,5,6] ;
-    mod_words   = [1,1,1,1,1,1,1,1;
-                   0,0,0,0,0,0,0,0] ;
-    sign_words   = [1,1,1,1,1,1,1,1] ;
-
-	fprintf(fid, 'word_degree=');
-	write_mat(fid, word_degree);
-	fprintf(fid,'\n');
-
-	fprintf(fid, 'mod_words=');
-	write_mat(fid, mod_words);
-	fprintf(fid,'\n');
-
-	fprintf(fid, 'sign_words=');
-	write_mat(fid, sign_words);
-	fprintf(fid,'\n');
-
-    info
-
-function write_penalty(fid, name, x)
-
-	if isfield(x, 'boundaries')
-		fprintf(fid, '%s_boundaries=', name);
-		write_mat(fid, x.boundaries(:,1:(end-1)));
-	else
-		warning('boundaries field does not exist!')
-	end
-	fprintf(fid, '%s_penalty=', name);
-	write_mat(fid, x.penalty');
-
-function write_string(fid, x)
-	fprintf(fid, '[\n');
-	for i=1:size(x,2),
-		fprintf(fid, '%c', x(1:(size(x,1)-1),i));
-		fprintf(fid, '%c\n', x(size(x,1),i));
-	end
-	fprintf(fid, ']\n');
-
-function write_mat(fid, x)
-	if size(x,1)==1,
-		fprintf(fid, '[');
-		fprintf(fid, '%e, ', x(1:(length(x)-1)));
-		fprintf(fid, '%e', x(end));
-	else
-		fprintf(fid, '[');
-		for i=1:size(x,2),
-			fprintf(fid, '%e, ', x(1:(size(x,1)-1),i));
-
-			if i<size(x,2)
-				fprintf(fid, '%e;\n ', x(size(x,1),i));
-			else
-				fprintf(fid, '%e', x(size(x,1),i));
-			end
-		end
-	end
-	fprintf(fid, ']\n');
diff --git a/applications/msplicer/data b/applications/msplicer/data
deleted file mode 120000
index 468cd4027d4..00000000000
--- a/applications/msplicer/data
+++ /dev/null
@@ -1 +0,0 @@
-../../data/msplicer
\ No newline at end of file
diff --git a/applications/msplicer/dna.fa b/applications/msplicer/dna.fa
deleted file mode 100644
index 566d31d9918..00000000000
--- a/applications/msplicer/dna.fa
+++ /dev/null
@@ -1,74 +0,0 @@
->C01F1.1_DNA_-400:+400
-agctcctcactacaagaaaaacgataagattatgcaattataggatactctgtaaaaaaa
-accattcagaccgtttttggacgagcaaaatgaaaaattcgaaaatttagcggaatttgg
-cttttctgagacaattttttaaagaaaaatatttacaaaagtcttaaattcaggaaatcc
-acaaaaaaaagcacgaaaaataatcgcaaatgaaaaaaaattcaaataaaaacttcaaaa
-accgtgatttctcaattttagccaaattccgagggaatttgtggttttcttgaattttag
-acttttttgaaatttacctcgaaagaattcagttttttcaggattttttcttattttaat
-gcgtaaaacatcccatttttaacccaaattatttccagaaatgtctggcctgaaacctgt
-caaaccagaaggagttcaaagcgagttcagtgtccgcgttgcaaaacgaagcgatgatat
-ccgttactctgtaatgatgttcaacggaatggacaaagtggacacatcaaaatggacaat
-agacagtggtgttacaatggagagagaggataatcaacgtgtaattctatcaacacagac
-agttcaagaatacggagaaggatccgagtatggaaaagctgcgagggaagaagctcgccg
-aaagaaatatggaagacaatcaaaaaaatatcgacttgataatcagccatggaagatggc
-attcactgagccagaaggacggcagaggcaaatgagaggaattcgagaaggtggtgcaaa
-tgagcatgctgattattgggtttttctgaaaccaaatcaatcttctgagtttaaagctta
-taaagtcgatgaatggcataaattcctgccagcgattactcataaaactcttgatattga
-tcaagccgaggagcaattctctcagagatataaagttatgaatcaattcgctttgaaagc
-agcgatacagaaccaattgagtgcgacggatgaatcggaaatgacagagcagcagaaacg
-tctactgaaaattaaggatgaggcgagctctgatgattcggatggtgatgatgagggaga
-gggtggtgatgatggaaaaaaggcgaagaataagaaaaagaagaagaagaatgcgaaacc
-ggcgaaagagaagaggcagagggttgaggataaggatgatgttgctagtgagtaatagct
-ctggcggattttttgaaatatcgaaggaataattcggaaaaattccgattaaaaacatcg
-atttttgtttgaaatgccgaaaaataggaattttttcgaccttttcaaagaaaatcgaaa
-aattaaacacaaattgaaaaaaaaattcgccattaatttttccgatacttcaaaaaaatc
-gaatttttgctgttttttttttgaaaaaaccgattttccgaaaattcaacaaaaaaaaag
-aaattttgaaaaatctaattttttgtttgtttaaaatgtgaaaaatcgaaaacttaacga
-aaattgaaacaaaaattcgacatttcgtaaaaaaatggatttttccattaatatcgaaaa
-taaataaaaaatttcggatttttttgtttgaagtatcggaaaatatgaaaattttctagt
-ttttcagaaaaaaatcgtttttctatcttttttttttgaggaaaaatcaaaattttcaca
-catttccggcattttaaacgaaaaaatatagatttttgatttcagaaaaatagaaaatgt
-tcaagattatggtttctcattttacaaaaaaaaaaaattttaagaatacgagtcatcgga
-cggagaggacgagggccgcgagtacgactacatttctgacagtggaactgattctgagtt
-ggtttctttttttgaaaaaatttttaattttattttggaaaagaaaaaataacaaatctt
-ttaaaaattcgtttttgattttctgaaaaaaattaaaaaatttcgaatttctctttttaa
-aaaattcgttttgcgattcttattctaaaaaataaaaaaatcgggaaaaaatttattttt
-cgatttattatgaaaaaaaatatttaaacaaaatataattgcaaaaataaaaataaaaaa
-atttactaattagtcggaaaaattgggaaaaaatttaaaataccgaaaaaaatcaaacat
-tttcgatgtttgatttttttttttctgaaaaaattataaaatagaatattttcgattttc
-gctgggaaaaatttaaaaaatcgaagtttttcggttttttttctagaaaatcgataataa
-atttccattttctaattaaaaattgttattttccaattttcttagtaaaattgaaaacta
-aaaaaaagtccgaattttctgaaaaaaaatttgaaaaattgaaaatcttttccatttttc
-gtttttctctgaaaaaaatgtaataaatcgaaaaaaaaattaatttttcgattttccctt
-caaaaaaaattgaaaaatcaatgaattacccattttcagccgtgaacaagttccatcaga
-cgaaaaaatcgagaaacaactagtcggagttgctgaagaagaaggagcacgtgaatctga
-tagcagtgaatctgaagatgatttaacgaagaaattaatgaaaccatatggtgataagaa
-aaaaggaaatgatattgaagaacgtgattcatctggaacagattctgatgtttccgacac
-tgaaaaactcgattctgtagtttttatgaaggctaacaaggatggagaaggcggctcggg
-aggaaccggaaaaaaacgtccaccaactgaagattctgatcttaaaatggataatcttgg
-tccgagtgatgcgaaaaaagcgaaaccggctgttaaatttgaggaaggactcaatgagga
-aactgttcgcagatatttgcgtcgtaaaccgcatacaacgaaggtgaattggaaaattcg
-aaaaataggaaaaatttcagaaaaaaaaatcgattttctcaggataaaaaaaacaacaaa
-aaaacgaaaaaaaaaatgtttaaaaagttaacaaaaaaaggataataaactttttattta
-tttttaaaattaaaattatgaaacttgtgataattgtaagaaggaaattcaatttttgag
-aaaattgcacgaaaactttaaaaatatttaaaatataggtataattttttcgaacgaaaa
-aaacagaaaaatatataagttaatgcagaaaataccaagaaagttattttccgcgaaaat
-ttcgatttttcggagaaaaattaaaaattaaaaaaaaatcaattttttggtttatattga
-gaagaaaacacatgagaaaaaaattcaattttatgctaaaaataagaaaaacagccaaat
-tgaaatttttcgatttttcaaaacctaaaaatcaaaaaactggtaacaaaatagattttt
-tgttttcaaaaaatttaatactatttttttttataataaatttaaatctaatcaatttaa
-cctatgagcatttttaaaaatttttaattaaaaaaaatagaaaatttttaatttttcaca
-aaattcggaataaaaaactttagaaaaatgaattatgctagaaattgcatcatttgttca
-taaaaaatgttaaaaatttgacaaaaaagcaagaaaactctaatataaattttcaattaa
-aaaaaattataatttaaataaaattgaaagctgaaaaacctattcaaaaatcgctcctgt
-accaaaaatttcagcgaaatctgaaaaaatccaaattttctcataataaaattccatttt
-ttgcaggaactcctccacaaaatgaacggaaaatgcggaaatatgagcaaatctgaaatg
-gtaacccaactggcttcaattctgaaagcaatcgaaccgaatcaatctcgacaattgaag
-ggaaagaaggaagtactcttcttctcacttgtcaacactatcgcctataattttaactat
-tttttcttctcttaaaaattatacaatccttcgcatttttattttaaattcaaattttcc
-cgcccaaaattctgtaaatgaaccaaaagtttcgcgattaaaaaaattttatcgaaatat
-ttaagtgcaaaatattctaaaagctaggaattatagatttttcaaaaaaattcaaataat
-tatgcaagaatcacttgatcaaagccatccactcagcccacaggccttcaacattctctc
-ccttattctgaatttcagtccgagcatcctgaacaatcacttctccctgatcatttacaa
-ttctcatggctggaattgtcttgacttcgtatttttgcatcagggacctggaaaaaaaat
-attattcaatcgtagaaaattgtgattt
diff --git a/applications/msplicer/genomic.py b/applications/msplicer/genomic.py
deleted file mode 100644
index cb49e82b7a0..00000000000
--- a/applications/msplicer/genomic.py
+++ /dev/null
@@ -1,152 +0,0 @@
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Written (W) 2006-2007 Soeren Sonnenburg
-# Written (W) 2006-2007 Mikio Braun
-# Copyright (C) 2007 Fraunhofer Institute FIRST and Max-Planck-Society
-#
-
-import time
-from string import maketrans
-
-""" this function is 100% compatible to the matlab function, thus it is one based (!)
-	use one_based=False if needed, then however the interval is [start,stop) (excluding stop)
-"""
-def load_genomic(chromosome, strand, start, stop, genome, one_based=True):
-	fname = '/fml/ag-raetsch/share/databases/genomes/' + genome + '/' + chromosome[3:] + '.flat'
-	f=file(fname)
-	if one_based:
-		f.seek(start-1)
-		str=f.read(stop-start+1)
-	else:
-		f.seek(start)
-		str=f.read(stop-start)
-
-	if strand=='-':
-		return reverse_complement(str)
-	elif strand=='+':
-		return str
-	else:
-		print 'strand must be + or -'
-		raise KeyError
-
-""" read a table browser ascii output file (http://genome.ucsc.edu/cgi-bin/hgTables) """
-def read_table_browser(f):
-	table=dict();
-	for l in f.readlines():
-		if not l.startswith('#'):
-			(name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds,proteinID,alignID)=l.split('\t')
-			exonStarts=[ int(i) for i in exonStarts.split(',')[:-1] ]
-			exonEnds=[ int(i) for i in exonEnds.split(',')[:-1] ]
-
-			table[name]={ 'chrom': chrom, 'strand': strand, 'txStart': int(txStart), 'txEnd': int(txEnd),
-			'cdsStart': int(cdsStart), 'cdsEnd': int(cdsEnd), 'exonCount': int(exonCount), 'exonStarts': exonStarts,
-			'exonEnds': exonEnds, 'proteinID': proteinID, 'alignID': alignID[:-1] }
-
-	return table
-
-""" get promoter region """
-def get_promoter_region(chromosome, strand, gene_start, gene_end, genome, length):
-
-	if strand == '+':
-		return load_genomic(chromosome, strand, gene_start, gene_start+length, genome, one_based=False)
-	elif strand == '-':
-		return load_genomic(chromosome, strand, gene_end, gene_end+length, genome, one_based=False)
-	else:
-		print 'unknown strand'
-		return None
-
-""" reverse + complement a DNA sequence (only letters ACGT are translated!)
-	FIXME won't work with all the rest like y... """
-def reverse_complement(str):
-	t=maketrans('acgtACGT','tgcaTGCA')
-	return str[len(str)::-1].translate(t)
-
-""" works only with .fa files that contain a single entry """
-def read_single_fasta(fname):
-	str=file(fname).read()
-	str=str[str.index('\n')+1:].replace('\n','')
-	return str
-
-""" writes only single enty .fa files """
-def write_single_fasta(fname, name, str, linelen=60):
-	header= '>' + name + '\n'
-	f=file(fname,'a')
-	f.write(header)
-	for i in xrange(0,len(str),linelen):
-		f.write(str[i:i+linelen]+'\n')
-	f.close()
-
-""" read fasta as dictionary """
-def read_fasta(f):
-	fasta=dict()
-
-	for s in f.readlines():
-		if s.startswith('>'):
-			key=s[1:-1]
-			fasta[key]=""
-		else:
-			fasta[key]+=s[:-1]
-
-	return fasta
-
-""" write dictionary fasta """
-def write_fasta(f, d, linelen=60):
-    for k in sorted(d):
-        f.write('>%s\n' % k);
-        s = d[k]
-        for i in xrange(0, len(s), linelen):
-            f.write(s[i:i+linelen] + '\n')
-
-def write_gff(f, (source, version), (seqtype, seqname), descrlist, skipheader=False):
-	""" writes a gff version 2 file
-		descrlist is a list of dictionaries, each of which contain these fields:
-		<seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
-	"""
-
-	if not skipheader:
-		f.write('##gff-version 2\n')
-		f.write('##source-version %s %s\n' % (source, version) )
-
-		t=time.localtime()
-		f.write("##date %d-%d-%d %d:%d:%d\n" % t[0:6])
-
-	f.write('##Type %s %s\n' % (seqtype, seqname) )
-
-	for d in descrlist:
-		f.write('%s\t%s\t%s\t%d\t%d\t%f\t%s\t%d' % (d['seqname'], d['source'],
-											d['feature'], d['start'], d['end'],
-											d['score'], d['strand'], d['frame']))
-		if d.has_key('attributes'):
-			f.write('\t' + d['attributes'])
-			if d.has_key('comments'):
-				f.write('\t' + d['comments'])
-		f.write('\n')
-
-
-if __name__ == '__main__':
-	import sys,os
-
-	table=read_table_browser(file('/fml/ag-raetsch/home/sonne/addnet/tfbs/share/data/wt1_bibliosphere_table_browser_hg17.txt'))
-	print table.keys()
-	print table[table.keys()[0]]
-	d = { 'ahoernchen' : 'ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT',
-		  'bhoernchen' : 'GATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACA' }
-
-	write_fasta(sys.stdout, d)
-	write_fasta(file('/tmp/test.fa','w'), d)
-
-	d2 = read_fasta(file('/tmp/test.fa'))
-	os.unlink('/tmp/test.fa')
-
-	print d
-	print d2
-	print d == d2
-
-	p=load_genomic('chr5', '+', 100000, 100100,'hg17')
-	n=load_genomic('chr1', '-', 3000000, 3001000,'mm7')
-	write_single_fasta('bla.fa','bla', 'ACGT')
-	n2=read_single_fasta('bla.fa')
diff --git a/applications/msplicer/model.py b/applications/msplicer/model.py
deleted file mode 100644
index 6c71b86cad0..00000000000
--- a/applications/msplicer/model.py
+++ /dev/null
@@ -1,307 +0,0 @@
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Written (W) 2006-2008 Soeren Sonnenburg
-# Written (W) 2007 Gunnar Raetsch
-# Copyright (C) 2006-2008 Fraunhofer Institute FIRST and Max-Planck-Society
-#
-
-import sys
-from numpy import mat,array,inf,any,reshape,int32
-
-class model(object):
-	#model matrices
-	bins=None
-	dict_weights_intron=None
-	dict_weights_coding=None
-	a_trans=None
-	p=None
-	q=None
-
-	statedescr = None
-	plifidmat = None
-	orf_info = None
-	use_orf = None
-
-	word_degree = None
-	mod_words = None
-	sign_words = None
-
-	#penalties
-	penalty_acceptor_boundaries=None
-	penalty_acceptor_penalty=None
-	penalty_donor_boundaries=None
-	penalty_donor_penalty=None
-	penalty_coding_len_boundaries=None
-	penalty_coding_len_penalty=None
-	penalty_first_coding_len_boundaries=None
-	penalty_first_coding_len_penalty=None
-	penalty_last_coding_len_boundaries=None
-	penalty_last_coding_len_penalty=None
-	penalty_single_coding_len_boundaries=None
-	penalty_single_coding_len_penalty=None
-	penalty_intron_len_boundaries=None
-	penalty_intron_len_penalty=None
-	penalty_coding_boundaries=None
-	penalty_coding_penalty=None
-	penalty_coding2_boundaries=None
-	penalty_coding2_penalty=None
-	penalty_coding3_boundaries=None
-	penalty_coding3_penalty=None
-	penalty_coding4_boundaries=None
-	penalty_coding4_penalty=None
-	penalty_intron_boundaries=None
-	penalty_intron_penalty=None
-	penalty_intron2_boundaries=None
-	penalty_intron2_penalty=None
-	penalty_intron3_boundaries=None
-	penalty_intron3_penalty=None
-	penalty_intron4_boundaries=None
-	penalty_intron4_penalty=None
-	penalty_transitions_penalty=None
-
-	#acceptor
-	acc_splice_b=None
-	acc_splice_order=None
-	acc_splice_window_left=None
-	acc_splice_window_right=None
-	acc_splice_alphas=None
-	acc_splice_svs=None
-
-	#donor
-	don_splice_b=None
-	don_splice_order=None
-	don_splice_use_gc=None
-	don_splice_window_left=None
-	don_splice_window_right=None
-	don_splice_alphas=None
-	don_splice_svs=None
-
-
-
-def parse_file(file):
-	m=model()
-
-	l=file.readline();
-
-	if l != '%msplicer definition file version: 1.0\n':
-		sys.stderr.write("\nfile not a msplicer definition file\n")
-		return None
-
-	while l:
-		if not ( l.startswith('%') or l.startswith('\n') ): # comment
-			if m.bins is None: m.bins=parse_value(l, 'bins')
-			if m.dict_weights_intron is None: m.dict_weights_intron=parse_matrix(l, file, 'dict_weights_intron')
-			if m.dict_weights_coding is None: m.dict_weights_coding=parse_matrix(l, file, 'dict_weights_coding')
-			if m.a_trans is None: m.a_trans=parse_matrix(l, file, 'msplicer_a_trans')
-			if m.p is None:
-				m.p=parse_vector(l, file, 'msplicer_p')
-				if m.p is not None:
-					m.p[m.p==32768]=-inf
-			if m.q is None:
-				m.q=parse_vector(l, file, 'msplicer_q')
-				if m.q is not None:
-					m.q[m.q==32768]=-inf
-
-			if m.statedescr is None:
-				m.statedescr=parse_vector(l, file, 'statedescr')
-				if m.statedescr is not None:
-					m.statedescr=array(m.statedescr, int32)
-
-			if m.plifidmat is None:
-				m.plifidmat=parse_matrix(l, file, 'plifidmat')
-				if m.plifidmat is not None:
-					m.plifidmat = array(m.plifidmat, int32)
-
-			if m.orf_info is None:
-				m.orf_info=parse_matrix(l, file, 'orf_info')
-				if m.orf_info is not None:
-					m.orf_info=array(m.orf_info, int32).T
-					if any(m.orf_info != -1):
-						m.use_orf = True
-					else:
-						m.use_orf = False
-
-			if m.word_degree is None: m.word_degree=parse_vector(l, file, 'word_degree')
-			if m.mod_words is None: m.mod_words=parse_matrix(l, file, 'mod_words')
-			if m.sign_words is None: m.sign_words=parse_vector(l, file, 'sign_words')
-
-			#penalties
-			if m.penalty_acceptor_boundaries is None: m.penalty_acceptor_boundaries=parse_vector(l, file, 'penalty_acceptor_boundaries')
-			if m.penalty_acceptor_penalty is None: m.penalty_acceptor_penalty=parse_vector(l, file, 'penalty_acceptor_penalty')
-			if m.penalty_donor_boundaries is None: m.penalty_donor_boundaries=parse_vector(l, file, 'penalty_donor_boundaries')
-			if m.penalty_donor_penalty is None: m.penalty_donor_penalty=parse_vector(l, file, 'penalty_donor_penalty')
-			if m.penalty_coding_len_boundaries is None: m.penalty_coding_len_boundaries=parse_vector(l, file, 'penalty_coding_len_boundaries')
-			if m.penalty_coding_len_penalty is None: m.penalty_coding_len_penalty=parse_vector(l, file, 'penalty_coding_len_penalty')
-			if m.penalty_first_coding_len_boundaries is None: m.penalty_first_coding_len_boundaries=parse_vector(l, file, 'penalty_first_coding_len_boundaries')
-			if m.penalty_first_coding_len_penalty is None: m.penalty_first_coding_len_penalty=parse_vector(l, file, 'penalty_first_coding_len_penalty')
-			if m.penalty_last_coding_len_boundaries is None: m.penalty_last_coding_len_boundaries=parse_vector(l, file, 'penalty_last_coding_len_boundaries')
-			if m.penalty_last_coding_len_penalty is None: m.penalty_last_coding_len_penalty=parse_vector(l, file, 'penalty_last_coding_len_penalty')
-			if m.penalty_single_coding_len_boundaries is None: m.penalty_single_coding_len_boundaries=parse_vector(l, file, 'penalty_single_coding_len_boundaries')
-			if m.penalty_single_coding_len_penalty is None: m.penalty_single_coding_len_penalty=parse_vector(l, file, 'penalty_single_coding_len_penalty')
-			if m.penalty_intron_len_boundaries is None: m.penalty_intron_len_boundaries=parse_vector(l, file, 'penalty_intron_len_boundaries')
-			if m.penalty_intron_len_penalty is None: m.penalty_intron_len_penalty=parse_vector(l, file, 'penalty_intron_len_penalty')
-			if m.penalty_coding_boundaries is None: m.penalty_coding_boundaries=parse_vector(l, file, 'penalty_coding_boundaries')
-			if m.penalty_coding_penalty is None: m.penalty_coding_penalty=parse_vector(l, file, 'penalty_coding_penalty')
-			if m.penalty_coding2_boundaries is None: m.penalty_coding2_boundaries=parse_vector(l, file, 'penalty_coding2_boundaries')
-			if m.penalty_coding2_penalty is None: m.penalty_coding2_penalty=parse_vector(l, file, 'penalty_coding2_penalty')
-			if m.penalty_coding3_boundaries is None: m.penalty_coding3_boundaries=parse_vector(l, file, 'penalty_coding3_boundaries')
-			if m.penalty_coding3_penalty is None: m.penalty_coding3_penalty=parse_vector(l, file, 'penalty_coding3_penalty')
-			if m.penalty_coding4_boundaries is None: m.penalty_coding4_boundaries=parse_vector(l, file, 'penalty_coding4_boundaries')
-			if m.penalty_coding4_penalty is None: m.penalty_coding4_penalty=parse_vector(l, file, 'penalty_coding4_penalty')
-			if m.penalty_intron_boundaries is None: m.penalty_intron_boundaries=parse_vector(l, file, 'penalty_intron_boundaries')
-			if m.penalty_intron_penalty is None: m.penalty_intron_penalty=parse_vector(l, file, 'penalty_intron_penalty')
-			if m.penalty_intron2_boundaries is None: m.penalty_intron2_boundaries=parse_vector(l, file, 'penalty_intron2_boundaries')
-			if m.penalty_intron2_penalty is None: m.penalty_intron2_penalty=parse_vector(l, file, 'penalty_intron2_penalty')
-			if m.penalty_intron3_boundaries is None: m.penalty_intron3_boundaries=parse_vector(l, file, 'penalty_intron3_boundaries')
-			if m.penalty_intron3_penalty is None: m.penalty_intron3_penalty=parse_vector(l, file, 'penalty_intron3_penalty')
-			if m.penalty_intron4_boundaries is None: m.penalty_intron4_boundaries=parse_vector(l, file, 'penalty_intron4_boundaries')
-			if m.penalty_intron4_penalty is None: m.penalty_intron4_penalty=parse_vector(l, file, 'penalty_intron4_penalty')
-			if m.penalty_transitions_penalty is None: m.penalty_transitions_penalty=parse_vector(l, file, 'penalty_transitions_penalty')
-
-			#acceptor
-			if m.acc_splice_b is None: m.acc_splice_b=parse_value(l, 'acc_splice_b')
-			if m.acc_splice_order is None: m.acc_splice_order=parse_value(l, 'acc_splice_order')
-			if m.acc_splice_window_left is None: m.acc_splice_window_left=parse_value(l, 'acc_splice_window_left')
-			if m.acc_splice_window_right is None: m.acc_splice_window_right=parse_value(l, 'acc_splice_window_right')
-			if m.acc_splice_alphas is None: m.acc_splice_alphas=parse_vector(l, file, 'acc_splice_alphas')
-			if m.acc_splice_svs is None: m.acc_splice_svs=parse_string(l, file, 'acc_splice_svs')
-
-			#donor
-			if m.don_splice_b is None: m.don_splice_b=parse_value(l, 'don_splice_b')
-			if m.don_splice_order is None: m.don_splice_order=parse_value(l, 'don_splice_order')
-			if m.don_splice_use_gc is None: m.don_splice_use_gc=parse_value(l, 'don_splice_use_gc')
-			if m.don_splice_window_left is None: m.don_splice_window_left=parse_value(l, 'don_splice_window_left')
-			if m.don_splice_window_right is None: m.don_splice_window_right=parse_value(l, 'don_splice_window_right')
-			if m.don_splice_alphas is None: m.don_splice_alphas=parse_vector(l, file, 'don_splice_alphas')
-			if m.don_splice_svs is None: m.don_splice_svs=parse_string(l, file, 'don_splice_svs')
-
-		l=file.readline()
-
-	sys.stderr.write('done\n')
-	return m
-
-def parse_value(line, name):
-	if (line.startswith(name)):
-		sys.stdout.write('.'); sys.stdout.flush()
-		return float(line[line.find('=')+1:-1])
-	else:
-		return None
-
-def parse_vector(line, file, name):
-    mat = parse_matrix(line, file, name)
-    if mat is None:
-     return mat
-    else:
-     mat = array(mat).flatten()
-     return mat
-
-def parse_matrix(line, file, name):
-	if (line.startswith(name)):
-		sys.stdout.write('.'); sys.stdout.flush()
-		if line.find(']') < 0:
-			l=''
-			while l is not None and l.find(']') < 0:
-				line+=l
-				l=file.readline()
-			if l is not None and l.find(']') >= 0:
-				line+=l
-
-		if line.find(']') < 0:
-			sys.stderr.write("matrix `" + name + "' ended without ']'\n")
-			return None
-		else:
-			mm = mat(line[line.find('['):line.find(']')+1])
-			if len(mm.shape)==1:
-				mm = reshape(mm.shape[0],1)
-			return mm
-	else:
-		return None
-
-def parse_string(line, file, name):
-	if (line.startswith(name)):
-		sys.stdout.write('.'); sys.stdout.flush()
-		l=''
-		lines=[]
-		while l is not None and l.find(']') < 0:
-			if l:
-				lines.append(l[:-1])
-			l=file.readline()
-
-		if l.find(']') < 0:
-			sys.stderr.write("string ended without ']'\n")
-			return None
-		else:
-			return lines
-	else:
-		return None
-
-if __name__ == '__main__':
-	import bz2
-	import sys
-	import hotshot, hotshot.stats
-
-	def load():
-		#f=bz2.BZ2File('data/msplicer_arabidopsis10_gc=1_orf=0.dat.bz2');
-		f=file('data/msplicer_arabidopsis10_gc=1_orf=0.dat');
-		m=parse_file(f);
-
-		print m.penalty_acceptor_boundaries is None
-		print m.penalty_acceptor_penalty is None
-		print m.penalty_donor_boundaries is None
-		print m.penalty_donor_penalty is None
-		print m.penalty_coding_len_boundaries is None
-		print m.penalty_coding_len_penalty is None
-		print m.penalty_first_coding_len_boundaries is None
-		print m.penalty_first_coding_len_penalty is None
-		print m.penalty_last_coding_len_boundaries is None
-		print m.penalty_last_coding_len_penalty is None
-		print m.penalty_single_coding_len_boundaries is None
-		print m.penalty_single_coding_len_penalty is None
-		print m.penalty_intron_len_boundaries is None
-		print m.penalty_intron_len_penalty is None
-		print m.penalty_coding_boundaries is None
-		print m.penalty_coding_penalty is None
-		print m.penalty_coding2_boundaries is None
-		print m.penalty_coding2_penalty is None
-		print m.penalty_coding3_boundaries is None
-		print m.penalty_coding3_penalty is None
-		print m.penalty_coding4_boundaries is None
-		print m.penalty_coding4_penalty is None
-		print m.penalty_intron_boundaries is None
-		print m.penalty_intron_penalty is None
-		print m.penalty_intron2_boundaries is None
-		print m.penalty_intron2_penalty is None
-		print m.penalty_intron3_boundaries is None
-		print m.penalty_intron3_penalty is None
-		print m.penalty_intron4_boundaries is None
-		print m.penalty_intron4_penalty is None
-		print m.penalty_transitions_penalty is None
-
-		print m.acc_splice_b is None
-		print m.acc_splice_order is None
-		print m.acc_splice_window_left is None
-		print m.acc_splice_window_right is None
-		print m.acc_splice_alphas is None
-		print m.acc_splice_svs is None
-
-		print m.don_splice_b is None
-		print m.don_splice_order is None
-		print m.don_splice_use_gc is None
-		print m.don_splice_window_left is None
-		print m.don_splice_window_right is None
-		print m.don_splice_alphas is None
-		print m.don_splice_svs is None
-
-	load()
-
-	#prof = hotshot.Profile("model.prof")
-	#benchtime = prof.runcall(load)
-	#prof.close()
-	#stats = hotshot.stats.load("model.prof")
-	#stats.strip_dirs()
-	#stats.sort_stats('time', 'calls')
-	#stats.print_stats(20)
diff --git a/applications/msplicer/msplicer b/applications/msplicer/msplicer
deleted file mode 100755
index 03101521cb3..00000000000
--- a/applications/msplicer/msplicer
+++ /dev/null
@@ -1,355 +0,0 @@
-#!/usr/bin/env python
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Written (W) 2007 Gunnar Raetsch
-# Written (W) 2006-2008 Soeren Sonnenburg
-# Copyright (C) 2006-2008 Fraunhofer Institute FIRST and Max-Planck-Society
-#
-
-try:
-	import os
-	import os.path
-	import sys
-	import pickle
-	import bz2
-	import numpy
-	import optparse
-
-	import genomic
-	import model
-	import seqdict
-	import shogun
-
-	d=shogun.DynProg()
-	if (d.version.get_version_revision() < 2997):
-		print
-		print "ERROR: SHOGUN VERSION 0.6.2 or later required"
-		print
-		sys.exit(1)
-	from content_sensors import content_sensors
-	from signal_detectors import signal_detectors
-	from plif import plif
-except ImportError, e:
-	print e
-	print
-	print "ERROR IMPORTING MODULES, MAKE SURE YOU HAVE SHOGUN INSTALLED"
-	print
-	sys.exit(1)
-
-
-msplicer_version='v0.3'
-
-class msplicer:
-	def __init__(self):
-		self.model = None
-		self.plif = None
-		self.signal = None
-		self.content = None
-		self.model_name = None
-
-	def load_model(self, filename):
-		self.model_name = filename
-		sys.stderr.write('loading model file\n')
-		f=None
-		picklefile=filename+'.pickle'
-		if os.path.isfile(picklefile):
-			self.model=pickle.load(file(picklefile))
-		else:
-			if filename.endswith('.bz2'):
-				f=bz2.BZ2File(filename);
-			else:
-				f=file(filename);
-
-			self.model=model.parse_file(f)
-			f.close()
-
-			f=file(picklefile,'w')
-			pickle.dump(self.model, f)
-			f.close()
-
-		self.plif=plif(self.model)
-		self.signal=signal_detectors(self.model)
-		self.content=content_sensors(self.model)
-
-	def compute_seqmatrix(self, seq):
-		# start-state: 0
-		# exon-start-state: 1
-		# donor-state: 2
-		# acceptor-state: 3
-		# exon-end-state: 4
-		# stop-state: 5
-
-		start_idx = numpy.where(self.model.statedescr == 0)[0]
-		exon_start_idx = numpy.where(self.model.statedescr == 1)[0]
-		don_idx = numpy.where(self.model.statedescr == 2)[0]
-		acc_idx = numpy.where(self.model.statedescr == 3)[0]
-		exon_stop_idx = numpy.where(self.model.statedescr == 4)[0]
-		stop_idx = numpy.where(self.model.statedescr == 5)[0]
-
-		# start positions
-		positions=[(0,0,start_idx)]
-		positions.append((seq.start,0,exon_start_idx))
-
-		# end positions
-		positions.append((seq.end, 0, exon_stop_idx[0]))
-		if len(exon_stop_idx)>1:
-			idx = numpy.where(numpy.array(seq.preds['acceptor'].positions,numpy.int32)==seq.end)[0]
-			if len(idx)==1:
-				positions.append((seq.end, seq.preds['acceptor'].scores[idx], exon_stop_idx[1]))
-		positions.append((len(seq.seq)-1,0,stop_idx))
-
-		# donor posititions
-		for i in don_idx:
-			positions.extend(zip(seq.preds['donor'].positions,
-								 seq.preds['donor'].scores,
-								 len(seq.preds['donor'].positions)*[i]))
-
-		# acceptor positions
-		for i in acc_idx:
-			positions.extend(zip(seq.preds['acceptor'].positions,
-								 list(seq.preds['acceptor'].scores),
-								 len(seq.preds['acceptor'].positions)*[i]))
-
-		positions.sort(cmp=lambda x,y : int(x[0]-y[0]))
-		unique_positions= numpy.unique(numpy.array([ x[0] for x in positions ], numpy.int32))
-
-		seqmatrix= -numpy.infty * numpy.ones((len(self.model.statedescr),len(unique_positions)))
-		for i in xrange(len(positions)):
-			p = numpy.where(positions[i][0]==unique_positions)[0] ;
-			assert(len(p)==1)
-			p = p[0] ;
-			seqmatrix[positions[i][2],p]=positions[i][1]
-
-		if len(don_idx)>1: # orf case
-			for i in xrange(len(unique_positions)):
-				if seqmatrix[don_idx[0], i] > -1e20:
-					s1 = seq.seq[unique_positions[i]-1:unique_positions[i]+1]
-					s2 = seq.seq[unique_positions[i]-2:unique_positions[i]+1]
-					if s1 in ['TG']: seqmatrix[don_idx[1], i]=-numpy.infty
-					if s1 not in ['TG']: seqmatrix[don_idx[2], i]=-numpy.infty
-					if s2 in ['TAG', 'TGG']: seqmatrix[don_idx[3], i]=-numpy.infty
-					if s2 not in ['TAG']: seqmatrix[don_idx[4], i]=-numpy.infty
-					if s2 not in ['TGG']: seqmatrix[don_idx[5], i]=-numpy.infty
-
-		if len(acc_idx)>1: # orf case
-			for i in xrange(len(unique_positions)):
-				if seqmatrix[acc_idx[0], i] > -1e20:
-					s1 = seq.seq[unique_positions[i]-1:unique_positions[i]+1]
-					s2 = seq.seq[unique_positions[i]-1:unique_positions[i]+2]
-					if s2 in ['GAA', 'GAG', 'GGA']: seqmatrix[acc_idx[2], i]=-numpy.infty
-					if s1 in ['GA', 'GG']: seqmatrix[acc_idx[4], i]=-numpy.infty
-					if s1 in ['GA']: seqmatrix[acc_idx[5], i]=-numpy.infty
-
-		plifstatemat = -numpy.ones((len(self.model.statedescr),1), numpy.int32);
-		plifstatemat[acc_idx,0] = 0 ; # acceptors use first plif
-		plifstatemat[don_idx,0] = 1 ; # donors use second plif
-
-		return (seqmatrix, unique_positions, plifstatemat)
-
-
-	def initialize_dynprog(self, seq):
-		dyn=shogun.DynProg()
-
-		self.content.initialize_content(dyn)
-
-		n=len(self.model.p)
-		dyn.set_num_states(n)
-		dyn.set_p_vector(self.model.p)
-		dyn.set_q_vector(self.model.q)
-		dyn.set_a_trans_matrix(self.model.a_trans)
-
-		#design scoring seqmatrix
-		(seqmatrix, positions, plifstatemat) = self.compute_seqmatrix(seq)
-
-		dyn.best_path_set_seq(seqmatrix)
-		dyn.best_path_set_pos(positions)
-		dyn.best_path_set_orf_info(self.model.orf_info)
-
-		dyn.best_path_set_plif_list(self.plif.get_plif_array())
-
-		dyn.best_path_set_plif_id_matrix(self.model.plifidmat.T)
-		dyn.best_path_set_plif_state_signal_matrix(plifstatemat)
-		s=[]; s+=seq.seq;
-		dyn.best_path_set_single_genestr(numpy.array(s))
-		dyn.best_path_set_dict_weights(self.content.get_dict_weights())
-
-	#	self.precompute_content_svm_values(self, dyn, seq, positions)
-
-		return (dyn,positions)
-
-	#def precompute_content_svm_values(self, dyn, seq, positions):
-	#	wordstr=dyn.create_word_string(seq, 1, len(seq));
-	#	dyn.init_content_svm_value_array(Npos)
-	#	weights = self.content.get_dict_weights()
-	#	#n = size(weights, 1)
-	#	#m = size(weights, 2)
-	#	dyn.precompute_content_values(wordstr, positions, len(positions), len(seq), self.content.get_dict_weights(), n*m);
-	#	dyn.set_genestr_len(len(seq));
-	#	return (dyn)
-
-	def write_gff(self, outfile, pred, name, score, skipheader):
-		descr=list()
-		for i in xrange(pred.shape[0]):
-			d=dict()
-			d['seqname']=name
-			d['source']='msplicer'
-			d['feature']='exon'
-			d['start']=pred[i,0]+1
-			d['end']=pred[i,1]
-			d['score']=score
-			d['strand']='+'
-			d['frame']=0
-			descr.append(d)
-
-		genomic.write_gff(outfile, ('msplicer',msplicer_version + ' ' + self.model_name),
-				('DNA', name), descr, skipheader)
-
-	def predict_file(self, fname, (start,end)):
-		skipheader=False
-		fasta_dict = genomic.read_fasta(file(fname))
-		sys.stderr.write('found fasta file with ' + `len(fasta_dict)` + ' sequence(s)\n')
-		seqs= seqdict.seqdict(fasta_dict, (start,end))
-
-		#get donor/acceptor signal predictions for all sequences
-		self.signal.predict_acceptor_sites_from_seqdict(seqs)
-		self.signal.predict_donor_sites_from_seqdict(seqs)
-
-		for seq in seqs:
-			#initialize dynamic programming, with content sensors
-			#signal detectors, Plifs and HMM like model
-			(dyn,positions)=self.initialize_dynprog(seq)
-
-			#compute max likely path
-			dyn.best_path_call(1, self.model.use_orf)
-			scores=dyn.best_path_get_scores()
-			states=dyn.best_path_get_states()
-			pos=dyn.best_path_get_positions()
-			pred_states=states[0][0:numpy.where(pos[0]==-1)[0]][1:-1]
-			pred=positions[pos[0][0:numpy.where(pos[0]==-1)[0]][1:-1]]
-			#print scores
-			#print pred_states
-			#print pred
-			#print len(pred_states)
-			if (len(pred_states)>0):
-				if (pred_states[-1]==15): # joint state for acceptor and stop codon
-					pred_ = numpy.zeros(len(pred)+1, numpy.int32) ;
-					pred_[0:len(pred)] = pred ;
-					pred_[-1] = pred[-1]
-					pred = pred_
-
-			pred=pred.reshape((len(pred)/2,2))
-			self.write_gff(outfile, pred, seq.name, scores, skipheader)
-			skipheader=True
-
-			if 0:
-				my_posi = numpy.array([  1, 400, 408, 451, 1188, 1785, 1858, 2732, 2924, 3869, 3948, 4348 ], numpy.int32)-1 ;
-				my_pos = numpy.zeros(len(my_posi), numpy.int32) ;
-				print positions, my_posi
-				for i in xrange(len(my_posi)):
-					my_pos[i] = numpy.where(positions == my_posi[i])[0]
-
-				my_states = numpy.array([0, 13, 6, 12, 2, 8, 4, 10, 4, 10, 14, 16], numpy.int32)
-				#my_pos = numpy.array([  0, 51, 169, 204, 216, 241, 300, 355, 360, 397], numpy.int32) ;
-				#my_states = numpy.array([0, 3, 1, 2, 1, 2, 1, 2, 4, 5], numpy.int32)
-
-				my_states = states[0][0:numpy.where(pos[0]==-1)[0]]
-				my_pos    = pos[0][0:numpy.where(pos[0]==-1)[0]]
-
-				print my_states
-				print my_pos
-				print positions[my_pos]
-
-				dyn.best_path_set_my_state_seq(my_states)
-				dyn.best_path_set_my_pos_seq(my_pos)
-
-				dyn.io.set_loglevel(shogun.M_DEBUG)
-				dyn.best_path_deriv_call()
-
-def print_version():
-	sys.stderr.write('mSplicer '+msplicer_version+'\n')
-
-def parse_options():
-	parser = optparse.OptionParser(usage="usage: %prog [options] seq.fa")
-
-	parser.add_option("-o", "--outfile", type="str", default='stdout',
-			                  help="File to write the results to")
-	parser.add_option("-v", "--version", default=False,
-			                  help="Show some more information")
-	parser.add_option("--start", type="int", default=499,
-			                  help="coding start (zero based, relative to sequence start)")
-	parser.add_option("--stop", type="int", default=-499,
-			                  help="""coding stop (zero based, if positive relative to
-							  sequence start, if negative relative to sequence end)""")
-	parser.add_option("--model", type="str", default='WS160',
-			                  help="mSplicer Model to use in predicting")
-
-	(options, args) = parser.parse_args()
-	if options.version:
-		print_version()
-		sys.exit(0)
-
-	if len(args) != 1:
-		parser.error("incorrect number of arguments")
-
-	fafname=args[0]
-	if not os.path.isfile(fafname):
-		parser.error("fasta file does not exist")
-
-	if options.model.endswith('gc'):
-		gc=1
-		model=options.model[:-2]
-	else:
-		gc=0
-		model=options.model
-
-	if model.startswith('orf'):
-		orf=1
-		model=model[3:]
-	else:
-		orf=0
-
-	modelfname = 'data/msplicer_elegans%s_gc=%d_orf=%d.dat.bz2' % (model, gc, orf)
-	print "loading model file " + modelfname,
-
-	if not os.path.isfile(modelfname):
-		print "...not found!\n"
-		parser.error("""model should be one of:
-
-WS120, WS120gc, orfWS120, WS150,
-WS160, WS160gc, orfWS160gc
-""")
-
-	if options.outfile == 'stdout':
-		outfile=sys.stdout
-	else:
-		try:
-			outfile=file(options.outfile,'w')
-		except IOError:
-			parser.error("could not open %s for writing" % options.outfile)
-
-	if options.start<80:
-		parser.error("--start value must be >=80")
-
-	if options.stop > 0 and options.start >= options.stop - 80:
-		parser.error("--stop value must be > start + 80")
-
-	if options.stop < 0 and options.stop > -80:
-		parser.error("--stop value must be <= - 80")
-
-	# shift the start and stop a bit
-	options.start -= 1 ;
-	options.stop -= 1 ;
-
-	return ((options.start,options.stop), fafname, modelfname, outfile)
-
-
-if __name__ == '__main__':
-	dyn=shogun.DynProg()
-	(startstop, fafname, modelfname, outfile ) = parse_options()
-	p=msplicer()
-	p.load_model(modelfname);
-	p.predict_file(fafname, startstop)
diff --git a/applications/msplicer/plif.py b/applications/msplicer/plif.py
deleted file mode 100644
index b1abe8c6417..00000000000
--- a/applications/msplicer/plif.py
+++ /dev/null
@@ -1,224 +0,0 @@
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Written (W) 2006-2007 Soeren Sonnenburg
-# Written (W) 2007 Gunnar Raetsch
-# Copyright (C) 2007-2008 Fraunhofer Institute FIRST and Max-Planck-Society
-#
-
-from numpy import array
-from shogun import Plif
-from shogun import PlifArray
-from shogun import DynamicPlifArray
-
-class plif:
-	def __init__(self, model):
-		min_exon_len=2
-		min_intron_len=30
-		max_len=22222;
-		#extract plifs from model
-		l=array(model.penalty_acceptor_boundaries).flatten()
-		p=array(model.penalty_acceptor_penalty).flatten()
-		self.acceptor=Plif(len(l))
-		self.acceptor.set_plif_limits(l)
-		self.acceptor.set_plif_penalty(p)
-		self.acceptor.set_min_value(-1e+20)
-		self.acceptor.set_max_value(1e+20)
-		self.acceptor.set_plif_name("acceptor")
-
-		l=array(model.penalty_donor_boundaries).flatten()
-		p=array(model.penalty_donor_penalty).flatten()
-		self.donor=Plif(len(l))
-		self.donor.set_plif_limits(l)
-		self.donor.set_plif_penalty(p)
-		self.donor.set_min_value(-1e+20)
-		self.donor.set_max_value(1e+20)
-		self.donor.set_plif_name("donor")
-
-		l=array(model.penalty_coding_len_boundaries).flatten()
-		p=array(model.penalty_coding_len_penalty).flatten()
-		self.coding_len=Plif(len(l))
-		self.coding_len.set_plif_limits(l)
-		self.coding_len.set_plif_penalty(p)
-		self.coding_len.set_min_value(min_exon_len)
-		self.coding_len.set_max_value(max_len)
-		self.coding_len.set_plif_name('coding_len')
-		self.coding_len.set_transform_type("log(+1)")
-
-		l=array(model.penalty_first_coding_len_boundaries).flatten()
-		p=array(model.penalty_first_coding_len_penalty).flatten()
-		self.first_coding_len=Plif(len(l))
-		self.first_coding_len.set_plif_limits(l)
-		self.first_coding_len.set_plif_penalty(p)
-		self.first_coding_len.set_min_value(min_exon_len)
-		self.first_coding_len.set_max_value(max_len)
-		self.first_coding_len.set_plif_name("first_coding_len")
-		self.first_coding_len.set_transform_type("log(+1)")
-
-		l=array(model.penalty_last_coding_len_boundaries).flatten()
-		p=array(model.penalty_last_coding_len_penalty).flatten()
-		self.last_coding_len=Plif(len(l))
-		self.last_coding_len.set_plif_limits(l)
-		self.last_coding_len.set_plif_penalty(p)
-		self.last_coding_len.set_min_value(min_exon_len)
-		self.last_coding_len.set_max_value(max_len)
-		self.last_coding_len.set_plif_name('last_coding_len')
-		self.last_coding_len.set_transform_type("log(+1)")
-
-		l=array(model.penalty_single_coding_len_boundaries).flatten()
-		p=array(model.penalty_single_coding_len_penalty).flatten()
-		self.single_coding_len=Plif(len(l))
-		self.single_coding_len.set_plif_limits(l)
-		self.single_coding_len.set_plif_penalty(p)
-		self.single_coding_len.set_min_value(min_exon_len)
-		self.single_coding_len.set_max_value(max_len)
-		self.single_coding_len.set_plif_name('single_coding_len')
-		self.single_coding_len.set_transform_type("log(+1)")
-
-		l=array(model.penalty_intron_len_boundaries).flatten()
-		p=array(model.penalty_intron_len_penalty).flatten()
-		self.intron_len=Plif(len(l))
-		self.intron_len.set_plif_limits(l)
-		self.intron_len.set_plif_penalty(p)
-		self.intron_len.set_min_value(min_intron_len)
-		self.intron_len.set_max_value(max_len)
-		self.intron_len.set_plif_name('intron_len')
-		self.intron_len.set_transform_type("log(+1)")
-
-		l=array(model.penalty_coding_boundaries).flatten()
-		p=array(model.penalty_coding_penalty).flatten()
-		self.coding=Plif(len(l))
-		self.coding.set_use_svm(1)
-		self.coding.set_plif_limits(l)
-		self.coding.set_plif_penalty(p)
-		self.coding.set_min_value(-1e+20)
-		self.coding.set_max_value(1e+20)
-		self.coding.set_plif_name('coding')
-
-		l=array(model.penalty_coding2_boundaries).flatten()
-		p=array(model.penalty_coding2_penalty).flatten()
-		self.coding2=Plif(len(l))
-		self.coding2.set_use_svm(3)
-		self.coding2.set_plif_limits(l)
-		self.coding2.set_plif_penalty(p)
-		self.coding2.set_min_value(-1e+20)
-		self.coding2.set_max_value(1e+20)
-		self.coding2.set_plif_name('coding2')
-
-		l=array(model.penalty_coding3_boundaries).flatten()
-		p=array(model.penalty_coding3_penalty).flatten()
-		self.coding3=Plif(len(l))
-		self.coding3.set_use_svm(5)
-		self.coding3.set_plif_limits(l)
-		self.coding3.set_plif_penalty(p)
-		self.coding3.set_min_value(-1e+20)
-		self.coding3.set_max_value(1e+20)
-		self.coding3.set_plif_name('coding3')
-
-		l=array(model.penalty_coding4_boundaries).flatten()
-		p=array(model.penalty_coding4_penalty).flatten()
-		self.coding4=Plif(len(l))
-		self.coding4.set_use_svm(7)
-		self.coding4.set_plif_limits(l)
-		self.coding4.set_plif_penalty(p)
-		self.coding4.set_min_value(-1e+20)
-		self.coding4.set_max_value(1e+20)
-		self.coding4.set_plif_name('coding4')
-
-		l=array(model.penalty_intron_boundaries).flatten()
-		p=array(model.penalty_intron_penalty).flatten()
-		self.intron=Plif(len(l))
-		self.intron.set_use_svm(2)
-		self.intron.set_plif_limits(l)
-		self.intron.set_plif_penalty(p)
-		self.intron.set_min_value(-1e+20)
-		self.intron.set_max_value(1e+20)
-		self.intron.set_plif_name('intron')
-
-		l=array(model.penalty_intron2_boundaries).flatten()
-		p=array(model.penalty_intron2_penalty).flatten()
-		self.intron2=Plif(len(l))
-		self.intron2.set_use_svm(4)
-		self.intron2.set_plif_limits(l)
-		self.intron2.set_plif_penalty(p)
-		self.intron2.set_min_value(-1e+20)
-		self.intron2.set_max_value(1e+20)
-		self.intron2.set_plif_name('intron2')
-
-		l=array(model.penalty_intron3_boundaries).flatten()
-		p=array(model.penalty_intron3_penalty).flatten()
-		self.intron3=Plif(len(l))
-		self.intron3.set_use_svm(6)
-		self.intron3.set_plif_limits(l)
-		self.intron3.set_plif_penalty(p)
-		self.intron3.set_min_value(-1e+20)
-		self.intron3.set_max_value(1e+20)
-		self.intron3.set_plif_name('intron3')
-
-		l=array(model.penalty_intron4_boundaries).flatten()
-		p=array(model.penalty_intron4_penalty).flatten()
-		self.intron4=Plif(len(l))
-		self.intron4.set_use_svm(8)
-		self.intron4.set_plif_limits(l)
-		self.intron4.set_plif_penalty(p)
-		self.intron4.set_min_value(-1e+20)
-		self.intron4.set_max_value(1e+20)
-		self.intron4.set_plif_name('intron4')
-
-		p=array(model.penalty_transitions_penalty).flatten()
-		self.transitions=Plif(len(p))
-		self.transitions.set_plif_penalty(p)
-		self.transitions.set_min_value(-1e+20)
-		self.transitions.set_max_value(1e+20)
-
-		#create magic plifarrays
-		self.first_coding_plif_array=PlifArray()
-		self.first_coding_plif_array.add_plif(self.first_coding_len)
-		self.first_coding_plif_array.add_plif(self.coding)
-		self.first_coding_plif_array.add_plif(self.coding2)
-		self.first_coding_plif_array.add_plif(self.coding3)
-		self.first_coding_plif_array.add_plif(self.coding4)
-
-		self.last_coding_plif_array=PlifArray()
-		self.last_coding_plif_array.add_plif(self.last_coding_len)
-		self.last_coding_plif_array.add_plif(self.coding)
-		self.last_coding_plif_array.add_plif(self.coding2)
-		self.last_coding_plif_array.add_plif(self.coding3)
-		self.last_coding_plif_array.add_plif(self.coding4)
-
-		self.coding_plif_array=PlifArray()
-		self.coding_plif_array.add_plif(self.coding_len)
-		self.coding_plif_array.add_plif(self.coding)
-		self.coding_plif_array.add_plif(self.coding2)
-		self.coding_plif_array.add_plif(self.coding3)
-		self.coding_plif_array.add_plif(self.coding4)
-
-		self.single_coding_plif_array=PlifArray()
-		self.single_coding_plif_array.add_plif(self.single_coding_len)
-		self.single_coding_plif_array.add_plif(self.coding)
-		self.single_coding_plif_array.add_plif(self.coding2)
-		self.single_coding_plif_array.add_plif(self.coding3)
-		self.single_coding_plif_array.add_plif(self.coding4)
-
-		self.intron_plif_array=PlifArray()
-		self.intron_plif_array.add_plif(self.intron_len)
-		self.intron_plif_array.add_plif(self.intron)
-		self.intron_plif_array.add_plif(self.intron2)
-		self.intron_plif_array.add_plif(self.intron3)
-		self.intron_plif_array.add_plif(self.intron4)
-
-		#finally create a single array with all the plifs
-		self.plif_array=DynamicPlifArray()
-		self.plif_array.append_element(self.acceptor)
-		self.plif_array.append_element(self.donor)
-		self.plif_array.append_element(self.first_coding_plif_array)
-		self.plif_array.append_element(self.last_coding_plif_array)
-		self.plif_array.append_element(self.coding_plif_array)
-		self.plif_array.append_element(self.single_coding_plif_array)
-		self.plif_array.append_element(self.intron_plif_array)
-
-	def get_plif_array(self):
-		return self.plif_array
diff --git a/applications/msplicer/seqdict.py b/applications/msplicer/seqdict.py
deleted file mode 100644
index 18fe75ca469..00000000000
--- a/applications/msplicer/seqdict.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import string
-
-class predictions(object):
-	def __init__(self, positions=None, scores=None):
-		self.positions=positions
-		self.scores=scores
-
-	def set_positions(self, positions):
-		self.positions=positions;
-	def get_positions(self):
-		return self.positions
-
-	def set_scores(self, scores):
-		self.scores=scores
-	def get_scores(self):
-		return self.scores
-
-	def __str__(self):
-		return 'positions: ' + `self.positions` + 'scores: ' + `self.scores`
-	def __repr__(self):
-		return self.__str__()
-
-class sequence(object):
-	def __init__(self, name, seq, (start,end)):
-		assert(start<end<len(seq))
-		self.start=start
-		self.end=end
-		self.name=name
-		self.seq=seq
-		self.preds=dict()
-		self.preds['acceptor']=predictions()
-		self.preds['donor']=predictions()
-
-	def __str__(self):
-		s="start:" + `self.start`
-		s+=" end:" + `self.end`
-		s+=" name:" + `self.name`
-		s+=" sequence:" + `self.seq[0:10]`
-		s+="... preds:" + `self.preds`
-		return s
-	def __repr__(self):
-		return self.__str__()
-
-def seqdict(dic, (start,end)):
-	""" takes a fasta dict as input and
-	generates a list of sequence objects from it """
-
-	sequences=list()
-
-	#translate string to ACGT / all non ACGT letters are mapped to A
-	tab=''
-	for i in xrange(256):
-		if chr(i).upper() in 'ACGT':
-			tab+=chr(i).upper()
-		else:
-			tab+='A'
-
-	for seqname in dic:
-		seq=string.translate(dic[seqname], tab)
-		seq=seq.upper()
-		if end<0:
-			stop=len(seq)+end
-		else:
-			stop=end
-
-		sequences.append(sequence(seqname, seq, (start,stop)))
-
-	return sequences
diff --git a/applications/msplicer/signal_detectors.py b/applications/msplicer/signal_detectors.py
deleted file mode 100644
index e12002c2d5f..00000000000
--- a/applications/msplicer/signal_detectors.py
+++ /dev/null
@@ -1,164 +0,0 @@
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Written (W) 2006-2008 Soeren Sonnenburg
-# Written (W) 2007 Gunnar Raetsch
-# Copyright (C) 2006-2008 Fraunhofer Institute FIRST and Max-Planck-Society
-#
-
-import sys
-import numpy
-import seqdict
-
-from shogun import KernelMachine,StringCharFeatures,DNA,WeightedDegreeStringKernel
-
-class svm_splice_model(object):
-	def __init__(self, order, traindat, alphas, b, (window_left,offset,window_right), consensus):
-
-		f=StringCharFeatures(DNA)
-		f.set_features(traindat)
-		wd_kernel = WeightedDegreeStringKernel(f,f, int(order))
-		wd_kernel.io.set_target_to_stderr()
-
-		self.svm=KernelMachine(wd_kernel, alphas, numpy.arange(len(alphas), dtype=numpy.int32), b)
-		self.svm.io.set_target_to_stderr()
-		self.svm.parallel.set_num_threads(self.svm.parallel.get_num_cpus())
-		self.svm.set_linadd_enabled(False)
-		self.svm.set_batch_computation_enabled(False)
-
-		self.window_left=int(window_left)
-		self.window_right=int(window_right)
-
-		self.consensus=consensus
-		self.wd_kernel=wd_kernel
-		self.traindat=f
-		self.offset=offset
-
-	def get_positions(self, sequence):
-		positions=list()
-
-		for cons in self.consensus:
-			l=sequence.find(cons)
-			while l>-1:
-				if l<len(sequence)-self.window_right-2 and l>self.window_left:
-					positions.append(l+self.offset)
-				l=sequence.find(cons, l+1)
-
-		positions.sort()
-		return positions
-
-	def get_predictions_from_seqdict(self, seqdic, site):
-		""" we need to generate a huge test features object
-			containing all locations found in each seqdict-sequence
-			and each location (this is necessary to efficiently
-			(==fast,low memory) compute the splice outputs
-		"""
-
-		seqlen=self.window_right+self.window_left+2
-
-		num=0
-		for s in seqdic:
-			num+= len(s.preds[site].positions)
-
-		testdat = []
-
-		for s in seqdic:
-			sequence=s.seq
-			positions=s.preds[site].positions
-			for j in xrange(len(positions)):
-				i=positions[j] - self.offset
-				s=sequence[i-self.window_left:i+self.window_right+2]
-				testdat.append(s)
-
-		t=StringCharFeatures(testdat, DNA)
-
-		self.wd_kernel.init(self.traindat, t)
-		self.svm.set_kernel(self.wd_kernel)
-		l=self.svm.apply().get_labels()
-		sys.stderr.write("\n...done...\n")
-
-		k=0
-		for s in seqdic:
-			num=len(s.preds[site].positions)
-			scores= num * [0]
-			for j in xrange(num):
-				scores[j]=l[k]
-				k+=1
-			s.preds[site].set_scores(scores)
-
-	def get_positions_from_seqdict(self, seqdic, site):
-		for d in seqdic:
-			positions=list()
-			sequence=d.seq
-			for cons in self.consensus:
-				l=sequence.find(cons)
-				while l>-1:
-					if l<len(sequence)-self.window_right-2 and l>self.window_left:
-						positions.append(l+self.offset)
-					l=sequence.find(cons, l+1)
-			positions.sort()
-			d.preds[site].set_positions(positions)
-
-	def get_predictions(self, sequence, positions):
-
-		seqlen=self.window_right+self.window_left+2
-		num=len(positions)
-
-		testdat = []
-
-		for j in xrange(num):
-			i=positions[j] - self.offset ;
-			s=sequence[i-self.window_left:i+self.window_right+2]
-			testdat.append(s)
-
-		t=StringCharFeatures(DNA)
-		t.set_string_features(testdat)
-
-		self.wd_kernel.init(self.traindat, t)
-		l=self.svm.classify().get_labels()
-		sys.stderr.write("\n...done...\n")
-		return l
-
-class signal_detectors(object):
-	def __init__(self, model):
-		if model.don_splice_use_gc:
-			don_consensus=['GC','GT']
-		else:
-			don_consensus=['GT']
-
-		self.acceptor=svm_splice_model(model.acc_splice_order, model.acc_splice_svs,
-				numpy.array(model.acc_splice_alphas).flatten(), model.acc_splice_b,
-				(model.acc_splice_window_left, 2, model.acc_splice_window_right), ['AG'])
-		self.donor=svm_splice_model(model.don_splice_order, model.don_splice_svs,
-				numpy.array(model.don_splice_alphas).flatten(), model.don_splice_b,
-				(model.don_splice_window_left, 0, model.don_splice_window_right),
-				don_consensus)
-
-	def set_sequence(self, seq):
-		self.acceptor.set_sequence(seq)
-		self.donor.set_sequence(seq)
-
-	def predict_acceptor_sites(self, seq):
-		pos=self.acceptor.get_positions(seq)
-		sys.stderr.write("computing svm output for acceptor positions\n")
-		pred=self.acceptor.get_predictions(seq, pos)
-		return (pos,pred)
-
-	def predict_donor_sites(self,seq):
-		pos=self.donor.get_positions(seq)
-		sys.stderr.write("computing svm output for donor positions\n")
-		pred=self.donor.get_predictions(seq, pos)
-		return (pos,pred)
-
-	def predict_acceptor_sites_from_seqdict(self, seqs):
-		self.acceptor.get_positions_from_seqdict(seqs, 'acceptor')
-		sys.stderr.write("computing svm output for acceptor positions\n")
-		self.acceptor.get_predictions_from_seqdict(seqs, 'acceptor')
-
-	def predict_donor_sites_from_seqdict(self, seqs):
-		self.donor.get_positions_from_seqdict(seqs, 'donor')
-		sys.stderr.write("computing svm output for donor positions\n")
-		self.donor.get_predictions_from_seqdict(seqs, 'donor')
diff --git a/applications/ocr/Ai.py b/applications/ocr/Ai.py
deleted file mode 100644
index f8a3284ae52..00000000000
--- a/applications/ocr/Ai.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# File   : $HeadURL$
-# Version: $Id$
-
-from shogun import RealFeatures, MulticlassLabels
-from shogun import GaussianKernel
-from shogun import GMNPSVM
-
-import numpy as np
-import gzip as gz
-import pickle as pkl
-
-import common as com
-
-class Ai:
-    def __init__(self):
-        self.x = None
-        self.y = None
-
-        self.x_test = None
-        self.y_test = None
-
-        self.svm = None
-
-    def load_train_data(self, x_fname, y_fname):
-        Ai.__init__(self)
-
-        self.x = np.loadtxt(x_fname)
-        self.y = np.loadtxt(y_fname) - 1.0
-
-        self.x_test = self.x
-        self.y_test = self.y
-
-    def _svm_new(self, kernel_width, c, epsilon):
-        if self.x == None or self.y == None:
-            raise Exception("No training data loaded.")
-
-        x = RealFeatures(self.x)
-        y = MulticlassLabels(self.y)
-
-        self.svm = GMNPSVM(c, GaussianKernel(x, x, kernel_width), y)
-        self.svm.set_epsilon(epsilon)
-
-    def write_svm(self):
-        gz_stream = gz.open(com.TRAIN_SVM_FNAME_GZ, 'wb', 9)
-        pkl.dump(self.svm, gz_stream)
-        gz_stream.close()
-
-    def read_svm(self):
-        gz_stream = gz.open(com.TRAIN_SVM_FNAME_GZ, 'rb')
-        self.svm = pkl.load(gz_stream)
-        gz_stream.close()
-
-    def enable_validation(self, train_frac):
-        x = self.x
-        y = self.y
-
-        idx = np.arange(len(y))
-        np.random.shuffle(idx)
-        train_idx=idx[:np.floor(train_frac*len(y))]
-        test_idx=idx[np.ceil(train_frac*len(y)):]
-
-        self.x = x[:,train_idx]
-        self.y = y[train_idx]
-        self.x_test = x[:,test_idx]
-        self.y_test = y[test_idx]
-
-    def train(self, kernel_width, c, epsilon):
-        self._svm_new(kernel_width, c, epsilon)
-
-        x = RealFeatures(self.x)
-        self.svm.io.enable_progress()
-        self.svm.train(x)
-        self.svm.io.disable_progress()
-
-    def load_classifier(self): self.read_svm()
-
-    def classify(self, matrix):
-        cl = self.svm.apply(
-            RealFeatures(
-                np.reshape(matrix, newshape=(com.FEATURE_DIM, 1),
-                           order='F')
-                )
-            ).get_label(0)
-
-        return int(cl + 1.0) % 10
-
-    def get_test_error(self):
-        self.svm.io.enable_progress()
-        l = self.svm.apply(RealFeatures(self.x_test)).get_labels()
-        self.svm.io.disable_progress()
-
-        return 1.0 - np.mean(l == self.y_test)
diff --git a/applications/ocr/FigureWidget.py b/applications/ocr/FigureWidget.py
deleted file mode 100644
index c90f59bd74b..00000000000
--- a/applications/ocr/FigureWidget.py
+++ /dev/null
@@ -1,147 +0,0 @@
-# File   : $HeadURL$
-# Version: $Id$
-
-import gtk
-import numpy as np
-
-import common as com
-from QuadrWidget import QuadrWidget
-
-class FigureWidget(QuadrWidget):
-    THICKNESS_FRAC = 0.03
-
-    def __init__(self, go_func, go_args):
-        QuadrWidget.__init__(self)
-        self.add_events(gtk.gdk.ALL_EVENTS_MASK)
-
-        self.drag = False
-        self.coords = []
-
-        self.go_func = go_func
-        self.go_args = go_args
-
-        self.connect("expose_event", FigureWidget.on_redraw)
-        self.connect("button-press-event", FigureWidget.on_press)
-        self.connect("button-release-event", FigureWidget.on_release)
-        self.connect("motion-notify-event", FigureWidget.on_motion)
-        self.connect("realize", FigureWidget.on_realize)
-
-    def on_realize(self):
-        self.window.set_cursor(gtk.gdk.Cursor(gtk.gdk.CROSSHAIR))
-
-    def on_press(self, event):
-        if event.button == com.BUTTON_RIGHT:
-            self.clear_coords()
-            self.drag = False
-
-        if event.button == com.BUTTON_MIDDLE:
-            self.go_func(*self.go_args)
-            self.drag = False
-
-        if event.button != com.BUTTON_LEFT:
-            return False
-
-        self.drag = True
-        self.coords.insert(0, [])
-
-        # Two times to add a zero-length line
-        self.on_motion(event)
-        self.on_motion(event)
-
-        return False
-
-    def on_release(self, event):
-        if event.button != com.BUTTON_LEFT:
-            return False
-
-        self.drag = False
-        return False
-
-    def on_motion(self, event):
-        if not self.drag:
-            return False
-
-        width = self.window.get_size()[0]
-        height = self.window.get_size()[1]
-        x = event.x/width if event.x < width else com.NEAR_ONE_NEG
-        y = event.y/height if event.y < height else com.NEAR_ONE_NEG
-        x = 0 if x < 0 else x
-        y = 0 if y < 0 else y
-
-        self.coords[0].append((x, y))
-
-        self.update()
-
-        return False
-
-    def on_redraw(self, event):
-        gc = self.style.fg_gc[self.state]
-        w = self.window
-        width = w.get_size()[0]
-        height = w.get_size()[1]
-
-        # Backup graphic context
-        self.default_fg = gc.foreground
-        self.line_width = gc.line_width
-        self.line_style = gc.line_style
-        self.cap_style = gc.cap_style
-        self.join_style = gc.join_style
-
-        # Background
-        gc.set_rgb_fg_color(com.COLOR_WHITE)
-        w.draw_rectangle(gc, True, 0, 0, width-1, height-1)
-        gc.set_rgb_fg_color(com.COLOR_BLACK)
-        w.draw_rectangle(gc, False, 0, 0, width-1, height-1)
-
-        # Data
-        gc.set_line_attributes(int(height*self.THICKNESS_FRAC),
-                               gtk.gdk.LINE_SOLID, gtk.gdk.CAP_ROUND,
-                               gtk.gdk.JOIN_ROUND)
-        for poly in self.coords:
-            w.draw_lines(gc, map(
-                    lambda coord:
-                        (int(coord[0]*width), int(coord[1]*height)),
-                    poly))
-
-        # Recovering graphic context
-        gc.line_width = self.line_width
-        gc.line_style = self.line_style
-        gc.cap_style = self.cap_style
-        gc.join_style = self.join_style
-        gc.foreground = self.default_fg
-
-        return False
-
-    def get_coords(self):
-        result = map(lambda line: np.array(line), self.coords)
-
-        result = map(lambda line: np.transpose(line), result)
-
-        minx = 2.0
-        miny = 2.0
-        for line in result:
-            minx = min(minx, min(line[0]))
-            miny = min(miny, min(line[1]))
-        for line in result:
-            line[0] -= minx
-            line[1] -= miny
-
-        maxxy = 0.0
-        for line in result: maxxy = max(maxxy, line.max())
-        for line in result: line /= maxxy + com.NEAR_ZERO_POS
-
-        maxx = 0.0
-        maxy = 0.0
-        for line in result:
-            maxx = max(maxx, max(line[0]))
-            maxy = max(maxy, max(line[1]))
-        for line in result:
-            line[0] += (1 - maxx)/2
-            line[1] += (1 - maxy)/2
-
-        result = map(lambda line: np.transpose(line), result)
-        return result
-
-    def clear_coords(self):
-        self.coords = []
-        self.update()
diff --git a/applications/ocr/MatrixWidget.py b/applications/ocr/MatrixWidget.py
deleted file mode 100644
index b4198f646d8..00000000000
--- a/applications/ocr/MatrixWidget.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# File   : $HeadURL$
-# Version: $Id$
-
-import gtk
-import numpy as np
-
-import common as com
-from QuadrWidget import QuadrWidget
-
-class MatrixWidget(QuadrWidget):
-    def __init__(self, matrix_size):
-        QuadrWidget.__init__(self)
-
-        self.matrix = np.zeros((matrix_size, matrix_size),
-                               dtype=np.bool)
-
-        self.connect("expose_event", MatrixWidget.on_redraw)
-
-    def on_redraw(self, event):
-        gc = self.style.fg_gc[self.state]
-        w = self.window
-        width = w.get_size()[0]
-        height = w.get_size()[1]
-
-        # Backup graphic context
-        self.default_fg = gc.foreground
-
-        # Background
-        gc.set_rgb_fg_color(com.COLOR_WHITE)
-        w.draw_rectangle(gc, True, 0, 0, width-1, height-1)
-
-        size_y = self.matrix.shape[0]
-        size_x = self.matrix.shape[1]
-        pixels_per_y = float(height)/size_y
-        pixels_per_x = float(width)/size_x
-        gc.set_rgb_fg_color(com.COLOR_GRAY)
-        for y in range(size_y):
-            w.draw_line(gc, 0, int(y*pixels_per_y),
-                        width-1, int(y*pixels_per_y))
-            for x in range(size_x):
-                if y == 0:
-                    w.draw_line(gc, int(x*pixels_per_x), 0,
-                                int(x*pixels_per_x), height-1)
-                if self.matrix[y, x]:
-                    gc.set_rgb_fg_color(com.COLOR_BLACK)
-                    w.draw_rectangle(gc, self.matrix[y, x]
-                                     > com.NEAR_ZERO_POS,
-                                     int(x*pixels_per_x),
-                                     int(y*pixels_per_y),
-                                     int(pixels_per_x+1),
-                                     int(pixels_per_y+1))
-                    gc.set_rgb_fg_color(com.COLOR_GRAY)
-
-        gc.set_rgb_fg_color(com.COLOR_BLACK)
-        w.draw_rectangle(gc, False, 0, 0, width-1, height-1)
-
-        gc.foreground = self.default_fg
-
-        return False
-
-    def set_image(self, image):
-        self.matrix = image
-        self.update()
-
-    def get_image(self):
-        return self.matrix
diff --git a/applications/ocr/QuadrWidget.py b/applications/ocr/QuadrWidget.py
deleted file mode 100644
index 41ccceeb6c2..00000000000
--- a/applications/ocr/QuadrWidget.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# File   : $HeadURL$
-# Version: $Id$
-
-import gtk
-
-class QuadrWidget(gtk.DrawingArea):
-
-    # No SELF.CONNECT because we like to prevent the call of
-    # gtk.DrawingArea.do_size_allocate()
-    __gsignals__ = {"size-allocate": 'override'}
-
-    def __init__(self):
-        gtk.DrawingArea.__init__(self)
-
-    def update(self):
-        width = self.window.get_size()[0]
-        height = self.window.get_size()[1]
-        self.window.invalidate_rect(gtk.gdk.Rectangle(
-                0, 0, width, height), False)
-        #self.window.process_updates(False)
-
-    def do_size_allocate(self, allocation):
-        if allocation.width < allocation.height:
-            allocation.y += (allocation.height-allocation.width)/2
-            allocation.height = allocation.width
-        elif allocation.width > allocation.height:
-            allocation.x += (allocation.width-allocation.height)/2
-            allocation.width = allocation.height
-
-        gtk.DrawingArea.do_size_allocate(self, allocation)
diff --git a/applications/ocr/README b/applications/ocr/README
deleted file mode 100644
index 818e073f7d4..00000000000
--- a/applications/ocr/README
+++ /dev/null
@@ -1,9 +0,0 @@
-This example illustrates how to do ocr of handwritten digits.
-
-./predict  - Starts a GUI where one can draw digits with the mouse.
-             Pressing the classify button will detect the drawn digit
-             (so does pressing the middle mouse button). Pressing the right
-             mouse button will clear the window.
-
-./train    - Does model selection using a SVM with a Gaussian kernel and saves the
-            'best' model.
diff --git a/applications/ocr/common.py b/applications/ocr/common.py
deleted file mode 100644
index c28e9bdb409..00000000000
--- a/applications/ocr/common.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# File   : $HeadURL$
-# Version: $Id$
-
-import gtk
-
-UINT16_MAX = 0xffff
-UINT_GRAY = UINT16_MAX - UINT16_MAX/4
-COLOR_BLACK = gtk.gdk.Color(0, 0, 0)
-COLOR_WHITE = gtk.gdk.Color(UINT16_MAX, UINT16_MAX, UINT16_MAX)
-COLOR_GRAY = gtk.gdk.Color(UINT_GRAY, UINT_GRAY, UINT_GRAY)
-COLOR_BLUE = gtk.gdk.Color(UINT_GRAY, UINT_GRAY, UINT16_MAX)
-
-BUTTON_LEFT = 1
-BUTTON_MIDDLE = 2
-BUTTON_RIGHT = 3
-
-NEAR_ZERO_POS = 1e-8
-NEAR_ONE_NEG = 1-NEAR_ZERO_POS
-
-TRAIN_X_FNAME = "data/train_data_x.asc.gz"
-TRAIN_Y_FNAME = "data/train_data_y.asc.gz"
-
-TRAIN_SVM_FNAME_GZ = "data/ocr.svm.gz"
-
-MATIX_IMAGE_SIZE = 16
-FEATURE_DIM = MATIX_IMAGE_SIZE * MATIX_IMAGE_SIZE
-
-HISTORY_WIDTH = 5
-HISTORY_HEIGHT = 2
-
-FEATURE_RANGE_MAX = 1.0
diff --git a/applications/ocr/data b/applications/ocr/data
deleted file mode 120000
index ee220ad1a1b..00000000000
--- a/applications/ocr/data
+++ /dev/null
@@ -1 +0,0 @@
-../../data/ocr
\ No newline at end of file
diff --git a/applications/ocr/predict b/applications/ocr/predict
deleted file mode 100755
index 43a5eae20b2..00000000000
--- a/applications/ocr/predict
+++ /dev/null
@@ -1,203 +0,0 @@
-#!/usr/bin/env python
-
-# File   : $HeadURL$
-# Version: $Id$
-
-import gtk, sys
-import numpy as np
-
-from FigureWidget import FigureWidget
-from MatrixWidget import MatrixWidget
-from Ai import Ai
-
-import common as com
-
-def _draw_line(image, start, end):
-    start = np.array(start, dtype=np.int)
-    end = np.array(end, dtype=np.int)
-
-    delta = abs(end - start)
-
-    e = delta[0]/2.0
-    x, y = start
-    image[y, x] = com.FEATURE_RANGE_MAX
-    while np.any((x, y) != end):
-        if e < 0.0 or x == end[0]:
-            y += -1 if start[1] > end[1] else 1
-            e += delta[0]
-        if e >= 0.0 and x != end[0]:
-            x += -1 if start[0] > end[0] else 1
-            e -= delta[1]
-        image[y, x] = com.FEATURE_RANGE_MAX
-
-def button_go_clicked(button, main_window):
-    coords = map(lambda line: com.MATIX_IMAGE_SIZE*line,
-                 main_window.figure.get_coords())
-    image = np.zeros((com.MATIX_IMAGE_SIZE, com.MATIX_IMAGE_SIZE),
-                     dtype=np.float)
-
-    for line in coords:
-        for i in range(line.shape[0]-1):
-            _draw_line(image, line[i], line[i+1])
-
-    main_window.push_image(image,
-                           str(main_window.ai.classify(image))
-                           )
-
-    main_window.figure.clear_coords()
-    main_window.set_focus(main_window.button_go)
-
-def button_clear_clicked(button, main_window):
-    main_window.figure.clear_coords()
-    main_window.set_focus(main_window.button_go)
-
-class MainWindow(gtk.Window):
-    TITLE = "OCR Demo - Press middle mouse button to classify, right" \
-        " mouse button to clear"
-
-    MIN_WIDTH = 800
-    MIN_HEIGHT = 260
-
-    MAIN_PADDING = 4
-    BOX_PADDING = 4
-
-    MARKUP_PRE = '<span size="x-large" color="red"><b>'
-    MARKUP_POST = '</b></span>'
-
-    def __init__(self):
-        # Main Window
-        gtk.Window.__init__(self, type=gtk.WINDOW_TOPLEVEL)
-        self.set_title(self.TITLE)
-        self.connect("delete-event", MainWindow.on_delete)
-        self.set_size_request(self.MIN_WIDTH, self.MIN_HEIGHT)
-
-        # AI
-        self.ai = Ai()
-        self.ai.load_classifier()
-
-        # Main Container
-        self.main_align = gtk.Alignment(xalign=0.0, yalign=0.0,
-                                        xscale=1.0, yscale=1.0)
-        self.main_align.set_padding(self.MAIN_PADDING,
-                                    self.MAIN_PADDING,
-                                    self.MAIN_PADDING,
-                                    self.MAIN_PADDING)
-        self.add(self.main_align)
-
-        # Figure HBox
-        self.figure_hbox = gtk.HBox(homogeneous=False,
-                                    spacing=self.BOX_PADDING)
-        self.main_align.add(self.figure_hbox)
-
-        # Main VBox
-        self.main_vbox = gtk.VBox(homogeneous=False,
-                                  spacing=self.BOX_PADDING)
-        self.figure_hbox.add(self.main_vbox)
-
-
-        # Figure
-        self.figure = FigureWidget(button_go_clicked, (None, self))
-        self.main_vbox.pack_start(self.figure, expand=True, fill=True,
-                                  padding=0)
-
-        # VSeperator
-        self.figure_vsep = gtk.VSeparator()
-        self.figure_hbox.pack_start(self.figure_vsep, expand=False,
-                                    fill=True, padding=0)
-
-        # History
-        self.mat_table = gtk.Table(rows=com.HISTORY_HEIGHT,
-                                   columns=com.HISTORY_WIDTH,
-                                   homogeneous=True)
-        self.figure_hbox.pack_start(self.mat_table, expand=True,
-                                    fill=True, padding=0)
-
-        self.mat_frame = []
-        self.mat_vbox = []
-        self.mat_images = []
-        self.mat_result = []
-        for y in range(com.HISTORY_HEIGHT):
-            for x in range(com.HISTORY_WIDTH):
-                i = y*com.HISTORY_WIDTH + x
-
-                self.mat_frame.append(gtk.Frame(
-                        "History " + str(y*com.HISTORY_WIDTH + x)))
-                if i == 0:
-                    self.mat_frame[i].set_label("Current")
-                    mf_style = self.mat_frame[i].get_style()
-                    mf_style.bg[gtk.STATE_NORMAL] = com.COLOR_BLUE
-                    self.mat_frame[i].set_style(mf_style)
-                self.mat_table.attach(self.mat_frame[i],
-                                      left_attach=x, right_attach=x+1,
-                                      top_attach=y, bottom_attach=y+1,
-                                      xpadding=self.BOX_PADDING)
-
-                self.mat_vbox.append(gtk.VBox(homogeneous=False,
-                                              spacing=self.BOX_PADDING)
-                                     )
-                self.mat_frame[i].add(self.mat_vbox[i])
-
-                self.mat_result.append(gtk.Label(
-                        self.MARKUP_PRE + "?" + self.MARKUP_POST))
-                self.mat_result[i].set_use_markup(True)
-                self.mat_vbox[i].pack_start(
-                    self.mat_result[i], expand=False, fill=True,
-                    padding=0)
-
-                self.mat_images.append(MatrixWidget(
-                        com.MATIX_IMAGE_SIZE))
-                self.mat_vbox[i].pack_start(
-                    self.mat_images[i], expand=True, fill=True,
-                    padding=self.BOX_PADDING)
-
-        # HSeperator
-        self.main_hsep = gtk.HSeparator()
-        self.main_vbox.pack_start(self.main_hsep, expand=False,
-                                  fill=True, padding=0)
-
-        # HBox
-        self.hbox = gtk.HBox(homogeneous=False,
-                             spacing=self.BOX_PADDING)
-        self.main_vbox.pack_end(self.hbox, expand=False, fill=True,
-                                padding=0)
-
-        # Button Go
-        self.button_go = gtk.Button(label="_Classify")
-        self.button_go.connect("clicked", button_go_clicked, self)
-        self.button_go.set_focus_on_click(False)
-        self.hbox.add(self.button_go)
-
-        # Button clear
-        self.button_clear = gtk.Button(label="Clea_r")
-        self.button_clear.set_focus_on_click(False)
-        self.button_clear.connect("clicked", button_clear_clicked,
-                                  self)
-        self.hbox.add(self.button_clear)
-
-    def on_delete(self, event):
-        gtk.Window.destroy(self)
-        gtk.main_quit()
-        return True
-
-    def push_image(self, image, str):
-        prev_image = image
-        prev_str = str
-
-        for i in range(com.HISTORY_WIDTH*com.HISTORY_HEIGHT):
-            tmp_image = self.mat_images[i].get_image()
-            tmp_str = self.mat_result[i].get_text()
-            self.mat_images[i].set_image(prev_image)
-            self.mat_result[i].set_markup(
-                self.MARKUP_PRE + prev_str + self.MARKUP_POST)
-            prev_image = tmp_image
-            prev_str = tmp_str
-
-def main(argv):
-    gtk.gdk.threads_init()
-
-    window = MainWindow()
-    window.show_all()
-    gtk.main()
-
-if __name__ == '__main__':
-    sys.exit(main(sys.argv))
diff --git a/applications/ocr/train b/applications/ocr/train
deleted file mode 100755
index c3b62df6915..00000000000
--- a/applications/ocr/train
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/env python
-
-# File   : $HeadURL$
-# Version: $Id$
-
-import sys
-
-from Ai import Ai
-import common as com
-
-EPSILON = 1e-3
-
-# optimal epsilon=1.00e-03, C=2.50, kernel_width=24.00
-
-C_LIST = [2.5, 3.0, 3.5]
-KERNEL_WIDTH_LIST = [22.0, 24.0, 26.0]
-
-VALIDATION_TRAIN_FRAC = 0.80
-
-def to_str(e):
-    result = "" if e['error'] > 1.0 else "error=%.4f, " % e['error']
-
-    return "%sepsilon=%.2e, C=%.2f, kernel_width=%.2f" % (
-        result, e['epsilon'], e['c'], e['kernel_width'])
-
-def main(argv):
-    ai = Ai()
-    ai.load_train_data(com.TRAIN_X_FNAME, com.TRAIN_Y_FNAME)
-    ai.enable_validation(VALIDATION_TRAIN_FRAC)
-
-    best_error = {'error': 2.0, 'epsilon': -1.0, 'c': -1.0,
-                  'kernel_width': -1.0}
-    for kernel_width in KERNEL_WIDTH_LIST:
-        for c in C_LIST:
-            cur = {'error': 2.0, 'epsilon': EPSILON, 'c': c,
-                   'kernel_width': kernel_width}
-
-            print "Trying: %s" % to_str(cur)
-
-            ai.train(kernel_width=kernel_width, c=c,
-                     epsilon=EPSILON)
-            print ""
-            cur['error'] = ai.get_test_error()
-            print ""
-
-            if cur['error'] < best_error['error']:
-                best_error = cur
-                print "New best: %s" % to_str(best_error)
-            else:
-                print "Result: %s" % to_str(cur)
-                print "Best: %s" % to_str(best_error)
-
-            print ""
-
-    print "Finally using parameters: %s" % to_str(best_error)
-    ai.load_train_data(com.TRAIN_X_FNAME, com.TRAIN_Y_FNAME)
-    ai.train(kernel_width=best_error['kernel_width'],
-             c=best_error['c'], epsilon=best_error['epsilon'])
-    ai.write_svm()
-    print ""
-    print "Finished :DD"
-
-if __name__ == '__main__':
-    sys.exit(main(sys.argv))
diff --git a/applications/tapkee/faces_embedding.py b/applications/tapkee/faces_embedding.py
deleted file mode 100644
index 24cf327031d..00000000000
--- a/applications/tapkee/faces_embedding.py
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/usr/bin/env python
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or
-# (at your option) any later version.
-#
-# Written (W) 2011 Sergey Lisitsyn
-# Copyright (C) 2011 Sergey Lisitsyn
-
-from shogun import *
-from numpy import *
-from matplotlib.offsetbox import TextArea, DrawingArea, OffsetImage, AnnotationBbox
-import re,os,time
-from pylab import *
-
-def build_features(path):
-	files = os.listdir(path)
-	files.remove('README')
-	N = len(files)
-	(nd,md) = imread(os.path.join(path,files[0])).shape
-	dim = nd*md
-	feature_matrix = zeros([dim,N])
-	for i,filename in enumerate(files):
-		feature_matrix[:,i] = imread(os.path.join(path,filename)).ravel()
-	return nd,md,RealFeatures(feature_matrix)
-
-path = '../../data/faces/'
-converter = DiffusionMaps
-nd,md,features = build_features(path)
-converter_instance = converter()
-converter_instance.set_t(5)
-converter_instance.set_target_dim(2)
-
-start = time.time()
-new_features = converter_instance.embed(features).get_feature_matrix()
-print new_features.shape
-end = time.time()
-
-clusterer = KMeans
-clusterer_instance = clusterer(2,EuclideanDistance())
-clusterer_instance.train(features)
-labels = clusterer_instance.apply().get_labels()
-print labels
-
-print 'applied %s, took %fs' % (converter_instance.get_name(), end-start)
-print 'plotting'
-
-fig = figure()
-ax = fig.add_subplot(111,axisbg='#ffffff')
-ax.scatter(new_features[0],new_features[1],color='black')
-import random
-for i in range(len(new_features[0])):
-	feature_vector = features.get_feature_vector(i)
-	Z = zeros([nd,md,4])
-	Z[:,:,0] = 255-feature_vector.reshape(nd,md)[::-1,:]
-	Z[:,:,1] = Z[:,:,0]
-	Z[:,:,2] = Z[:,:,0]
-	for k in range(nd):
-		for j in range(md):
-			Z[k,j,3] = pow(sin(k*pi/nd)*sin(j*pi/md),0.5)
-	imagebox = OffsetImage(Z,cmap=cm.gray,zoom=0.25)
-	ab = AnnotationBbox(imagebox, (new_features[0,i],new_features[1,i]),
-						pad=0.001,frameon=False)
-	ax.add_artist(ab)
-axis('off')
-savefig('faces.png')
-show()
diff --git a/applications/tapkee/octave_ltsa.m b/applications/tapkee/octave_ltsa.m
deleted file mode 100644
index f79e000146f..00000000000
--- a/applications/tapkee/octave_ltsa.m
+++ /dev/null
@@ -1,11 +0,0 @@
-n = 1000;
-noise = 0.0;
-t = (3 * pi / 2) * (1 + 2 * rand(n, 1));
-height = 30 * rand(n, 1);
-X = [t .* cos(t) height t .* sin(t)] + noise * randn(n, 3);
-
-sg('set_features','TRAIN',X');
-sg('set_converter','ltsa',10);
-embedding = sg('embed',2);
-plot(embedding(:,1),embedding(:,2),'@');
-
diff --git a/applications/tapkee/samples/data.py b/applications/tapkee/samples/data.py
deleted file mode 100644
index 2fc91d07c23..00000000000
--- a/applications/tapkee/samples/data.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import numpy
-
-def swissroll(N=1000):
-	tt = numpy.array((5*numpy.pi/4)*(1+2*numpy.random.rand(N)))
-	height = numpy.array((numpy.random.rand(N)-0.5))
-	noise = 0.0
-	X = numpy.array([(tt+noise*numpy.random.randn(N))*numpy.cos(tt), 10*height, (tt+noise*numpy.random.randn(N))*numpy.sin(tt)])
-	return X
-
diff --git a/applications/tapkee/samples/dm.py b/applications/tapkee/samples/dm.py
deleted file mode 100644
index 036028dfd44..00000000000
--- a/applications/tapkee/samples/dm.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import shogun as sg
-import data
-import numpy as np
-
-# load data
-feature_matrix = data.swissroll()
-# create features instance
-features = sg.RealFeatures(feature_matrix)
-
-# create Diffusion Maps converter instance
-converter = sg.DiffusionMaps()
-
-# set target dimensionality
-converter.set_target_dim(2)
-# set number of time-steps
-converter.set_t(2)
-# set width of gaussian kernel
-converter.set_width(10.0)
-
-# create euclidean distance instance
-distance = sg.EuclideanDistance()
-# enable converter instance to use created distance instance
-converter.set_distance(distance)
-
-# compute embedding with Diffusion Maps method
-embedding = converter.embed(features)
-
-# compute custom distance matrix
-distance_matrix = np.exp(-np.dot(feature_matrix.T,feature_matrix))
-# create Custom Kernel instance
-custom_distance = sg.CustomDistance(distance_matrix)
-# construct embedding based on created distance
-distance_embedding = converter.embed_distance(custom_distance)
diff --git a/applications/tapkee/samples/hlle.py b/applications/tapkee/samples/hlle.py
deleted file mode 100644
index 0dc860931c0..00000000000
--- a/applications/tapkee/samples/hlle.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import shogun as sg
-import data
-
-# load data
-feature_matrix = data.swissroll()
-# create features instance
-features = sg.RealFeatures(feature_matrix)
-
-# create Hessian Locally Linear Embedding converter instance
-converter = sg.HessianLocallyLinearEmbedding()
-
-# set target dimensionality
-converter.set_target_dim(2)
-# set number of neighbors
-converter.set_k(10)
-# set number of threads
-converter.parallel.set_num_threads(2)
-# set nullspace shift (optional)
-converter.set_nullspace_shift(-1e-6)
-
-# compute embedding with Hessian Locally Linear Embedding method
-embedding = converter.embed(features)
diff --git a/applications/tapkee/samples/isomap.py b/applications/tapkee/samples/isomap.py
deleted file mode 100644
index 1d0569e1662..00000000000
--- a/applications/tapkee/samples/isomap.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import shogun as sg
-import data
-import numpy as np
-
-# load data
-feature_matrix = data.swissroll()
-# create features instance
-features = sg.RealFeatures(feature_matrix)
-
-# create Isomap converter instance
-converter = sg.Isomap()
-
-# set number of neighbors to be used
-converter.set_k(15)
-
-# set target dimensionality
-converter.set_target_dim(2)
-
-# compute embedding with Isomap method
-embedding = converter.embed(features)
-
-# set number of threads
-converter.parallel.set_num_threads(2)
-# compute approximate embedding
-approx_embedding = converter.embed(features)
-# disable landmark approximation
-converter.set_landmark(False)
-
-# compute cosine distance matrix 'manually'
-N = features.get_num_vectors()
-distance_matrix = np.zeros((N,N))
-for i in range(N):
-	for j in range(N):
-		distance_matrix[i,j] = \
-		  np.linalg.norm(feature_matrix[:,i]-feature_matrix[:,j],2)
-# create custom distance instance
-distance = sg.CustomDistance(distance_matrix)
-# construct embedding based on created distance
-converter.embed_distance(distance)
diff --git a/applications/tapkee/samples/klle.py b/applications/tapkee/samples/klle.py
deleted file mode 100644
index 77b9908cd68..00000000000
--- a/applications/tapkee/samples/klle.py
+++ /dev/null
@@ -1,35 +0,0 @@
-import shogun as sg
-import data
-import numpy as np
-
-# load data
-feature_matrix = data.swissroll()
-# create features instance
-features = sg.RealFeatures(feature_matrix)
-
-# create Kernel Locally Linear Embedding converter instance
-converter = sg.KernelLocallyLinearEmbedding()
-
-# set target dimensionality
-converter.set_target_dim(2)
-# set number of neighbors
-converter.set_k(10)
-# set number of threads
-converter.parallel.set_num_threads(2)
-# set nullspace shift (optional)
-converter.set_nullspace_shift(-1e-6)
-
-# create Gaussian kernel instance
-kernel = sg.GaussianKernel(100,10.0)
-# enable converter instance to use created kernel instance
-converter.set_kernel(kernel)
-
-# compute embedding with Kernel Locally Linear Embedding method
-embedding = converter.embed(features)
-
-# compute linear kernel matrix
-kernel_matrix = np.dot(feature_matrix.T,feature_matrix)
-# create Custom Kernel instance
-custom_kernel = sg.CustomKernel(kernel_matrix)
-# construct embedding based on created kernel
-kernel_embedding = converter.embed_kernel(custom_kernel)
diff --git a/applications/tapkee/samples/la.py b/applications/tapkee/samples/la.py
deleted file mode 100644
index b125fcc67c8..00000000000
--- a/applications/tapkee/samples/la.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import shogun as sg
-import data
-import numpy as np
-
-# load data
-feature_matrix = data.swissroll()
-# create features instance
-features = sg.RealFeatures(feature_matrix)
-
-# create Laplacian Eigenmaps converter instance
-converter = sg.LaplacianEigenmaps()
-
-# set target dimensionality
-converter.set_target_dim(2)
-# set number of neighbors
-converter.set_k(20)
-# set tau multiplier
-converter.set_tau(1.0)
-
-# compute embedding with Laplacian Eigenmaps method
-embedding = converter.embed(features)
-
-# compute cosine distance matrix 'manually'
-N = features.get_num_vectors()
-distance_matrix = np.zeros((N,N))
-for i in range(N):
-	for j in range(N):
-		distance_matrix[i,j] = \
-		  np.linalg.norm(feature_matrix[:,i]-feature_matrix[:,j],2)
-# create custom distance instance
-distance = sg.CustomDistance(distance_matrix)
-# construct embedding based on created distance
-converter.embed_distance(distance)
diff --git a/applications/tapkee/samples/lle.py b/applications/tapkee/samples/lle.py
deleted file mode 100644
index f0a05dd4599..00000000000
--- a/applications/tapkee/samples/lle.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import shogun as sg
-import data
-
-# load data
-feature_matrix = data.swissroll()
-# create features instance
-features = sg.RealFeatures(feature_matrix)
-
-# create Locally Linear Embedding converter instance
-converter = sg.LocallyLinearEmbedding()
-
-# set target dimensionality
-converter.set_target_dim(2)
-# set number of neighbors
-converter.set_k(10)
-# set reconstruction shift (optional)
-converter.set_reconstruction_shift(1e-3)
-# set nullspace shift (optional)
-converter.set_nullspace_shift(-1e-6)
-
-# compute embedding with Locally Linear Embedding method
-embedding_first = converter.embed(features)
-
-# set number of neighbors to be used
-converter.set_k(50)
-
-# compute embedding with Locally Linear Embedding method
-embedding_second = converter.embed(features)
diff --git a/applications/tapkee/samples/lltsa.py b/applications/tapkee/samples/lltsa.py
deleted file mode 100644
index f59a3fb2ea3..00000000000
--- a/applications/tapkee/samples/lltsa.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import shogun as sg
-import data
-
-# load data
-feature_matrix = data.swissroll()
-# create features instance
-features = sg.RealFeatures(feature_matrix)
-
-# create Linear Local Tangent Space Alignment converter instance
-converter = sg.LinearLocalTangentSpaceAlignment()
-
-# set target dimensionality
-converter.set_target_dim(2)
-# set number of neighbors
-converter.set_k(10)
-# set number of threads
-converter.parallel.set_num_threads(2)
-# set nullspace shift (optional)
-converter.set_nullspace_shift(-1e-6)
-
-# compute embedding with Linear Local Tangent Space Alignment method
-embedding = converter.embed(features)
diff --git a/applications/tapkee/samples/lpp.py b/applications/tapkee/samples/lpp.py
deleted file mode 100644
index 9c9c4e87f37..00000000000
--- a/applications/tapkee/samples/lpp.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import shogun as sg
-import data
-
-# load data
-feature_matrix = data.swissroll()
-# create features instance
-features = sg.RealFeatures(feature_matrix)
-
-# create Locality Preserving Projections converter instance
-converter = sg.LocalityPreservingProjections()
-
-# set target dimensionality
-converter.set_target_dim(2)
-# set number of neighbors
-converter.set_k(10)
-# set number of threads
-converter.parallel.set_num_threads(2)
-
-# compute embedding with Locality Preserving Projections method
-embedding = converter.embed(features)
diff --git a/applications/tapkee/samples/ltsa.py b/applications/tapkee/samples/ltsa.py
deleted file mode 100644
index 2c2d760cd4a..00000000000
--- a/applications/tapkee/samples/ltsa.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import shogun as sg
-import data
-
-# load data
-feature_matrix = data.swissroll()
-# create features instance
-features = sg.RealFeatures(feature_matrix)
-
-# create Local Tangent Space Alignment converter instance
-converter = sg.LocalTangentSpaceAlignment()
-
-# set target dimensionality
-converter.set_target_dim(2)
-# set number of neighbors
-converter.set_k(10)
-# set number of threads
-converter.parallel.set_num_threads(2)
-# set nullspace shift (optional)
-converter.set_nullspace_shift(-1e-6)
-
-# compute embedding with Local Tangent Space Alignment method
-embedding = converter.embed(features)
diff --git a/applications/tapkee/samples/mds.py b/applications/tapkee/samples/mds.py
deleted file mode 100644
index 4f0f8ffe633..00000000000
--- a/applications/tapkee/samples/mds.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import shogun as sg
-import data
-import numpy as np
-
-# load data
-feature_matrix = data.swissroll()
-# create features instance
-features = sg.RealFeatures(feature_matrix)
-
-# create Multidimensional Scaling converter instance
-converter = sg.MultidimensionalScaling()
-
-# set target dimensionality
-converter.set_target_dim(2)
-
-# compute embedding with Multidimensional Scaling method
-embedding = converter.embed(features)
-
-# enable landmark approximation
-converter.set_landmark(True)
-# set number of landmarks
-converter.set_landmark_number(100)
-# set number of threads
-converter.parallel.set_num_threads(2)
-# compute approximate embedding
-approx_embedding = converter.embed(features)
-# disable landmark approximation
-converter.set_landmark(False)
-
-# compute cosine distance matrix 'manually'
-N = features.get_num_vectors()
-distance_matrix = np.zeros((N,N))
-for i in range(N):
-	for j in range(N):
-		distance_matrix[i,j] = \
-		  np.linalg.norm(feature_matrix[:,i]-feature_matrix[:,j],2)
-# create custom distance instance
-distance = sg.CustomDistance(distance_matrix)
-# construct embedding based on created distance
-converter.embed_distance(distance)
diff --git a/applications/tapkee/samples/npe.py b/applications/tapkee/samples/npe.py
deleted file mode 100644
index cefa4880bdf..00000000000
--- a/applications/tapkee/samples/npe.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import shogun as sg
-import data
-
-# load data
-feature_matrix = data.swissroll()
-# create features instance
-features = sg.RealFeatures(feature_matrix)
-
-# create Neighborhood Preserving Embedding converter instance
-converter = sg.NeighborhoodPreservingEmbedding()
-
-# set target dimensionality
-converter.set_target_dim(2)
-# set number of neighbors
-converter.set_k(10)
-# set number of threads
-converter.parallel.set_num_threads(2)
-# set nullspace shift (optional)
-converter.set_nullspace_shift(-1e-6)
-
-# compute embedding with Neighborhood Preserving Projections method
-embedding = converter.embed(features)
diff --git a/applications/tapkee/swissroll_embedding.py b/applications/tapkee/swissroll_embedding.py
deleted file mode 100644
index e83d69d71a5..00000000000
--- a/applications/tapkee/swissroll_embedding.py
+++ /dev/null
@@ -1,88 +0,0 @@
-import numpy
-numpy.random.seed(40)
-tt = numpy.genfromtxt('../../data/toy/swissroll_color.dat',unpack=True).T
-X = numpy.genfromtxt('../../data/toy/swissroll.dat',unpack=True).T
-N = X.shape[1]
-converters = []
-
-from shogun import LocallyLinearEmbedding
-lle = LocallyLinearEmbedding()
-lle.set_k(9)
-converters.append((lle, "LLE with k=%d" % lle.get_k()))
-
-from shogun import MultidimensionalScaling
-mds = MultidimensionalScaling()
-converters.append((mds, "Classic MDS"))
-
-lmds = MultidimensionalScaling()
-lmds.set_landmark(True)
-lmds.set_landmark_number(20)
-converters.append((lmds,"Landmark MDS with %d landmarks" % lmds.get_landmark_number()))
-
-from shogun import Isomap
-cisomap = Isomap()
-cisomap.set_k(9)
-converters.append((cisomap,"Isomap with k=%d" % cisomap.get_k()))
-
-from shogun import DiffusionMaps
-from shogun import GaussianKernel
-dm = DiffusionMaps()
-dm.set_t(2)
-dm.set_width(1000.0)
-converters.append((dm,"Diffusion Maps with t=%d, sigma=%.1f" % (dm.get_t(),dm.get_width())))
-
-from shogun import HessianLocallyLinearEmbedding
-hlle = HessianLocallyLinearEmbedding()
-hlle.set_k(6)
-converters.append((hlle,"Hessian LLE with k=%d" % (hlle.get_k())))
-
-from shogun import LocalTangentSpaceAlignment
-ltsa = LocalTangentSpaceAlignment()
-ltsa.set_k(6)
-converters.append((ltsa,"LTSA with k=%d" % (ltsa.get_k())))
-
-from shogun import LaplacianEigenmaps
-le = LaplacianEigenmaps()
-le.set_k(20)
-le.set_tau(100.0)
-converters.append((le,"Laplacian Eigenmaps with k=%d, tau=%d" % (le.get_k(),le.get_tau())))
-
-import matplotlib
-import matplotlib.pyplot as plt
-from mpl_toolkits.mplot3d import Axes3D
-
-fig = plt.figure()
-
-new_mpl = False
-
-try:
-	swiss_roll_fig = fig.add_subplot(3,3,1, projection='3d')
-	new_mpl = True
-except:
-	figure = plt.figure()
-	swiss_roll_fig = Axes3D(figure)
-
-swiss_roll_fig.scatter(X[0], X[1], X[2], s=10, c=tt, cmap=plt.cm.Spectral)
-swiss_roll_fig._axis3don = False
-plt.suptitle('Swissroll embedding',fontsize=9)
-plt.subplots_adjust(hspace=0.4)
-
-from shogun import RealFeatures
-
-for (i, (converter, label)) in enumerate(converters):
-	X = numpy.genfromtxt('../../data/toy/swissroll.dat',unpack=True).T
-	features = RealFeatures(X)
-	converter.set_target_dim(2)
-	converter.parallel.set_num_threads(1)
-	new_feats = converter.embed(features).get_feature_matrix()
-	if not new_mpl:
-		embedding_subplot = fig.add_subplot(4,2,i+1)
-	else:
-		embedding_subplot = fig.add_subplot(3,3,i+2)
-	embedding_subplot.scatter(new_feats[0],new_feats[1], c=tt, cmap=plt.cm.Spectral)
-	plt.axis('tight')
-	plt.xticks([]), plt.yticks([])
-	plt.title(label,fontsize=9)
-	print converter.get_name(), 'done'
-
-plt.show()
diff --git a/applications/tapkee/words_embedding.py b/applications/tapkee/words_embedding.py
deleted file mode 100644
index d74deb215e6..00000000000
--- a/applications/tapkee/words_embedding.py
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/usr/bin/env python
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or
-# (at your option) any later version.
-#
-# Written (W) 2011 Sergey Lisitsyn
-# Copyright (C) 2011 Sergey Lisitsyn
-
-from numpy import *
-from pylab import *
-from shogun import *
-import random
-import difflib
-
-def word_kernel(words):
-	N = len(words)
-	dist_matrix = zeros([N,N])
-	for i in range(N):
-		for j in range(i,N):
-			s = difflib.SequenceMatcher(None,words[i],words[j])
-			dist_matrix[i,j] = s.ratio()
-	dist_matrix = 0.5*(dist_matrix+dist_matrix.T)
-	return CustomKernel(dist_matrix)
-
-print 'loading'
-words = []
-f = open("../../data/toy/words.dat")
-for line in f:
-	words.append(line[:-1])
-f.close()
-print 'loaded'
-
-converter = KernelLocallyLinearEmbedding()
-converter.set_k(10)
-converter.set_target_dim(2)
-converter.parallel.set_num_threads(1)
-embedding = converter.embed_kernel(word_kernel(words[:200]))
-embedding_matrix = embedding.get_feature_matrix()
-fig = figure()
-ax = fig.add_subplot(1,1,1)
-ax.scatter(embedding_matrix[0,:],embedding_matrix[1,:],alpha=0.4,cmap=cm.Spectral,c=embedding_matrix[0,:]*embedding_matrix[1,:])
-
-# hardcode ;)
-words_to_show = ['finishing','publishing','standing',\
-                 'shifted','insisted','tilted','blasted',\
-                 'jumble','battle','gobble']
-
-for i in xrange(0,200):
-	if words[i] in words_to_show:
-		ax.text(embedding_matrix[0,i]*1.1,1.25*embedding_matrix[1,i],words[i],fontsize=16,alpha=1.0)
-
-axis('off')
-show()
-
diff --git a/benchmarks/hasheddoc_benchmarks.cpp b/benchmarks/hasheddoc_benchmarks.cpp
deleted file mode 100644
index adc268c3b35..00000000000
--- a/benchmarks/hasheddoc_benchmarks.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * This software is distributed under BSD 3-clause license (see LICENSE file).
- *
- * Authors: Evangelos Anagnostopoulos
- */
-
-#include <shogun/base/init.h>
-#include <shogun/classifier/svm/SVMOcas.h>
-#include <shogun/features/HashedDocDotFeatures.h>
-#include <shogun/features/DenseFeatures.h>
-#include <shogun/lib/NGramTokenizer.h>
-#include <shogun/mathematics/Math.h>
-
-using namespace shogun;
-
-int main(int argv, char** argc)
-{
-	init_shogun_with_defaults();
-
-	int32_t bits[] = {8, 10, 12, 16, 20};
-	int32_t bits_length = 5;
-
-	int32_t num_strings = 5000;
-	int32_t max_str_length = 10000;
-	SGStringList<char> string_list(num_strings, max_str_length);
-
-	SG_SPRINT("Creating features...\n");
-	for (index_t i=0; i<num_strings; i++)
-	{
-		string_list.strings[i] = SGString<char>(max_str_length);
-		for (index_t j=0; j<max_str_length; j++)
-			string_list.strings[i].string[j] = (char) CMath::random('A', 'Z');
-	}
-	SG_SPRINT("Features were created.\n");
-
-	CStringFeatures<char>* string_feats = new CStringFeatures<char>(string_list, RAWBYTE);
-	CNGramTokenizer* tzer = new CNGramTokenizer(3);
-
-	for (index_t i=0; i<bits_length; i++)
-	{
-		int32_t b = bits[i];
-		SG_SPRINT("Starting training for num_bits = %d\n", b);
-
-		SG_REF(string_feats);
-		SG_REF(tzer);
-		CHashedDocDotFeatures* feats = new CHashedDocDotFeatures(b, string_feats, tzer);
-		feats->benchmark_dense_dot_range();
-		feats->benchmark_add_to_dense_vector();
-	}
-	exit_shogun();
-}
diff --git a/benchmarks/kernel_matrix_sum_benchmark.cpp b/benchmarks/kernel_matrix_sum_benchmark.cpp
deleted file mode 100644
index 62d7228480d..00000000000
--- a/benchmarks/kernel_matrix_sum_benchmark.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) The Shogun Machine Learning Toolbox
- * Written (w) 2014 Soumyajit De
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- *    list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * The views and conclusions contained in the software and documentation are those
- * of the authors and should not be interpreted as representing official policies,
- * either expressed or implied, of the Shogun Development Team.
- */
-
-#include <shogun/lib/common.h>
-#include <shogun/base/init.h>
-#include <shogun/io/SGIO.h>
-#include <shogun/lib/SGMatrix.h>
-#include <shogun/lib/Time.h>
-#include <shogun/features/DenseFeatures.h>
-#include <shogun/kernel/GaussianKernel.h>
-#include <shogun/kernel/CustomKernel.h>
-#include <shogun/mathematics/eigen3.h>
-#include <map>
-
-using namespace shogun;
-using namespace Eigen;
-
-std::pair<float64_t,float64_t> test()
-{
-	CTime *time=new CTime();
-
-	const index_t n=1000;
-	const index_t d=3;
-	SGMatrix<float64_t> data_p(d, n);
-	Map<MatrixXd> data_pm(data_p.matrix, data_p.num_rows, data_p.num_cols);
-	data_pm=MatrixXd::Random(d, n);
-	SGMatrix<float64_t> data_q(d, n);
-	Map<MatrixXd> data_qm(data_q.matrix, data_q.num_rows, data_q.num_cols);
-	data_qm=MatrixXd::Random(d, n);
-
-	CDenseFeatures<float64_t>* feats_p=new CDenseFeatures<float64_t>(data_p);
-	CDenseFeatures<float64_t>* feats_q=new CDenseFeatures<float64_t>(data_q);
-	CGaussianKernel* kernel=new CGaussianKernel(feats_p, feats_q, 2);
-	CCustomKernel* precomputed_kernel=new CCustomKernel(kernel);
-
-	// BENCHMARK_1
-	time->start();
-	float64_t sum1=precomputed_kernel->sum_block(0, 0, n, n);
-	float64_t time1=time->cur_time_diff();
-
-	float64_t sum2=0.0;
-	SGMatrix<float64_t> km=precomputed_kernel->get_kernel_matrix();
-	Map<MatrixXd> k_m(km.matrix, km.num_rows, km.num_cols);
-
-	// BENCHMARK_2
-	time->start();
-	sum2=k_m.sum();
-	float64_t time2=time->cur_time_diff();
-
-	ASSERT(CMath::abs(sum1-sum2) <= 1E-5);
-
-	SG_UNREF(kernel);
-	SG_UNREF(precomputed_kernel);
-	SG_UNREF(time);
-
-	return std::make_pair(time1, time2);
-}
-
-int main(int argc, char **argv)
-{
-	init_shogun_with_defaults();
-	//sg_io->set_loglevel(MSG_DEBUG);
-	//sg_io->set_location_info(MSG_FUNCTION);
-	float64_t time1=0.0, time2=0.0;
-	float64_t var1=0.0, var2=0.0;
-	index_t num_runs=100;
-	for (index_t i=1; i<=num_runs; ++i)
-	{
-		std::pair<float64_t,float64_t> time=test();
-		float64_t delta=time.first - time1;
-		time1+=delta/i;
-		var1+=delta*(time.first - time1);
-		delta=time.second - time2;
-		time2+=delta/i;
-		var2+=delta*(time.second - time2);
-	}
-	var1/=num_runs;
-	var2/=num_runs;
-	SG_SPRINT("mean %f\t var %f\n", time1, var1);
-	SG_SPRINT("mean %f\t var %f\n", time2, var2);
-	exit_shogun();
-	return 0;
-}
-
diff --git a/benchmarks/rf_feats_benchmark.cpp b/benchmarks/rf_feats_benchmark.cpp
deleted file mode 100644
index e93d72a8459..00000000000
--- a/benchmarks/rf_feats_benchmark.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-#include <shogun/base/init.h>
-#include <shogun/features/RandomFourierDotFeatures.h>
-#include <shogun/kernel/GaussianKernel.h>
-#include <shogun/kernel/normalizer/IdentityKernelNormalizer.h>
-#include <shogun/lib/Time.h>
-
-using namespace shogun;
-
-/** Benchmark code for the RandomFourierDotFeatures class
- * Current results are after the code
- */
-
-int main(int argv, char** argc)
-{
-	init_shogun_with_defaults();
-
-	int32_t dims[] = {100, 300, 600};
-	CTime* timer = new CTime();
-	for (index_t d=0; d<3; d++)
-	{
-		int32_t num_dim = dims[d];
-		int32_t num_vecs = 100000;
-		SG_SPRINT("-------------------------------------------------------------------------\n");
-		SG_SPRINT("Starting experiment for number of dimensions = %d, number of vectors = %d,", num_dim, num_vecs);
-		SGMatrix<float64_t> mat(num_dim, num_vecs);
-		for (index_t i=0; i<num_vecs; i++)
-		{
-			for (index_t j=0; j<num_dim; j++)
-			{
-				mat(j,i) = CMath::random(0,1) + 0.5;
-			}
-		}
-
-		SGVector<float64_t> params(1);
-		params[0] = num_dim - 20;
-		SG_SPRINT(" using kernel_width = %f\n", params[0]);
-
-		CDenseFeatures<float64_t>* dense_feats = new CDenseFeatures<float64_t>(mat);
-		SG_REF(dense_feats);
-
-		int D[] = {50, 100, 200, 300, 400, 500};
-		for (index_t i=0; i<6; i++)
-		{
-			SG_SPRINT("Results for D = %d\n", D[i]);
-			CRandomFourierDotFeatures* rand_feats =
-					new CRandomFourierDotFeatures(dense_feats, D[i], KernelName::GAUSSIAN, params);
-			rand_feats->benchmark_dense_dot_range();
-			rand_feats->benchmark_add_to_dense_vector();
-			SG_UNREF(rand_feats);
-		}
-
-		SG_SPRINT("-------------------------------------------------------------------------\n");
-		SG_UNREF(dense_feats);
-	}
-	SG_SPRINT("Total time : %fs\n", timer->cur_runtime_diff_sec());
-	timer->stop();
-	SG_UNREF(timer);
-
-	exit_shogun();
-}
-
-/** Current results, using Release settings, for future comparisons :
- * -------------------------------------------------------------------------
- *  Starting experiment for number of dimensions = 100, number of vectors = 100000, using kernel_width = 80.000000
- *  Results for D = 50
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 1.846000s walltime 0.310587s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 1.244000s walltime 1.244486s
- *  Results for D = 100
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 3.438000s walltime 0.521576s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 2.644000s walltime 2.645543s
- *  Results for D = 200
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 5.860000s walltime 0.867629s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 5.092000s walltime 5.090811s
- *  Results for D = 300
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 8.564000s walltime 1.233921s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 7.770000s walltime 7.770405s
- *  Results for D = 400
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 10.974000s walltime 1.531718s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 10.126000s walltime 10.125524s
- *  Results for D = 500
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 13.558000s walltime 1.965116s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 12.894000s walltime 12.894182s
- *  -------------------------------------------------------------------------
- *  -------------------------------------------------------------------------
- *  Starting experiment for number of dimensions = 300, number of vectors = 100000, using kernel_width = 280.000000
- *  Results for D = 50
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 3.346000s walltime 0.580631s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 2.234000s walltime 2.234459s
- *  Results for D = 100
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 5.670000s walltime 0.878700s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 4.402000s walltime 4.401725s
- *  Results for D = 200
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 10.044000s walltime 1.441796s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 9.332000s walltime 9.332423s
- *  Results for D = 300
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 15.382000s walltime 2.138093s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 14.858000s walltime 14.858871s
- *  Results for D = 400
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 20.674000s walltime 2.905396s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 20.028000s walltime 20.030157s
- *  Results for D = 500
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 25.662000s walltime 3.550897s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 24.374000s walltime 24.374596s
- *  -------------------------------------------------------------------------
- *  -------------------------------------------------------------------------
- *  Starting experiment for number of dimensions = 600, number of vectors = 100000, using kernel_width = 580.000000
- *  Results for D = 50
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 4.414000s walltime 0.657778s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 3.490000s walltime 3.489634s
- *  Results for D = 100
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 8.456000s walltime 1.267112s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 7.458000s walltime 7.457174s
- *  Results for D = 200
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 16.922000s walltime 2.268248s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 16.142000s walltime 16.141996s
- *  Results for D = 300
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 25.584000s walltime 3.424675s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 25.752000s walltime 25.753305s
- *  Results for D = 400
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 34.392000s walltime 4.644195s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 34.340000s walltime 34.340004s
- *  Results for D = 500
- *  Time to process 5 x num=100000 dense_dot_range ops: cputime 44.028000s walltime 5.816031s
- *  Time to process 5 x num=100000 add_to_dense_vector ops: cputime 43.978000s walltime 43.979196s
- *  -------------------------------------------------------------------------
- *  Total time : 2531.890000s
- */
diff --git a/benchmarks/rf_feats_kernel_comp.cpp b/benchmarks/rf_feats_kernel_comp.cpp
deleted file mode 100644
index a1acc106d6f..00000000000
--- a/benchmarks/rf_feats_kernel_comp.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-#include <shogun/base/init.h>
-#include <shogun/features/RandomFourierDotFeatures.h>
-#include <shogun/kernel/GaussianKernel.h>
-#include <shogun/kernel/normalizer/SqrtDiagKernelNormalizer.h>
-#include <shogun/classifier/svm/LibLinear.h>
-#include <shogun/classifier/svm/SVMOcas.h>
-#include <shogun/classifier/svm/LibSVM.h>
-#include <shogun/labels/BinaryLabels.h>
-#include <shogun/evaluation/PRCEvaluation.h>
-#include <shogun/lib/Time.h>
-
-#include <stdio.h>
-#include <ctime>
-
-using namespace shogun;
-
-/** Code that compares the times needed to train
- * a linear svm using the RandomFourierDotFeatures class
- * vs a non-linear svm using the Gaussian Kernel.
- */
-int main(int argv, char** argc)
-{
-	init_shogun_with_defaults();
-
-	int32_t dims[] = {10, 100, 1000};
-	int32_t vecs[] = {10000, 100000, 1000000};
-	CTime* timer = new CTime(false);
-	float64_t epsilon = 0.001;
-	float64_t lin_C = 0.1;
-	float64_t non_lin_C = 0.1;
-	CPRCEvaluation* evaluator = new CPRCEvaluation();
-	CSqrtDiagKernelNormalizer* normalizer = new CSqrtDiagKernelNormalizer(true);
-	SG_REF(normalizer);
-	for (index_t d=0; d<4; d++)
-	{
-		int32_t num_dim = dims[d];
-		SG_SPRINT("Starting experiment for number of dimensions = %d\n", num_dim);
-		for (index_t v=0; v<3; v++)
-		{
-			int32_t num_vecs = vecs[v];
-			SG_SPRINT("   Using %d examples\n", num_vecs);
-			SGMatrix<float64_t> mat(num_dim, num_vecs);
-			SGVector<float64_t> labs(num_vecs);
-			for (index_t i=0; i<num_vecs; i++)
-			{
-				for (index_t j=0; j<num_dim; j++)
-				{
-					if ((i+j)%2==0)
-					{
-						labs[i] = -1;
-						mat(j,i) = CMath::random(0,1) + 0.5;
-					}
-					else
-					{
-						labs[i] = 1;
-						mat(j,i) = CMath::random(0,1) - 0.5;
-					}
-				}
-			}
-
-			SGVector<float64_t> params(1);
-			params[0] = 8;
-			SG_SPRINT("    Using kernel_width = %f\n", params[0]);
-
-			CDenseFeatures<float64_t>* dense_feats = new CDenseFeatures<float64_t>(mat);
-			SG_REF(dense_feats);
-
-			CBinaryLabels* labels = new CBinaryLabels(labs);
-			SG_REF(labels);
-
-			/** LibLinear SVM using RandomFourierDotFeatures */
-			int32_t D[] = {50, 100, 300, 1000};
-			for (index_t d=0; d<4; d++)
-			{
-				CRandomFourierDotFeatures* r_feats = new CRandomFourierDotFeatures(
-						dense_feats, D[d], KernelName::GAUSSIAN, params);
-
-				//CLibLinear* lin_svm = new CLibLinear(C, r_feats, labels);
-				CSVMOcas* lin_svm = new CSVMOcas(lin_C, r_feats, labels);
-				lin_svm->set_epsilon(epsilon);
-				clock_t t = clock();
-				timer->start();
-				lin_svm->train();
-				t = clock() - t;
-				timer->stop();
-				SG_SPRINT("\tSVMOcas using RFDotFeatures(D=%d) finished training. Took %fs (or %fs), ",
-						D[d], timer->time_diff_sec(), (float64_t) t /CLOCKS_PER_SEC);
-
-				t = clock();
-				timer->start();
-				CBinaryLabels* predicted = CLabelsFactory::to_binary(lin_svm->apply());
-				timer->stop();
-				t = clock() - t;
-				float64_t auPRC = evaluator->evaluate(predicted, labels);
-				SG_SPRINT("SVMOcas auPRC=%f (Applying took %fs (%fs)\n", auPRC,
-						timer->time_diff_sec(), (float64_t) t / CLOCKS_PER_SEC);
-				SG_UNREF(lin_svm);
-				SG_UNREF(predicted);
-			}
-			/** End of LibLinear code */
-
-
-			/** LibSVM using Gaussian Kernel */
-
-			CGaussianKernel* kernel = new CGaussianKernel(dense_feats, dense_feats, params[0]);
-			//kernel->set_normalizer(normalizer);
-			CLibSVM* svm = new CLibSVM(non_lin_C, kernel, labels);
-			svm->set_epsilon(epsilon);
-			clock_t t = clock();
-			timer->start();
-			svm->train();
-			t = clock() - t;
-			timer->stop();
-			SG_SPRINT("\tLibSVM using GaussianKernel finished training. Took %fs (or %fs), ",
-					timer->time_diff_sec(), (float64_t) t /CLOCKS_PER_SEC);
-
-			t = clock();
-			timer->start();
-			CBinaryLabels* predicted = CLabelsFactory::to_binary(svm->apply());
-			timer->stop();
-			t = clock() - t;
-			float64_t auPRC = evaluator->evaluate(predicted, labels);
-			SG_SPRINT("LibSVM auPRC=%f (Applying took %fs (%fs)\n", auPRC,
-					timer->time_diff_sec(), (float64_t) t / CLOCKS_PER_SEC);
-			SG_UNREF(svm);
-			SG_UNREF(predicted);
-			/** End of LibSVM code */
-			SG_UNREF(labels);
-			SG_UNREF(dense_feats);
-		}
-	}
-	SG_UNREF(timer);
-	SG_UNREF(evaluator);
-	SG_UNREF(normalizer);
-	exit_shogun();
-}
diff --git a/benchmarks/sparse_test.cpp b/benchmarks/sparse_test.cpp
deleted file mode 100644
index 8849ea68e69..00000000000
--- a/benchmarks/sparse_test.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * This software is distributed under BSD 3-clause license (see LICENSE file).
- *
- * Authors: Soeren Sonnenburg, Pan Deng, Soumyajit De, Björn Esser
- */
-
-#include <shogun/lib/common.h>
-
-#include <shogun/lib/Time.h>
-#include <shogun/lib/SGVector.h>
-#include <shogun/lib/SGSparseMatrix.h>
-#include <shogun/lib/SGSparseVector.h>
-#include <shogun/mathematics/Math.h>
-#include <shogun/mathematics/eigen3.h>
-#include <pthread.h>
-
-using namespace shogun;
-using namespace Eigen;
-
-struct APPLY_THREAD_PARAM
-{
-	int32_t start;
-	int32_t stop;
-	float64_t* result;
-	float64_t* vec;
-	int32_t len;
-	SGSparseVector<float64_t>* sm;
-};
-
-
-int32_t get_nnz(SGSparseMatrix<float64_t> m)
-{
-
-	int32_t nnz=0;
-	int32_t n=m.num_vectors;
-
-	for (int i=0; i<n; i++)
-	{
-		nnz+=m[i].num_feat_entries;
-	}
-	return nnz;
-}
-
-static void* dot_helper(void* p)
-{
-	APPLY_THREAD_PARAM* par=(APPLY_THREAD_PARAM*) p;
-	float64_t* r = par->result;
-	SGSparseVector<float64_t>* m=par->sm;
-	float64_t* vec = par->vec;
-	int32_t len = par->len;
-	int32_t start = par->start;
-	int32_t stop = par->stop;
-
-	for (index_t i=start; i<stop; ++i)
-		r[i]=m[i].dense_dot(1.0, vec, len, 0.0);
-}
-
-
-SGVector<float64_t> sg_m_apply(SGSparseMatrix<float64_t> m, SGVector<float64_t> v)
-{
-	SGVector<float64_t> r(v.vlen);
-	ASSERT(v.vlen==m.num_vectors);
-
-	int num_threads=8;
-	pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
-	APPLY_THREAD_PARAM* params = SG_MALLOC(APPLY_THREAD_PARAM, num_threads);
-	int32_t step= m.num_vectors/num_threads;
-
-	int32_t start=0;
-	int32_t stop=m.num_vectors;
-	int32_t t;
-
-	for (t=0; t<num_threads-1; t++)
-	{
-		params[t].start = start+t*step;
-		params[t].stop = start+(t+1)*step;
-		params[t].result = r.vector;
-		params[t].sm=m.sparse_matrix;
-		params[t].vec=v.vector;
-		params[t].len=v.vlen;
-		pthread_create(&threads[t], NULL,
-				dot_helper, (void*)&params[t]);
-	}
-
-	params[t].start = start+t*step;
-	params[t].stop = stop;
-	params[t].result = r.vector;
-	params[t].sm=m.sparse_matrix;
-	params[t].vec=v.vector;
-	params[t].len=v.vlen;
-	dot_helper((void*) &params[t]);
-
-	for (t=0; t<num_threads-1; t++)
-		pthread_join(threads[t], NULL);
-
-	SG_FREE(params);
-	SG_FREE(threads);
-
-	return r;
-}
-
-int main(int argc, char** argv)
-{
-	Eigen::initParallel();
-	init_shogun_with_defaults();
-	//sg_io->set_loglevel(MSG_GCDEBUG);
-
-	const index_t n=100;
-	const index_t times=5;
-	const index_t size=1000000;
-	SGVector<float64_t> v(size);
-	v.set_const(1.0);
-	Map<VectorXd> map_v(v.vector, v.vlen);
-	CTime time;
-	CMath::init_random(17);
-
-	SG_SPRINT("time\tshogun (s)\teigen3 (s)\n\n");
-	for (index_t t=0; t<times; ++t)
-	{
-//#ifdef RUN_SHOGUN
-		SGSparseMatrix<float64_t> sg_m(size, size);
-		typedef SGSparseVectorEntry<float64_t> Entry;
-		SGSparseVector<float64_t> *vec=SG_MALLOC(SGSparseVector<float64_t>, size);
-
-		// for first row
-		Entry *first=SG_MALLOC(Entry, size);
-		// the digonal index for row #1
-		first[0].feat_index=0;
-		first[0].entry=1.836593;
-		for (index_t i=1; i<size; ++i)
-		{
-			// fill the index for row #1
-			first[i].feat_index=i;
-			first[i].entry=0.02;
-		}
-		vec[0].features=first;
-		vec[0].num_feat_entries=size;
-		sg_m[0]=vec[0].get();
-
-		// fill the rest of the rows
-		Entry** rest=SG_MALLOC(Entry*, size-1);
-		for (index_t i=0; i<size-1; ++i)
-		{
-			int num=40;
-			// the first col
-			rest[i]=SG_MALLOC(Entry, num);
-
-			for (int j=0; j<i && j<num; j++)
-			{
-				rest[i][j].feat_index=j;
-				rest[i][j].entry=0.01+j;
-			}
-
-			if (i>num)
-			{
-				//// the diagonal element
-				rest[i][num-1].feat_index=i+1;
-				rest[i][num-1].entry=1.836593;
-			}
-
-			vec[i+1].features=rest[i];
-			vec[i+1].num_feat_entries=num;
-
-			sg_m[i+1]=vec[i+1].get();
-		}
-		SGVector<float64_t> r(size);
-
-		SG_SPRINT("nnz=%d\n", get_nnz(sg_m));
-
-		// sg starts
-		time.start();
-		for (index_t i=0; i<n; ++i)
-			r=sg_m_apply(sg_m, v);
-		float64_t sg_time = time.cur_time_diff();
-
-		Map<VectorXd> map_r(r.vector, r.vlen);
-		float64_t sg_norm=map_r.norm();
-
-//#endif // RUN_SHOGUN
-
-//#ifdef RUN_EIGEN
-		const SparseMatrix<float64_t> &eig_m=EigenSparseUtil<float64_t>::toEigenSparse(sg_m);
-		VectorXd eig_r(size);
-
-		// eigen3 starts
-		time.start();
-		for (index_t i=0; i<n; ++i)
-			eig_r=eig_m*map_v;
-
-		float64_t eig_time = time.cur_time_diff();
-		float64_t eig_norm=eig_r.norm();
-//#endif // RUN_EIGEN
-
-		SG_SPRINT("%d\t%lf\t%lf\n", t, sg_time, eig_time);
-		//ASSERT(sg_time>eig_time);
-		ASSERT(CMath::abs(sg_norm-eig_norm)<=CMath::MACHINE_EPSILON)
-
-		SG_FREE(vec);
-		SG_FREE(rest);
-	}
-
-
-	exit_shogun();
-
-	return 0;
-}