diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000000..96184e36d91 --- /dev/null +++ b/.clang-format @@ -0,0 +1,44 @@ +# +# Shogun clang-format configuration file +# +Language: Cpp +AccessModifierOffset: -4 +AlignAfterOpenBracket: AlwaysBreak +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakTemplateDeclarations: true +BinPackArguments: true +BinPackParameters: true +BreakBeforeBraces: Custom +BraceWrapping: + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: false + AfterStruct: true + AfterUnion: true + BeforeCatch: true + BeforeElse: true + IndentBraces: false +ColumnLimit: 80 +ContinuationIndentWidth: 4 +IndentWidth: 4 +NamespaceIndentation: All +PenaltyBreakComment: 10000 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 10000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Left +ReflowComments: true +SortIncludes: true +Standard: Cpp11 +TabWidth: 4 +UseTab: ForIndentation diff --git a/.gitignore b/.gitignore index aa34f8a780a..20415f123e3 100644 --- a/.gitignore +++ b/.gitignore @@ -52,53 +52,6 @@ cpplint.py /src/configure-*-*.c* /src/build-local -# modular interfaces -/src/interfaces/*_modular/*.doxy -/src/interfaces/*_modular/Evaluation.i -/src/interfaces/*_modular/Regression.i -/src/interfaces/*_modular/Library.i -/src/interfaces/*_modular/Distribution.i -/src/interfaces/*_modular/Structure.i -/src/interfaces/*_modular/Classifier.i -/src/interfaces/*_modular/Features.i -/src/interfaces/*_modular/Kernel.i -/src/interfaces/*_modular/Preprocessor.i -/src/interfaces/*_modular/Distance.i -/src/interfaces/*_modular/Clustering.i 
-/src/interfaces/*_modular/SGBase.i -/src/interfaces/*_modular/IO.i -/src/interfaces/*_modular/Mathematics.i -/src/interfaces/*_modular/ModelSelection.i -/src/interfaces/*_modular/modshogun.i -/src/interfaces/*_modular/modshogun_ignores.i -/src/interfaces/*_modular/*_includes.i -/src/interfaces/*_modular/Makefile -/src/interfaces/*_modular/Converter.i -/src/interfaces/*_modular/Multiclass.i -/src/interfaces/*_modular/Machine.i -/src/interfaces/*_modular/Transfer.i -/src/interfaces/*_modular/Loss.i -/src/interfaces/*_modular/Statistics.i -/src/interfaces/*_modular/Latent.i -/src/interfaces/*_modular/GaussianProcess.i - -# particular modular ones -/src/interfaces/csharp_modular/*.cs -/src/interfaces/csharp_modular/abstract_types_extension.i -/src/interfaces/csharp_modular/modshogun.dll -/src/interfaces/java_modular/*.java -/src/interfaces/java_modular/*.jar -/src/interfaces/java_modular/*.class -/src/interfaces/java_modular/org/* -/src/interfaces/java_modular/shogun/* -/src/interfaces/python_modular/*.py -/src/interfaces/python_modular/abstract_types_extension.i -/src/interfaces/r_modular/*.R -/src/interfaces/r_modular/*.RData -/src/interfaces/perl_modular/*.pm -/src/interfaces/octave_modular/abstract_types_extension.i -/.duped_py_pl.pb - # /examples/ *.log *.exe @@ -108,9 +61,9 @@ cpplint.py !/examples/undocumented/libshogun/*.cpp !/examples/undocumented/libshogun/CMakeLists.txt !/examples/undocumented/libshogun/tools/ -!examples/undocumented/python_modular/graphical/ -!examples/undocumented/python_modular/*.py -!examples/undocumented/python_modular/CMakeLists.txt +!examples/undocumented/python/graphical/ +!examples/undocumented/python/*.py +!examples/undocumented/python/CMakeLists.txt # /tests /tests/unit/shogun-unit-test @@ -120,12 +73,8 @@ cpplint.py /tests/unit/*.json /tests/unit/combined_kernel.weights Testing/ -examples/undocumented/python_modular/serialized_svm.bz2 -examples/undocumented/python_modular/tmp/blaah.asc 
-examples/undocumented/python_modular/tmp/blaah.h5 -examples/undocumented/python_modular/tmp/blaah.json -examples/undocumented/python_modular/tmp/blaah.xml -examples/undocumented/python_modular/vw_cache.dat.cache +examples/undocumented/python/serialized_svm.bz2 +examples/undocumented/python/vw_cache.dat.cache # cmake /CMakeFiles/ @@ -262,6 +211,7 @@ local.properties # CDT-specific (C/C++ Development Tooling) .cproject +.idea # JDT-specific (Eclipse Java Development Tools) .classpath diff --git a/.gitmodules b/.gitmodules index c5a3755999b..d67d6ee7dce 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,3 +5,6 @@ [submodule "debian"] path = debian url = git://github.com/shogun-toolbox/shogun-debian.git +[submodule "gpl"] + path = src/gpl + url = git://github.com/shogun-toolbox/shogun-gpl.git diff --git a/.travis.yml b/.travis.yml index bfcfc87275a..5b0a6e34839 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,6 @@ sudo: required language: cpp +cache: ccache notifications: email: false irc: @@ -16,63 +17,74 @@ matrix: services: docker - compiler: clang services: docker + - compiler: gcc + services: docker + env: + - CMAKE_OPTIONS="-DCMAKE_BUILD_TYPE=Debug -DENABLE_COVERAGE=ON -DENABLE_CCACHE=OFF" + - CODE_COVERAGE=1 - compiler: clang services: docker - python: "2.7_with_system_site_packages" - language: python env: - - CMAKE_OPTIONS="-DPythonModular=ON -DTRAVIS_DISABLE_UNIT_TESTS=ON -DTRAVIS_DISABLE_LIBSHOGUN_TESTS=ON" + - CMAKE_OPTIONS="-DINTERFACE_PYTHON=ON -DTRAVIS_DISABLE_UNIT_TESTS=ON -DTRAVIS_DISABLE_LIBSHOGUN_TESTS=ON" - CC=clang - CXX=clang++ - compiler: clang services: docker - language: ruby env: - - CMAKE_OPTIONS="-DRubyModular=ON -DTRAVIS_DISABLE_UNIT_TESTS=ON -DTRAVIS_DISABLE_LIBSHOGUN_TESTS=ON" + - CMAKE_OPTIONS="-DINTERFACE_RUBY=ON -DTRAVIS_DISABLE_UNIT_TESTS=ON -DTRAVIS_DISABLE_LIBSHOGUN_TESTS=ON" - CC=clang - CXX=clang++ - compiler: clang services: docker - language: java env: - - CMAKE_OPTIONS="-DJavaModular=ON -DTRAVIS_DISABLE_UNIT_TESTS=ON 
-DTRAVIS_DISABLE_LIBSHOGUN_TESTS=ON" + - CMAKE_OPTIONS="-DINTERFACE_JAVA=ON -DTRAVIS_DISABLE_UNIT_TESTS=ON -DTRAVIS_DISABLE_LIBSHOGUN_TESTS=ON" - CC=clang - CXX=clang++ - compiler: clang services: docker env: - - CMAKE_OPTIONS="-DCSharpModular=ON -DTRAVIS_DISABLE_UNIT_TESTS=ON -DTRAVIS_DISABLE_LIBSHOGUN_TESTS=ON" + - CMAKE_OPTIONS="-DINTERFACE_CSHARP=ON -DTRAVIS_DISABLE_UNIT_TESTS=ON -DTRAVIS_DISABLE_LIBSHOGUN_TESTS=ON" - CC=clang - CXX=clang++ - compiler: clang services: docker env: - - CMAKE_OPTIONS="-DLuaModular=ON -DTRAVIS_DISABLE_UNIT_TESTS=ON -DTRAVIS_DISABLE_LIBSHOGUN_TESTS=ON" + - CMAKE_OPTIONS="-DINTERFACE_LUA=ON -DTRAVIS_DISABLE_UNIT_TESTS=ON -DTRAVIS_DISABLE_LIBSHOGUN_TESTS=ON" - CC=clang - CXX=clang++ - compiler: gcc services: docker env: - - CMAKE_OPTIONS="-DOctaveModular=ON -DTRAVIS_DISABLE_UNIT_TESTS=ON -DTRAVIS_DISABLE_LIBSHOGUN_TESTS=ON" - - compiler: clang - services: docker - env: - - CMAKE_OPTIONS="-DRModular=ON -DTRAVIS_DISABLE_UNIT_TESTS=ON -DTRAVIS_DISABLE_LIBSHOGUN_TESTS=ON" - - CC=clang - - CXX=clang++ + - CMAKE_OPTIONS="-DINTERFACE_OCTAVE=ON -DTRAVIS_DISABLE_UNIT_TESTS=ON -DTRAVIS_DISABLE_LIBSHOGUN_TESTS=ON" + - INTERFACE_OCTAVE=true - compiler: clang services: docker env: - - CMAKE_OPTIONS="-DScalaModular=ON -DTRAVIS_DISABLE_UNIT_TESTS=ON -DTRAVIS_DISABLE_LIBSHOGUN_TESTS=ON" + - CMAKE_OPTIONS="-DINTERFACE_R=ON -DTRAVIS_DISABLE_UNIT_TESTS=ON -DTRAVIS_DISABLE_LIBSHOGUN_TESTS=ON" - CC=clang - CXX=clang++ +# - compiler: clang +# services: docker +# env: +# - CMAKE_OPTIONS="-DScala=ON -DTRAVIS_DISABLE_UNIT_TESTS=ON -DTRAVIS_DISABLE_LIBSHOGUN_TESTS=ON" +# - CC=clang +# - CXX=clang++ before_install: - docker pull shogun/shogun-dev - - echo $CC; echo $CXX - - docker run -t -d -P -e "JAVA_HOME=/usr/lib/jvm/java-8-oracle" -e "CC=$CC" -e "CXX=$CXX" --name devenv -v $PWD:/opt/shogun shogun/shogun-dev /bin/sh -c "mkdir /opt/shogun/build;bash" + - perl -pe 's/\$(\w+)/$ENV{$1}/g' configs/shogun-sdk/travis.env.in > travis.env + - docker run -t -d -P 
--env-file travis.env --name devenv -v $HOME/.ccache:/root/.ccache -v $PWD:/opt/shogun shogun/shogun-dev /bin/sh -c "mkdir /opt/shogun/build;bash" before_script: - docker exec -t devenv /bin/sh -c "cd /opt/shogun/build; cmake -DCMAKE_INSTALL_PREFIX=$HOME/shogun-build -DENABLE_TESTING=ON $CMAKE_OPTIONS .." script: + - | + if [ $CC == "gcc" ] && [ -z ${INTERFACE_OCTAVE} ]; then + docker exec -t devenv /bin/sh -c "cd /opt/shogun/; if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then ./scripts/check_format.sh "$TRAVIS_PULL_REQUEST_BRANCH" "$TRAVIS_BRANCH"; fi" + fi - docker exec -t devenv /bin/sh -c "cd /opt/shogun/build; make -j2" - docker exec -t devenv /bin/sh -c "cd /opt/shogun/build; make install" - docker exec -t devenv /bin/sh -c "cd /opt/shogun/build; ctest --output-on-failure -j 2" +after_success: + - | + if [ $CODE_COVERAGE ] ; then + docker exec -t devenv /bin/sh -c "cd /opt/shogun/build; lcov --directory src/shogun/ --capture --output-file all_coverage.info; lcov --remove all_coverage.info '/usr/*' > coverage.info; rm all_coverage.info; codecov -X gcov" + fi diff --git a/CMakeLists.txt b/CMakeLists.txt index 9f6901104c9..6c48bed88a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,12 +35,21 @@ SET(CMAKE_INCLUDE_DIRECTORIES_PROJECT_BEFORE ON) set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) +if (MSVC) + if (MSVC_VERSION VERSION_LESS 1900) + message(FATAL_ERROR "C++11 is required to use Shogun, but the version of Visual Studio you are using is too old and doesn't support C++11. You need Visual Studio 2015 or newer. 
") + else() + include(CheckCXXCompilerFlag) + CHECK_CXX_COMPILER_FLAG("/std:c++14" _cpp_latest_flag_supported) + if (_cpp_latest_flag_supported) + add_compile_options("/std:c++14") + endif() + endif() +endif() ### FIXME: remove these flags when the codebase # is cleared up set(HAVE_CXX11 ON) -set(HAVE_CXX11_ATOMIC ON) -set(HAVE_STD_UNORDERED_MAP ON) ############# minimum library versions ################### SET(EIGEN_VERSION_MINIMUM 3.1.2) @@ -85,18 +94,19 @@ SET(EXT_SRC_TEST_TMP "${EXT_SRC_TEST}.jinja2") SET(THIRD_PARTY_DIR ${CMAKE_SOURCE_DIR}/third_party) SET(LIBSHOGUN_SRC_DIR ${CMAKE_SOURCE_DIR}/src/shogun) -SET(COMMON_MODULAR_SRC_DIR ${CMAKE_SOURCE_DIR}/src/interfaces/modular/) - -SET(AVAILABLE_INTERFACES PythonModular;OctaveModular;JavaModular;PerlModular;RubyModular;CSharpModular;RModular;LuaModular;ScalaModular) -SET(PythonModularDescription "Python") -SET(OctaveModularDescription "Octave") -SET(JavaModularDescription "Java") -SET(PerlModularDescription "Perl") -SET(RubyModularDescription "Ruby") -SET(CSharpModularDescription "C#") -SET(RModularDescription "R") -SET(LuaModularDescription "Lua") -SET(ScalaModularDescription "Scala") +SET(COMMON_INTERFACE_SRC_DIR ${CMAKE_SOURCE_DIR}/src/interfaces/swig/) + +SET(AVAILABLE_INTERFACES + INTERFACE_PYTHON;INTERFACE_OCTAVE;INTERFACE_JAVA;INTERFACE_PERL;INTERFACE_RUBY;INTERFACE_CSHARP;INTERFACE_R;INTERFACE_LUA;INTERFACE_SCALA) +SET(INTERFACE_PYTHON_DESCRIPTION "Python") +SET(INTERFACE_OCTAVE_DESCRIPTION "Octave") +SET(INTERFACE_JAVA_DESCRIPTION "Java") +SET(INTERFACE_PERL_DESCRIPTION "Perl") +SET(INTERFACE_RUBY_DESCRIPTION "Ruby") +SET(INTERFACE_CSHARP_DESCRIPTION "C#") +SET(INTERFACE_R_DESCRIPTION "R") +SET(INTERFACE_LUA_DESCRIPTION "Lua") +SET(INTERFACE_SCALA_DESCRIPTION "Scala") SET(LIBSHOGUN ON CACHE BOOL "Compile shogun library") IsAnyTrue("${AVAILABLE_INTERFACES}" ANY_INTERFACE_ENABLED) @@ -104,7 +114,7 @@ IF (${ANY_INTERFACE_ENABLED}) # SWIG3 is the minimum requirement because of C++11 support 
SET(SWIG_VERSION_MINIMUM 3.0.0) - IF(CSharpModular) + IF(INTERFACE_CSHARP) # We require SWIG 3.0.7 to support functions with a few SGVector or # SGMatrix arguments. The required SWIG feature is called # "Support for special variable expansion in typemap attributes." @@ -115,7 +125,7 @@ IF (${ANY_INTERFACE_ENABLED}) # typemapping created for earlier versions of SWIG. # see: http://www.swig.org/Doc3.0/CSharp.html#CSharp_introduction_swig2_compatibility LIST(APPEND CMAKE_SWIG_FLAGS "-DSWIG2_CSHARP") - ELSEIF(PythonModular) + ELSEIF(INTERFACE_PYTHON) # SWIG was broken for combining -builtin and -modernargs # from v3.0.0 and until 3.0.4. This bug was fixed in # v3.0.5. Make CMake emit an error and fail to configure. @@ -128,7 +138,7 @@ IF (${ANY_INTERFACE_ENABLED}) IF(ENABLE_CCACHE AND CCACHE_SWIG) SET(CCACHE_SWIG_EXECUTABLE ${CCACHE_SWIG}) ENDIF() - SET(COMPILE_MODULAR_INTERFACE 1) + SET(COMPILE_INTERFACE 1) ENDIF() # Detect OS @@ -137,18 +147,14 @@ DetectSystemName() # Get processor type, sets MACHINE macro SET(MACHINE ${CMAKE_SYSTEM_PROCESSOR}) -SET(EXT_LIB_SWIG_RUBY_MODULAR ".so") +SET(EXT_LIB_SWIG_RUBY ".so") if(DARWIN) - SET(EXT_LIB_SWIG_RUBY_MODULAR ".bundle") + SET(EXT_LIB_SWIG_RUBY ".bundle") ENDIF() ################ COMPILER ####################### # g++ version needs to be => 4.3 IF(CMAKE_COMPILER_IS_GNUCXX) - # in order to support cmake 2.8.7 and older - IF(NOT CMAKE_CXX_COMPILER_VERSION) - include(CheckCompiler) - ENDIF() IF("${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS "4.3.0") MESSAGE(FATAL_ERROR "g++ version is too old") ENDIF() @@ -184,6 +190,7 @@ IF(MSVC) SET(CMAKE_CXX_FLAGS_DISTRIBUTION "/Ot") SET(CMAKE_C_FLAGS_DEBUG "/DEBUG /Od /Zi") SET(CMAKE_CXX_FLAGS_DEBUG "/DEBUG /Od /Zi") + add_compile_options("/bigobj") ELSE() SET(CMAKE_C_FLAGS_RELEASE "-O3 ${RELEASE_COMPILER_FLAGS}") SET(CMAKE_CXX_FLAGS_RELEASE "-O3 ${RELEASE_COMPILER_FLAGS}") @@ -308,7 +315,7 @@ OPTION(BUILD_META_EXAMPLES "Generate API examples from meta-examples" ON) # note the examples dir 
is added below after tests have been defined ################# DATATYPES ################# -IF(COMPILE_MODULAR_INTERFACE) +IF(COMPILE_INTERFACE) OPTION(USE_CHAR "Support for char datatype" ON) OPTION(USE_BOOL "Support for bool datatype" ON) OPTION(USE_UINT8 "Support for uint8_t datatype" ON) @@ -323,10 +330,10 @@ IF(COMPILE_MODULAR_INTERFACE) OPTION(USE_FLOAT64 "Support for float64_t datatype" ON) OPTION(USE_COMPLEX128 "Support for complex128_t datatype" ON) OPTION(USE_FLOATMAX "Support for floatmax_t datatype" OFF) -ENDIF(COMPILE_MODULAR_INTERFACE) +ENDIF(COMPILE_INTERFACE) # detect word size -IF(CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT DARWIN) +IF(CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT (DARWIN OR WIN32)) SET(SWIGWORDSIZE64 TRUE) ENDIF() @@ -338,19 +345,6 @@ EndForEach(Interface) # Debugging Python-interface with CTest OPTION(ENABLE_PYTHON_DEBUG "Enable Python-interface-debugging with CTest" OFF) -# Allow to hide non-bsd compatible codes -OPTION(LICENSE_GPL_SHOGUN "Include GPL codes of Shogun (non-BSD compatible) in build" ON) -IF (LICENSE_GPL_SHOGUN) - SET(USE_GPL_SHOGUN 1) -ELSE() - SET(USE_GPL_SHOGUN 0) -ENDIF() - -# SVMLight -OPTION(USE_SVMLIGHT "SVMLight" ON) -IF(USE_SVMLIGHT AND NOT USE_GPL_SHOGUN) - MESSAGE(FATAL_ERROR "Can only use SVMLight when GPL codes are included") -ENDIF() SET(SVMLightWarning "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") SET(SVMLightWarning "${SVMLightWarning}\nWARNING: SHOGUN is built using SVMlight which was written") SET(SVMLightWarning "${SVMLightWarning}\nby Thorsten Joachims and uses a different non GPL compatible license.") @@ -379,9 +373,6 @@ OPTION(USE_PATHDEBUG "Viterbi path debugging" OFF) # big states OPTION(USE_BIGSTATES "Big (16bit) state" ON) -# Large file -OPTION(HAVE_LARGEFILE "Large file support" ON) - #kernelcache to use 4-byte-floating-point values instead of 8-byte-doubles OPTION(USE_SHORTREAL_KERNELCACHE "Kernelcache to use 4-byte-floating-point values instead of 8-byte-doubles" ON) @@ -390,9 
+381,6 @@ SET(MEXP "19937" CACHE STRING "PRNG Mersenne exponent") SET(SFMT_MEXP ${MEXP}) SET(DSFMT_MEXP ${MEXP}) -# Reference counting -OPTION(USE_REFERENCE_COUNTING "Reference Counting" ON) - OPTION(USE_LOGCACHE "Use (1+exp(x)) log cache (is much faster but less accurate)" OFF) OPTION(USE_LOGSUMARRAY "Use sum array, supposed to be a bit more accurate" OFF) @@ -417,6 +405,9 @@ IF (LIBSHOGUN) MergeCFLAGS() add_subdirectory(${CMAKE_SOURCE_DIR}/src/shogun) add_library(shogun::shogun ALIAS shogun) + if(LIBSHOGUN_BUILD_STATIC) + add_library(shogun::shogun-static ALIAS shogun-static) + endif() set(shogun_INCLUDE_DIR ${CMAKE_BINARY_DIR}/src) ELSE() find_package(Shogun ${VERSION} CONFIG REQUIRED) @@ -427,9 +418,9 @@ ForEach(SwigFlag "-w473" "-w454" "-w312" "-w325" "-fvirtual") LIST(APPEND CMAKE_SWIG_FLAGS ${SwigFlag}) EndForEach() -OPTION(SWIG_SINGLE_THREADED "Build modular interfaces single-threaded to reduce memory usage" OFF) +OPTION(SWIG_SINGLE_THREADED "Build interfaces single-threaded to reduce memory usage" OFF) -OPTION(USE_SWIG_DIRECTORS "Enable SWIG director classes" OFF) +OPTION(USE_SWIG_DIRECTORS "Enable SWIG director classes" ON) # Respect system's or distribution's C[XX]FLAGS. 
OPTION(SWIG_WITH_SYSTEM_CFLAGS "Enable system's C[XX]FLAGS for compilation of swig-binaries" ON) @@ -448,71 +439,71 @@ IF(REDUCE_SWIG_DEBUG) SET(SWIG_CXX_COMPILER_FLAGS "${SWIG_CXX_COMPILER_FLAGS} -g1") ENDIF(REDUCE_SWIG_DEBUG) -# python modular -IF (PythonModular) - IF(EXISTS ${CMAKE_SOURCE_DIR}/src/interfaces/python_modular) - add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/python_modular) +# python +IF (INTERFACE_PYTHON) + IF(EXISTS ${CMAKE_SOURCE_DIR}/src/interfaces/python) + add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/python) ENDIF() ENDIF() -# lua modular -IF (LuaModular) - IF(EXISTS ${CMAKE_SOURCE_DIR}/src/interfaces/lua_modular) - add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/lua_modular) +# lua +IF (INTERFACE_LUA) + IF(EXISTS ${CMAKE_SOURCE_DIR}/src/interfaces/lua) + add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/lua) ENDIF() ENDIF() -# scala modular -IF (ScalaModular) +# scala +IF (INTERFACE_SCALA) # Java needed because Scala extends Java Classes and uses the executable generated from Java example for Integration testing - set(JavaModular "ON") + set(INTERFACE_JAVA "ON") FIND_PACKAGE(Scala REQUIRED) - IF(NOT JavaModular) - IF(EXISTS ${CMAKE_SOURCE_DIR}/src/interfaces/java_modular) - add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/java_modular) + IF(NOT INTERFACE_JAVA) + IF(EXISTS ${CMAKE_SOURCE_DIR}/src/interfaces/java) + add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/java) ENDIF() ENDIF() ENDIF() -# java modular -IF (JavaModular) - IF(EXISTS ${CMAKE_SOURCE_DIR}/src/interfaces/java_modular) - add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/java_modular) +# java +IF (INTERFACE_JAVA) + IF(EXISTS ${CMAKE_SOURCE_DIR}/src/interfaces/java) + add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/java) ENDIF() ENDIF() -# ruby modular -IF (RubyModular) - add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/ruby_modular) +# ruby +IF (INTERFACE_RUBY) + add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/ruby) ENDIF() -# octave 
modular -IF (OctaveModular) - IF(EXISTS ${CMAKE_SOURCE_DIR}/src/interfaces/octave_modular) - add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/octave_modular) +# octave +IF (INTERFACE_OCTAVE) + IF(EXISTS ${CMAKE_SOURCE_DIR}/src/interfaces/octave) + add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/octave) ENDIF() ENDIF() -# csharp modular -IF (CSharpModular) - IF(EXISTS ${CMAKE_SOURCE_DIR}/src/interfaces/csharp_modular) - add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/csharp_modular) +# csharp +IF (INTERFACE_CSHARP) + IF(EXISTS ${CMAKE_SOURCE_DIR}/src/interfaces/csharp) + add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/csharp) ENDIF() ENDIF() -# r modular -IF (RModular) - IF(EXISTS ${CMAKE_SOURCE_DIR}/src/interfaces/r_modular) - add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/r_modular) +# R +IF (INTERFACE_R) + IF(EXISTS ${CMAKE_SOURCE_DIR}/src/interfaces/r) + add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/r) ENDIF() ENDIF() -# perl modular -IF (PerlModular) +# perl +IF (INTERFACE_PERL) FIND_PACKAGE(FindPerlLibs REQUIRED) UNSET(TARGET_SWIGFLAGS) - IF(EXISTS ${CMAKE_SOURCE_DIR}/src/interfaces/perl_modular) - #add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/perl_modular) + IF(EXISTS ${CMAKE_SOURCE_DIR}/src/interfaces/perl) + #add_subdirectory(${CMAKE_SOURCE_DIR}/src/interfaces/perl) ENDIF() ENDIF() @@ -565,6 +556,11 @@ IF(EXISTS ${CMAKE_SOURCE_DIR}/examples) add_subdirectory(${CMAKE_SOURCE_DIR}/examples) ENDIF() + # always build minimal example(s) + IF(EXISTS ${CMAKE_SOURCE_DIR}/examples/minimal) + add_subdirectory(${CMAKE_SOURCE_DIR}/examples/minimal) + ENDIF() + IF(BUILD_META_EXAMPLES) # allow meta examples without adding examples dir itself add_subdirectory(${CMAKE_SOURCE_DIR}/examples/meta) @@ -583,7 +579,7 @@ include(FeatureSummary) feature_summary(WHAT ALL) PrintLine() -PrintStatus("Integration") +PrintStatus("Integrations") PrintInterfaceStatus("OpenCV Integration" OpenCV) @@ -591,7 +587,7 @@ PrintLine() PrintStatus("Interfaces") 
ForEach(Interface ${AVAILABLE_INTERFACES}) - PrintInterfaceStatus("${${Interface}Description} interface" ${Interface}) + PrintInterfaceStatus("${${Interface}_DESCRIPTION}" ${Interface}) EndForEach(Interface) PrintLine() diff --git a/NEWS b/NEWS index 3b1951d45c5..261ec7098bd 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,34 @@ +2017-11-28 Viktor Gal + + * SHOGUN Release version 6.1.0 (libshogun 18.0, data 0.11, parameter 1) + + * This release is dedicated for Heiko's successful PhD defense + + * Add conda-forge packages, to get prebuilt binaries via the cross-platform conda package manager [Dougal Sutherland] + * Change interface cmake variables to INTERFACE_* + * Move GPL code to gpl submodule [Heiko Strathmann] + + * Features: + - Enable using BLAS/LAPACK from Eigen by default [Viktor Gal] + - Add iterators to SGVector and SGMatrix [Viktor Gal] + - Significantly lower the runtime of KernelPCA (GSoC '17) [Michele Mazzoni] + - Refactor FisherLDA and LDA solvers (GSoC '17) [Michele Mazzoni] + - Add automated test for trained model serialization (GSoC '17) [Michele Mazzoni] + - Enable SWIG director classes by default [Viktor Gal] + - Vectorize DotFeatures covariance/mean calculation [Michele Mazzoni] + - Support for premature stopping of model training (GSoC '17) [Giovanni De Toni] + - Add support for observable variables (GSoC '17) [Giovanni De Toni] + - Use TFLogger to serialize observed variables for TensorBoard (GSoC '17) [Giovanni De Toni] + - Drop CMath::dot and SGVector::dot and use linalg::dot [Viktor Gal] + - Added class probabilities for BaggingMachine (GSoC '17) [Olivier Nguyen] + * Bugfixes: + - Fix transpose bug in Ruby typemap for matrices [Elias Saalmann] + - Fix MKL detection and linking; use mkl_rt when available [Viktor Gal] + - Fix Windows static linking [Viktor Gal] + - Fix SWIG interface compilation on Windows [qcrist] + - Fix CircularBuffer bug that broke parsing of big CSV and LibSVM files #1991 [Viktor Gal] + - Fix R interface when using clang to 
compile the interface [Viktor Gal] + 2016-11-05 Viktor Gal * SHOGUN Release version 6.0.0 (libshogun 18.0, data 0.11, parameter 1) diff --git a/README.md b/README.md index c067c786439..8f70c468390 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,12 @@ Develop branch build status: [![Build Status](https://travis-ci.org/shogun-toolbox/shogun.svg?branch=develop)](https://travis-ci.org/shogun-toolbox/shogun) [![Build status](https://ci.appveyor.com/api/projects/status/jx095rnr9qhg8dcv/branch/develop?svg=true)](https://ci.appveyor.com/project/vigsterkr/shogun/branch/develop) -[![Coverage Status](https://coveralls.io/repos/shogun-toolbox/shogun/badge.png?branch=develop)](https://coveralls.io/r/shogun-toolbox/shogun?branch=develop) +[![codecov](https://codecov.io/gh/shogun-toolbox/shogun/branch/develop/graph/badge.svg)](https://codecov.io/gh/shogun-toolbox/shogun) + +Donate to Shogun via NumFocus: + +[![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](http://numfocus.org) + Buildbot: http://buildbot.shogun-toolbox.org/waterfall. 
diff --git a/applications/arts/signal_sensor.py b/applications/arts/signal_sensor.py index a0a139b4800..0498570457a 100644 --- a/applications/arts/signal_sensor.py +++ b/applications/arts/signal_sensor.py @@ -12,11 +12,11 @@ from util import * -from shogun.Features import StringCharFeatures, StringWordFeatures, CombinedFeatures, DNA -from shogun.Kernel import CombinedKernel, WeightedDegreePositionStringKernel -from shogun.Kernel import K_COMMWORDSTRING, CommWordStringKernel, IdentityKernelNormalizer -from shogun.Preprocessor import SortWordString -from shogun.Classifier import KernelMachine +from shogun import StringCharFeatures, StringWordFeatures, CombinedFeatures, DNA +from shogun import CombinedKernel, WeightedDegreePositionStringKernel +from shogun import K_COMMWORDSTRING, CommWordStringKernel, IdentityKernelNormalizer +from shogun import SortWordString +from shogun import KernelMachine class Sensor(object): diff --git a/applications/asp/asp b/applications/asp/asp index 41f57f28201..e5f9d21eec4 100755 --- a/applications/asp/asp +++ b/applications/asp/asp @@ -24,9 +24,9 @@ try: import genomic import model import seqdict - import shogun.Kernel + import shogun - d=shogun.Kernel.WeightedDegreeStringKernel(1) + d=shogun.WeightedDegreeStringKernel(1) if (d.version.get_version_revision() < 2997): print print "ERROR: SHOGUN VERSION 0.6.2 or later required" diff --git a/applications/asp/signal_detectors.py b/applications/asp/signal_detectors.py index cd43df09f5b..fe9e084791e 100644 --- a/applications/asp/signal_detectors.py +++ b/applications/asp/signal_detectors.py @@ -13,10 +13,10 @@ import numpy import seqdict -from shogun.Classifier import LibSVM -from shogun.Features import StringCharFeatures,DNA -from shogun.Kernel import WeightedDegreeStringKernel -from shogun.Library import DynamicIntArray +from shogun import LibSVM +from shogun import StringCharFeatures,DNA +from shogun import WeightedDegreeStringKernel +from shogun import DynamicIntArray class 
svm_splice_model(object): def __init__(self, order, traindat, alphas, b, (window_left,offset,window_right), consensus): diff --git a/applications/classification/evaluate_multiclass_labels.py b/applications/classification/evaluate_multiclass_labels.py index 030f5361db1..f720bb687d0 100644 --- a/applications/classification/evaluate_multiclass_labels.py +++ b/applications/classification/evaluate_multiclass_labels.py @@ -32,7 +32,7 @@ import argparse import logging import numpy as np -from modshogun import (LibSVMFile, MulticlassLabels, MulticlassAccuracy) +from shogun import (LibSVMFile, MulticlassLabels, MulticlassAccuracy) from utils import get_features_and_labels LOGGER = logging.getLogger(__file__) diff --git a/applications/classification/predict_multiclass_svm.py b/applications/classification/predict_multiclass_svm.py index 88ce0d6b504..79585ec6784 100644 --- a/applications/classification/predict_multiclass_svm.py +++ b/applications/classification/predict_multiclass_svm.py @@ -32,7 +32,7 @@ import argparse import logging from contextlib import closing -from modshogun import (LibSVMFile, SparseRealFeatures, MulticlassLabels, +from shogun import (LibSVMFile, SparseRealFeatures, MulticlassLabels, MulticlassLibSVM, SerializableHdf5File, MulticlassAccuracy) from utils import get_features_and_labels diff --git a/applications/classification/random_fourier_classification.py b/applications/classification/random_fourier_classification.py index bf5468a893f..07350cde384 100644 --- a/applications/classification/random_fourier_classification.py +++ b/applications/classification/random_fourier_classification.py @@ -32,7 +32,7 @@ def parse_arguments(): return parser.parse_args() def evaluate(predicted_labels, labels, prefix="Results"): - from modshogun import PRCEvaluation, ROCEvaluation, AccuracyMeasure + from shogun import PRCEvaluation, ROCEvaluation, AccuracyMeasure prc_evaluator = PRCEvaluation() roc_evaluator = ROCEvaluation() @@ -58,9 +58,9 @@ def 
load_sparse_data(filename, dimension=None): return {'data':sparse_feats, 'labels':labels} if __name__=='__main__': - from modshogun import SparseRealFeatures, RandomFourierDotFeatures, GAUSSIAN - from modshogun import LibSVMFile, BinaryLabels, SVMOcas - from modshogun import Time + from shogun import SparseRealFeatures, RandomFourierDotFeatures, GAUSSIAN + from shogun import LibSVMFile, BinaryLabels, SVMOcas + from shogun import Time from numpy import array args = parse_arguments() diff --git a/applications/classification/train_multiclass_svm.py b/applications/classification/train_multiclass_svm.py index 5dfb1fa4c41..39e17d2619c 100644 --- a/applications/classification/train_multiclass_svm.py +++ b/applications/classification/train_multiclass_svm.py @@ -32,7 +32,7 @@ import argparse import logging from contextlib import contextmanager, closing -from modshogun import (LibSVMFile, GaussianKernel, MulticlassLibSVM, +from shogun import (LibSVMFile, GaussianKernel, MulticlassLibSVM, SerializableHdf5File, LinearKernel) from utils import get_features_and_labels, track_execution diff --git a/applications/classification/utils.py b/applications/classification/utils.py index 40da45d9ae0..e459c39e97e 100644 --- a/applications/classification/utils.py +++ b/applications/classification/utils.py @@ -30,7 +30,7 @@ import logging from contextlib import contextmanager -from modshogun import MulticlassLabels, SparseRealFeatures, Time +from shogun import MulticlassLabels, SparseRealFeatures, Time logging.basicConfig(level=logging.INFO, format='[%(asctime)-15s %(module)s] %(message)s') diff --git a/applications/easysvm/esvm/experiment.py b/applications/easysvm/esvm/experiment.py index 04595861ba6..ed4c3a79e46 100644 --- a/applications/easysvm/esvm/experiment.py +++ b/applications/easysvm/esvm/experiment.py @@ -28,28 +28,28 @@ from poim import compute_poims import shogun -from shogun.Kernel import GaussianKernel, WeightedDegreePositionStringKernel -from shogun.Kernel import 
WeightedDegreeStringKernel -from shogun.Kernel import LinearKernel, PolyKernel, LocalAlignmentStringKernel -from shogun.Kernel import LocalityImprovedStringKernel -from shogun.Kernel import CommWordStringKernel, WeightedCommWordStringKernel, CommUlongStringKernel -from shogun.Kernel import CombinedKernel -from shogun.Kernel import SLOWBUTMEMEFFICIENT -from shogun.Kernel import AvgDiagKernelNormalizer -from shogun.Features import RealFeatures, Labels, StringCharFeatures, DNA, StringWordFeatures, StringUlongFeatures, PROTEIN -from shogun.Features import CombinedFeatures -from shogun.Classifier import LibSVM,GPBTSVM +from shogun import GaussianKernel, WeightedDegreePositionStringKernel +from shogun import WeightedDegreeStringKernel +from shogun import LinearKernel, PolyKernel, LocalAlignmentStringKernel +from shogun import LocalityImprovedStringKernel +from shogun import CommWordStringKernel, WeightedCommWordStringKernel, CommUlongStringKernel +from shogun import CombinedKernel +from shogun import SLOWBUTMEMEFFICIENT +from shogun import AvgDiagKernelNormalizer +from shogun import RealFeatures, Labels, StringCharFeatures, DNA, StringWordFeatures, StringUlongFeatures, PROTEIN +from shogun import CombinedFeatures +from shogun import LibSVM,GPBTSVM DefaultSVM = LibSVM try: - from shogun.Classifier import SVMLight + from shogun import SVMLight LinAddSVM = SVMLight LinearSVM = SVMLight except: LinAddSVM = GPBTSVM LinearSVM = LibSVM -from shogun.Preprocessor import SortWordString, SortUlongString +from shogun import SortWordString, SortUlongString from utils import calcprc, calcroc, accuracy from utils import getPartitionedSet, getCurrentSplit diff --git a/applications/easysvm/esvm/plots.py b/applications/easysvm/esvm/plots.py index 1b0cd2f2948..98e7322e234 100644 --- a/applications/easysvm/esvm/plots.py +++ b/applications/easysvm/esvm/plots.py @@ -27,8 +27,8 @@ import warnings import shutil -from shogun.Features import Labels -from shogun.Evaluation import * +from shogun 
import Labels +from shogun import * def plotroc(output, LTE, draw_random=False, figure_fname="", roc_label='ROC'): """Plot the receiver operating characteristic curve""" diff --git a/applications/easysvm/esvm/utils.py b/applications/easysvm/esvm/utils.py index 3e6a0ad60fb..dfdefa24456 100644 --- a/applications/easysvm/esvm/utils.py +++ b/applications/easysvm/esvm/utils.py @@ -23,8 +23,8 @@ import warnings import shutil -from shogun.Features import Labels -from shogun.Evaluation import * +from shogun import Labels +from shogun import * ################################################################################ # evaluation functions diff --git a/applications/easysvm/galaxy/README b/applications/easysvm/galaxy/README index a45fe881a7b..1e7b2566082 100644 --- a/applications/easysvm/galaxy/README +++ b/applications/easysvm/galaxy/README @@ -1,3 +1,3 @@ The files in this directory are a copy of svn/projects/galaxy/tools/agr. If you edit them, make sure the -changes are also integrated into the main version. \ No newline at end of file +changes are also integrated into the main version. 
diff --git a/applications/easysvm/tutpaper/svm_params.py b/applications/easysvm/tutpaper/svm_params.py index 83a6f0b2206..a0aa7afd553 100644 --- a/applications/easysvm/tutpaper/svm_params.py +++ b/applications/easysvm/tutpaper/svm_params.py @@ -14,9 +14,9 @@ import numpy import shogun -from shogun.Kernel import GaussianKernel, LinearKernel, PolyKernel -from shogun.Features import RealFeatures, BinaryLabels -from shogun.Classifier import LibSVM +from shogun import GaussianKernel, LinearKernel, PolyKernel +from shogun import RealFeatures, BinaryLabels +from shogun import LibSVM from numpy import arange import matplotlib diff --git a/applications/msplicer/msplicer b/applications/msplicer/msplicer index 4e9417f3c7a..03101521cb3 100755 --- a/applications/msplicer/msplicer +++ b/applications/msplicer/msplicer @@ -22,9 +22,9 @@ try: import genomic import model import seqdict - import shogun.Structure + import shogun - d=shogun.Structure.DynProg() + d=shogun.DynProg() if (d.version.get_version_revision() < 2997): print print "ERROR: SHOGUN VERSION 0.6.2 or later required" @@ -152,7 +152,7 @@ class msplicer: def initialize_dynprog(self, seq): - dyn=shogun.Structure.DynProg() + dyn=shogun.DynProg() self.content.initialize_content(dyn) @@ -266,7 +266,7 @@ class msplicer: dyn.best_path_set_my_state_seq(my_states) dyn.best_path_set_my_pos_seq(my_pos) - dyn.io.set_loglevel(shogun.Structure.M_DEBUG) + dyn.io.set_loglevel(shogun.M_DEBUG) dyn.best_path_deriv_call() def print_version(): @@ -348,7 +348,7 @@ WS160, WS160gc, orfWS160gc if __name__ == '__main__': - dyn=shogun.Structure.DynProg() + dyn=shogun.DynProg() (startstop, fafname, modelfname, outfile ) = parse_options() p=msplicer() p.load_model(modelfname); diff --git a/applications/msplicer/plif.py b/applications/msplicer/plif.py index 059acd8a34e..b1abe8c6417 100644 --- a/applications/msplicer/plif.py +++ b/applications/msplicer/plif.py @@ -10,9 +10,9 @@ # from numpy import array -from shogun.Structure import Plif -from 
shogun.Structure import PlifArray -from shogun.Library import DynamicPlifArray +from shogun import Plif +from shogun import PlifArray +from shogun import DynamicPlifArray class plif: def __init__(self, model): diff --git a/applications/msplicer/signal_detectors.py b/applications/msplicer/signal_detectors.py index f10096d35cb..e12002c2d5f 100644 --- a/applications/msplicer/signal_detectors.py +++ b/applications/msplicer/signal_detectors.py @@ -13,7 +13,7 @@ import numpy import seqdict -from modshogun import KernelMachine,StringCharFeatures,DNA,WeightedDegreeStringKernel +from shogun import KernelMachine,StringCharFeatures,DNA,WeightedDegreeStringKernel class svm_splice_model(object): def __init__(self, order, traindat, alphas, b, (window_left,offset,window_right), consensus): diff --git a/applications/ocr/Ai.py b/applications/ocr/Ai.py index 73edfb261b0..f8a3284ae52 100644 --- a/applications/ocr/Ai.py +++ b/applications/ocr/Ai.py @@ -1,9 +1,9 @@ # File : $HeadURL$ # Version: $Id$ -from modshogun import RealFeatures, MulticlassLabels -from modshogun import GaussianKernel -from modshogun import GMNPSVM +from shogun import RealFeatures, MulticlassLabels +from shogun import GaussianKernel +from shogun import GMNPSVM import numpy as np import gzip as gz diff --git a/applications/tapkee/faces_embedding.py b/applications/tapkee/faces_embedding.py index 1931d9fad93..24cf327031d 100644 --- a/applications/tapkee/faces_embedding.py +++ b/applications/tapkee/faces_embedding.py @@ -8,7 +8,7 @@ # Written (W) 2011 Sergey Lisitsyn # Copyright (C) 2011 Sergey Lisitsyn -from modshogun import * +from shogun import * from numpy import * from matplotlib.offsetbox import TextArea, DrawingArea, OffsetImage, AnnotationBbox import re,os,time diff --git a/applications/tapkee/samples/dm.py b/applications/tapkee/samples/dm.py index 8c1d3b87efd..036028dfd44 100644 --- a/applications/tapkee/samples/dm.py +++ b/applications/tapkee/samples/dm.py @@ -1,4 +1,4 @@ -import modshogun as sg +import 
shogun as sg import data import numpy as np diff --git a/applications/tapkee/samples/hlle.py b/applications/tapkee/samples/hlle.py index 7360941b614..0dc860931c0 100644 --- a/applications/tapkee/samples/hlle.py +++ b/applications/tapkee/samples/hlle.py @@ -1,4 +1,4 @@ -import modshogun as sg +import shogun as sg import data # load data diff --git a/applications/tapkee/samples/isomap.py b/applications/tapkee/samples/isomap.py index 086bf2cbc2e..1d0569e1662 100644 --- a/applications/tapkee/samples/isomap.py +++ b/applications/tapkee/samples/isomap.py @@ -1,4 +1,4 @@ -import modshogun as sg +import shogun as sg import data import numpy as np diff --git a/applications/tapkee/samples/klle.py b/applications/tapkee/samples/klle.py index 1075a357c1e..77b9908cd68 100644 --- a/applications/tapkee/samples/klle.py +++ b/applications/tapkee/samples/klle.py @@ -1,4 +1,4 @@ -import modshogun as sg +import shogun as sg import data import numpy as np diff --git a/applications/tapkee/samples/la.py b/applications/tapkee/samples/la.py index 46e61451277..b125fcc67c8 100644 --- a/applications/tapkee/samples/la.py +++ b/applications/tapkee/samples/la.py @@ -1,4 +1,4 @@ -import modshogun as sg +import shogun as sg import data import numpy as np diff --git a/applications/tapkee/samples/lle.py b/applications/tapkee/samples/lle.py index 60740d3eb71..f0a05dd4599 100644 --- a/applications/tapkee/samples/lle.py +++ b/applications/tapkee/samples/lle.py @@ -1,4 +1,4 @@ -import modshogun as sg +import shogun as sg import data # load data diff --git a/applications/tapkee/samples/lltsa.py b/applications/tapkee/samples/lltsa.py index 12c19da02a4..f59a3fb2ea3 100644 --- a/applications/tapkee/samples/lltsa.py +++ b/applications/tapkee/samples/lltsa.py @@ -1,4 +1,4 @@ -import modshogun as sg +import shogun as sg import data # load data diff --git a/applications/tapkee/samples/lpp.py b/applications/tapkee/samples/lpp.py index f5d44662cbb..9c9c4e87f37 100644 --- a/applications/tapkee/samples/lpp.py +++ 
b/applications/tapkee/samples/lpp.py @@ -1,4 +1,4 @@ -import modshogun as sg +import shogun as sg import data # load data diff --git a/applications/tapkee/samples/ltsa.py b/applications/tapkee/samples/ltsa.py index 6d4f32734aa..2c2d760cd4a 100644 --- a/applications/tapkee/samples/ltsa.py +++ b/applications/tapkee/samples/ltsa.py @@ -1,4 +1,4 @@ -import modshogun as sg +import shogun as sg import data # load data diff --git a/applications/tapkee/samples/mds.py b/applications/tapkee/samples/mds.py index d758dce1ed4..4f0f8ffe633 100644 --- a/applications/tapkee/samples/mds.py +++ b/applications/tapkee/samples/mds.py @@ -1,4 +1,4 @@ -import modshogun as sg +import shogun as sg import data import numpy as np diff --git a/applications/tapkee/samples/npe.py b/applications/tapkee/samples/npe.py index a413a08856c..cefa4880bdf 100644 --- a/applications/tapkee/samples/npe.py +++ b/applications/tapkee/samples/npe.py @@ -1,4 +1,4 @@ -import modshogun as sg +import shogun as sg import data # load data diff --git a/applications/tapkee/swissroll_embedding.py b/applications/tapkee/swissroll_embedding.py index 057c902901c..e83d69d71a5 100644 --- a/applications/tapkee/swissroll_embedding.py +++ b/applications/tapkee/swissroll_embedding.py @@ -5,12 +5,12 @@ N = X.shape[1] converters = [] -from shogun.Converter import LocallyLinearEmbedding +from shogun import LocallyLinearEmbedding lle = LocallyLinearEmbedding() lle.set_k(9) converters.append((lle, "LLE with k=%d" % lle.get_k())) -from shogun.Converter import MultidimensionalScaling +from shogun import MultidimensionalScaling mds = MultidimensionalScaling() converters.append((mds, "Classic MDS")) @@ -19,29 +19,29 @@ lmds.set_landmark_number(20) converters.append((lmds,"Landmark MDS with %d landmarks" % lmds.get_landmark_number())) -from shogun.Converter import Isomap +from shogun import Isomap cisomap = Isomap() cisomap.set_k(9) converters.append((cisomap,"Isomap with k=%d" % cisomap.get_k())) -from shogun.Converter import 
DiffusionMaps -from shogun.Kernel import GaussianKernel +from shogun import DiffusionMaps +from shogun import GaussianKernel dm = DiffusionMaps() dm.set_t(2) dm.set_width(1000.0) converters.append((dm,"Diffusion Maps with t=%d, sigma=%.1f" % (dm.get_t(),dm.get_width()))) -from shogun.Converter import HessianLocallyLinearEmbedding +from shogun import HessianLocallyLinearEmbedding hlle = HessianLocallyLinearEmbedding() hlle.set_k(6) converters.append((hlle,"Hessian LLE with k=%d" % (hlle.get_k()))) -from shogun.Converter import LocalTangentSpaceAlignment +from shogun import LocalTangentSpaceAlignment ltsa = LocalTangentSpaceAlignment() ltsa.set_k(6) converters.append((ltsa,"LTSA with k=%d" % (ltsa.get_k()))) -from shogun.Converter import LaplacianEigenmaps +from shogun import LaplacianEigenmaps le = LaplacianEigenmaps() le.set_k(20) le.set_tau(100.0) @@ -67,7 +67,7 @@ plt.suptitle('Swissroll embedding',fontsize=9) plt.subplots_adjust(hspace=0.4) -from shogun.Features import RealFeatures +from shogun import RealFeatures for (i, (converter, label)) in enumerate(converters): X = numpy.genfromtxt('../../data/toy/swissroll.dat',unpack=True).T diff --git a/applications/tapkee/words_embedding.py b/applications/tapkee/words_embedding.py index faa1e8af613..d74deb215e6 100644 --- a/applications/tapkee/words_embedding.py +++ b/applications/tapkee/words_embedding.py @@ -10,7 +10,7 @@ from numpy import * from pylab import * -from modshogun import * +from shogun import * import random import difflib diff --git a/appveyor.yml b/appveyor.yml index 89f5f7f7e33..9adc097a25b 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,11 +1,13 @@ -version: 4.2.{build} +version: "{build}" environment: matrix: - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 VSVER: Visual Studio 14 2015 Win64 - - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 - VSVER: Visual Studio 15 2017 Win64 + PYTHON: "C:\\Python27" +# disable multiple builds until each build takes more than 30 minutes +# - 
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 +# VSVER: Visual Studio 15 2017 Win64 platform: - x64 @@ -20,14 +22,19 @@ before_build: - md %APPVEYOR_BUILD_FOLDER%\build - cd %APPVEYOR_BUILD_FOLDER%\build - cmake -G"%VSVER%" -DCMAKE_BUILD_TYPE=%CONFIGURATION% -DBUILD_META_EXAMPLES=OFF -DENABLE_TESTING=ON .. - - cd .. build: - parallel: true - project: build\shogun.sln verbosity: minimal +# TODO: fix cmake script in order that building examples +# triggers the build of shogun-static.vcxproj +build_script: + - msbuild src\shogun\libshogun.vcxproj + - msbuild src\shogun\shogun.vcxproj + - msbuild src\shogun\shogun-static.vcxproj + - msbuild shogun.sln + test_script: - cd %APPVEYOR_BUILD_FOLDER%\build - ctest --output-on-failure -C %CONFIGURATION% - - cd .. + diff --git a/benchmarks/elementwise_benchmark.cpp b/benchmarks/elementwise_benchmark.cpp deleted file mode 100644 index 3a1f2feaee8..00000000000 --- a/benchmarks/elementwise_benchmark.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) The Shogun Machine Learning Toolbox - * Written (w) 2015 Soumyajit De - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * The views and conclusions contained in the software and documentation are those - * of the authors and should not be interpreted as representing official policies, - * either expressed or implied, of the Shogun Development Team. - */ - -#include -#include -#include -#include -#include -#include - -using namespace shogun; - -/** - * Instructions : - * 1. Install benchmarking toolkit "hayai" (https://github.com/nickbruun/hayai) - * 2. Compile against libhayai_main, e.g. - * g++ -O3 -std=c++11 elementwise_benchmark.cpp -I/usr/include/eigen3 -lshogun -lhayai_main -lOpenCL -o benchmark - * 3. 
./benchmark - */ - -/** Generate data only once */ -struct Data -{ - Data() - { - init(); - } - - void init() - { - m_cpu=SGMatrix(num_rows, num_cols); - std::iota(m_cpu.data(), m_cpu.data()+m_cpu.size(), 1); - m_gpu=CGPUMatrix(m_cpu); - } - - SGMatrix m_cpu; - CGPUMatrix m_gpu; - - static constexpr index_t num_rows=1000; - static constexpr index_t num_cols=1000; -}; - -Data data; - -BENCHMARK(SGMatrix, elementwise, 10, 1000) -{ - float32_t weights=0.6; - float32_t std_dev=0.2; - float32_t mean=0.01; - - SGMatrix result=linalg::elementwise_compute(data.m_cpu, - [&weights, &std_dev, &mean](float32_t& sqr_dist) - { - float32_t outer_factor=-2*CMath::PI*CMath::sqrt(sqr_dist)*CMath::sq(weights); - float32_t exp_factor=CMath::exp(-2*CMath::sq(CMath::PI)*sqr_dist*CMath::sq(std_dev)); - float32_t sin_factor=CMath::sin(2*CMath::PI*CMath::sqrt(sqr_dist)*mean); - return outer_factor*exp_factor*sin_factor; - }); - -} - -BENCHMARK(SGMatrix, loop, 10, 1000) -{ - float32_t weights=0.6; - float32_t std_dev=0.2; - float32_t mean=0.01; - - SGMatrix result(data.m_cpu.num_rows, data.m_cpu.num_cols); - - for (index_t j=0; j(); - - std::string operation; - operation.append(linalg::implementation::ocl::format( - R"( - {type} {var1} = -2*{pi}*sqrt(element)*pow({weights}, 2); - {type} {var2} = exp(-2*pow({pi}, 2)*element*pow({stddev}, 2)); - {type} {var3} = sin(2*{pi}*sqrt(element)*{mean}); - return {var1}*{var2}*{var3}; - )", - { - linalg::ocl::Parameter("type")=data_type, - linalg::ocl::Parameter("var1")="outer_factor", - linalg::ocl::Parameter("var2")="exp_factor", - linalg::ocl::Parameter("var3")="sin_factor", - linalg::ocl::Parameter("pi")=CMath::PI, - linalg::ocl::Parameter("weights")=weights, - linalg::ocl::Parameter("stddev")=std_dev, - linalg::ocl::Parameter("mean")=mean - })); - - linalg::elementwise_compute_inplace(data.m_gpu, operation); -} diff --git a/cmake/CheckCompiler.cmake b/cmake/CheckCompiler.cmake deleted file mode 100644 index 5b2dcb43ce2..00000000000 --- 
a/cmake/CheckCompiler.cmake +++ /dev/null @@ -1,68 +0,0 @@ -# -*- mode: cmake; -*- -# -# Figure out the version of the used compiler -# Variables set by this module -# CMAKE_CXX_COMPILER_MAJOR major version of compiler -# CMAKE_CXX_COMPILER_MINR minor version of compiler -# CMAKE_CXX_COMPILER_PATCH patch level (e.g. gcc 4.1.0) -# - -#execute_process(COMMAND [args1...]] -# [COMMAND [args2...] [...]] -# [WORKING_DIRECTORY ] -# [TIMEOUT ] -# [RESULT_VARIABLE ] -# [OUTPUT_VARIABLE ] -# [ERROR_VARIABLE ] -# [INPUT_FILE ] -# [OUTPUT_FILE ] -# [ERROR_FILE ] -# [OUTPUT_QUIET] -# [ERROR_QUIET] -# [OUTPUT_STRIP_TRAILING_WHITESPACE] -# [ERROR_STRIP_TRAILING_WHITESPACE]) - -# check the version of the compiler -set(CMAKE_CXX_COMPILER_MAJOR "CMAKE_CXX_COMPILER_MAJOR-NOTFOUND") -set(CMAKE_CXX_COMPILER_MINOR "CMAKE_CXX_COMPILER_MINOR-NOTFOUND") -set(CMAKE_CXX_COMPILER_PATCH "CMAKE_CXX_COMPILER_PATCH-NOTFOUND") - -# extract the version of the compiler -if( ${CMAKE_CXX_COMPILER_ID} STREQUAL "Intel") - execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion - OUTPUT_VARIABLE CMAKE_CXX_COMPILER_VERSION) - - string(REGEX REPLACE "^([0-9]+)\\.([0-9]+).*$" "\\1" - CMAKE_CXX_COMPILER_MAJOR ${CMAKE_CXX_COMPILER_VERSION}) - string(REGEX REPLACE "^([0-9]+)\\.([0-9]+).*" "\\2" - CMAKE_CXX_COMPILER_MINOR ${CMAKE_CXX_COMPILER_VERSION}) - set(CMAKE_CXX_COMPILER_PATCH "") -endif( ${CMAKE_CXX_COMPILER_ID} STREQUAL "Intel") - -if( ${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") - execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion - OUTPUT_VARIABLE CMAKE_CXX_COMPILER_VERSION) - - string(STRIP ${CMAKE_CXX_COMPILER_VERSION} CMAKE_CXX_COMPILER_VERSION) - string(REGEX REPLACE "^([0-9]+).*$" "\\1" - CMAKE_CXX_COMPILER_MAJOR ${CMAKE_CXX_COMPILER_VERSION}) - string(REGEX REPLACE "^([0-9]+)\\.([0-9]+).*$" "\\2" - CMAKE_CXX_COMPILER_MINOR ${CMAKE_CXX_COMPILER_VERSION}) - string(REGEX REPLACE "^([0-9]+)\\.([0-9]+)\\.([0-9]+)$" "\\3" - CMAKE_CXX_COMPILER_PATCH ${CMAKE_CXX_COMPILER_VERSION}) -endif( 
${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") - - -# just print the results if requested -function(info_compiler) - message(STATUS "CMAKE_FORCE_CXX_COMPILER = '${CMAKE_FORCE_CXX_COMPILER}'") - message(STATUS "CMAKE_CXX_COMPILER = '${CMAKE_CXX_COMPILER}'") - message(STATUS "CMAKE_CXX_COMPILER_ID = '${CMAKE_CXX_COMPILER_ID}'") - message(STATUS "CMAKE_CXX_COMPILER_INIT = '${CMAKE_CXX_COMPILER_INIT}'") - message(STATUS "CMAKE_GENERATOR_CXX = '${CMAKE_GENERATOR_CXX}'") - message(STATUS "CMAKE_GNULD_IMAGE_VERSION = '${CMAKE_GNULD_IMAGE_VERSION}'") - message(STATUS "CMAKE_CXX_COMPILER_VERSION= '${CMAKE_CXX_COMPILER_VERSION}'") - message(STATUS "CMAKE_CXX_COMPILER_MAJOR = '${CMAKE_CXX_COMPILER_MAJOR}'") - message(STATUS "CMAKE_CXX_COMPILER_MINOR = '${CMAKE_CXX_COMPILER_MINOR}'") - message(STATUS "CMAKE_CXX_COMPILER_PATCH = '${CMAKE_CXX_COMPILER_PATCH}'") -endfunction(info_compiler) diff --git a/cmake/CheckSVNRevision.cmake b/cmake/CheckSVNRevision.cmake deleted file mode 100644 index c4d6489b4d6..00000000000 --- a/cmake/CheckSVNRevision.cmake +++ /dev/null @@ -1,18 +0,0 @@ -EXECUTE_PROCESS( - COMMAND ${CMAKE_COMMAND} -E chdir ${SRC_DIR} ${SVN_EXEC} info - OUTPUT_VARIABLE SVN_INFO_OUTPUT - RESULT_VARIABLE SVN_INFO_RETURN - ERROR_VARIABLE SVN_INFO_ERROR - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - -IF (NOT ${SVN_INFO_RETURN}) - STRING(REGEX MATCH "Revision: [0-9]+" CHECKED_OUT_REVISION ${SVN_INFO_OUTPUT}) - STRING(REGEX REPLACE "Revision: ([0-9]+)" "\\1" CHECKED_OUT_REVISION ${CHECKED_OUT_REVISION}) - - IF (${CHECKED_OUT_REVISION} VERSION_EQUAL ${REVISION}) - EXECUTE_PROCESS( - COMMAND ${CMAKE_COMMAND} -E touch ${STAMP_DIR}/${PROJECT_NAME}-download - ) - ENDIF() -ENDIF() \ No newline at end of file diff --git a/cmake/CommonModularInterface.cmake b/cmake/CommonModularInterface.cmake deleted file mode 100644 index 1e02e00cbaf..00000000000 --- a/cmake/CommonModularInterface.cmake +++ /dev/null @@ -1,89 +0,0 @@ -MACRO(GENERATE_MODULAR_TARGET MODULAR_NAME MODULAR_DIR MODULAR_LIBARIES) - 
-get_target_property(ShogunIncludes shogun::shogun INTERFACE_INCLUDE_DIRECTORIES) -INCLUDE_DIRECTORIES(${ShogunIncludes}) - -# set compiler SWIG generated cxx compiler flags -SET(CMAKE_CXX_FLAGS ${SWIG_CXX_COMPILER_FLAGS}) -# unset any release or distribution flags -# we don't want them when compiling SWIG generated source -SET(CMAKE_CXX_FLAGS_RELEASE "") -SET(CMAKE_CXX_FLAGS_DISTRIBUTION "") -SET(CMAKE_CXX_FLAGS_DEBUG "") - -if(${MODULAR_NAME} STREQUAL "python") - SET(PREPEND_TARGET "_") -endif() - -set(modular_files) -FILE(GLOB_RECURSE MODULAR_FILES ${COMMON_MODULAR_SRC_DIR}/*.i) -FILE(GLOB_RECURSE CUSTOM_MODULAR_FILES ${MODULAR_DIR}/*.i) -LIST(APPEND MODULAR_FILES ${CUSTOM_MODULAR_FILES}) -FOREACH(file ${MODULAR_FILES}) - get_filename_component(fname ${file} NAME) - list(APPEND modular_files ${fname}) - ADD_CUSTOM_COMMAND(OUTPUT ${fname} - DEPENDS ${file} - COMMAND "${CMAKE_COMMAND}" -E copy_if_different ${file} ${fname} - COMMENT "" - ) -ENDFOREACH() - -ADD_CUSTOM_TARGET(${MODULAR_NAME}_modular_src - DEPENDS shogun::shogun ${modular_files} - COMMENT "copying SWIG files") - -INCLUDE(${SWIG_USE_FILE}) -SET_SOURCE_FILES_PROPERTIES(modshogun.i PROPERTIES CPLUSPLUS ON) -IF(DEFINED TARGET_SWIGFLAGS) - SET_SOURCE_FILES_PROPERTIES(modshogun.i PROPERTIES SWIG_FLAGS ${TARGET_SWIGFLAGS}) -ENDIF() -SET(SWIG_MODULE_${MODULAR_NAME}_modular_EXTRA_DEPS ${modular_files}) -SWIG_ADD_MODULE(${MODULAR_NAME}_modular ${MODULAR_NAME} modshogun.i sg_print_functions.cpp) -SWIG_LINK_LIBRARIES(${MODULAR_NAME}_modular shogun::shogun ${MODULAR_LIBARIES}) -SET_TARGET_PROPERTIES(${SWIG_MODULE_${MODULAR_NAME}_modular_REAL_NAME} PROPERTIES OUTPUT_NAME ${PREPEND_TARGET}modshogun) -ADD_DEPENDENCIES(${SWIG_MODULE_${MODULAR_NAME}_modular_REAL_NAME} ${MODULAR_NAME}_modular_src) - -#ADD_CUSTOM_COMMAND(TARGETS ${PREPEND_TARGET}${MODULAR_NAME}_modular -# POST_BUILD -# COMMAND ${PYTHON_EXECUTABLE} -# ARGS ${CMAKE_SOURCE_DIR}/src/.scrub_docstrings.py ) - -IF(DOXYGEN_FOUND) - 
configure_file(${COMMON_MODULAR_SRC_DIR}/modshogun.doxy.in modshogun.doxy) - - ADD_CUSTOM_COMMAND( - OUTPUT modshogun - COMMAND ${DOXYGEN_EXECUTABLE} - ARGS modshogun.doxy - DEPENDS shogun::shogun - COMMENT "Generating doxygen doc" - ) - - ADD_CUSTOM_COMMAND( - OUTPUT modshogun_doxygen.i - COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/src/.doxy2swig.py - ARGS --quiet --no-function-definition modshogun/doxygen_xml/index.xml modshogun_doxygen.i - DEPENDS modshogun - ) - ADD_CUSTOM_TARGET(${MODULAR_NAME}_doxy2swig DEPENDS modshogun_doxygen.i) - ADD_DEPENDENCIES(${SWIG_MODULE_${MODULAR_NAME}_modular_REAL_NAME} ${MODULAR_NAME}_doxy2swig) -ELSE() - #TODO add scrubing -ENDIF() - -# Make sure all modular interfaces are build single-threaded to reduce -# excessive memory consumption during build. -IF(SWIG_SINGLE_THREADED) - FOREACH(SG_MODULAR_INTERFACE_TARGET ${SG_MODULAR_INTERFACE_TARGETS}) - ADD_DEPENDENCIES(${SWIG_MODULE_${MODULAR_NAME}_modular_REAL_NAME} - ${SG_MODULAR_INTERFACE_TARGET}) - ENDFOREACH(SG_MODULAR_INTERFACE_TARGET ${SG_MODULAR_INTERFACE_TARGETS}) - SET(SG_MODULAR_INTERFACE_TARGETS - "${SWIG_MODULE_${MODULAR_NAME}_modular_REAL_NAME};${SG_MODULAR_INTERFACE_TARGETS}" - CACHE STRING "List of modular-interfaces beeing build." 
FORCE) -ENDIF(SWIG_SINGLE_THREADED) - -CONFIGURE_FILE(${COMMON_MODULAR_SRC_DIR}/swig_config.h.in swig_config.h) - -ENDMACRO() diff --git a/cmake/FindAtlas.cmake b/cmake/FindAtlas.cmake index 60ed66320b0..d5cc159244d 100644 --- a/cmake/FindAtlas.cmake +++ b/cmake/FindAtlas.cmake @@ -7,10 +7,8 @@ # Atlas_FOUND # Atlas_INCLUDE_DIRS # Atlas_LIBRARIES -# Atlas_LIBRARYRARY_DIRS set(Atlas_INCLUDE_SEARCH_PATHS - /usr/include /usr/include/atlas /usr/include/atlas-base $ENV{Atlas_ROOT_DIR} @@ -20,36 +18,42 @@ set(Atlas_INCLUDE_SEARCH_PATHS set(Atlas_LIB_SEARCH_PATHS /usr/lib/atlas /usr/lib/atlas-base - /usr/lib64 /usr/lib64/atlas + /usr/lib64/atlas-base $ENV{Atlas_ROOT_DIR} $ENV{Atlas_ROOT_DIR}/lib ) - find_path(Atlas_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${Atlas_INCLUDE_SEARCH_PATHS}) find_path(Atlas_CLAPACK_INCLUDE_DIR NAMES clapack.h PATHS ${Atlas_INCLUDE_SEARCH_PATHS}) -find_library(Atlas_CBLAS_LIBRARY NAMES ptcblas_r ptcblas cblas_r cblas blas PATHS ${Atlas_LIB_SEARCH_PATHS}) -find_library(Atlas_BLAS_LIBRARY NAMES atlas_r atlas tatlas satlas PATHS ${Atlas_LIB_SEARCH_PATHS}) -find_library(Atlas_LAPACK_LIBRARY NAMES alapack_r alapack lapack_atlas lapacke PATHS ${Atlas_LIB_SEARCH_PATHS}) - -set(LOOKED_FOR - Atlas_CBLAS_INCLUDE_DIR - Atlas_CLAPACK_INCLUDE_DIR - - Atlas_CBLAS_LIBRARY - Atlas_BLAS_LIBRARY - Atlas_LAPACK_LIBRARY -) +find_library(Atlas_BLAS_LIBRARY NAMES atlas_r atlas tatlas satlas PATHS ${Atlas_LIB_SEARCH_PATHS}) +set(ATLAS_LIBS_VAR Atlas_BLAS_LIBRARY) +if (Atlas_BLAS_LIBRARY) + include(CheckLibraryExists) + # atlas 3.10+ contains all the function in one shared lib so dont try to find other parts of atlas + check_library_exists("${Atlas_BLAS_LIBRARY}" cblas_dgemv "" FOUND_ATLAS_CBLAS_DGEMV) + if (NOT FOUND_ATLAS_CBLAS_DGEMV) + find_library(Atlas_CBLAS_LIBRARY NAMES ptcblas_r ptcblas cblas_r cblas PATHS ${Atlas_LIB_SEARCH_PATHS}) + list(APPEND ATLAS_LIBS_VAR Atlas_CBLAS_LIBRARY) + endif() + check_library_exists("${Atlas_BLAS_LIBRARY}" clapack_dpotrf "" 
FOUND_ATLAS_CLAPACK_DPOTRF) + if (NOT FOUND_ATLAS_CLAPACK_DPOTRF) + find_library(Atlas_LAPACK_LIBRARY NAMES alapack_r alapack lapack_atlas atllapack PATHS ${Atlas_LIB_SEARCH_PATHS}) + list(APPEND ATLAS_LIBS_VAR Atlas_LAPACK_LIBRARY) + endif() +endif() include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(Atlas DEFAULT_MSG ${LOOKED_FOR}) +find_package_handle_standard_args(Atlas DEFAULT_MSG Atlas_CBLAS_INCLUDE_DIR Atlas_CLAPACK_INCLUDE_DIR ${ATLAS_LIBS_VAR}) if(ATLAS_FOUND) set(Atlas_INCLUDE_DIRS ${Atlas_CBLAS_INCLUDE_DIR} ${Atlas_CLAPACK_INCLUDE_DIR}) - set(Atlas_LIBRARIES ${Atlas_LAPACK_LIBRARY} ${Atlas_CBLAS_LIBRARY} ${Atlas_BLAS_LIBRARY}) - mark_as_advanced(${LOOKED_FOR}) + set(ATLAS_LIBS) + foreach(atlas_lib ${ATLAS_LIBS_VAR}) + list(APPEND ATLAS_LIBS ${${atlas_lib}}) + endforeach() + set(Atlas_LIBRARIES ${ATLAS_LIBS}) + mark_as_advanced(${Atlas_CBLAS_INCLUDE_DIR} ${Atlas_CLAPACK_INCLUDE_DIR} ${ATLAS_LIBS}) message(STATUS "Found Atlas (include: ${Atlas_CBLAS_INCLUDE_DIR}, library: ${Atlas_BLAS_LIBRARY})") endif(ATLAS_FOUND) - diff --git a/cmake/FindCBLAS.cmake b/cmake/FindCBLAS.cmake deleted file mode 100644 index 554d086d153..00000000000 --- a/cmake/FindCBLAS.cmake +++ /dev/null @@ -1,42 +0,0 @@ -# - Find CBLAS -# Find the native CBLAS headers and libraries. -# -# CBLAS_LIBRARIES - List of libraries when using cblas. -# CBLAS_FOUND - True if cblas found. -# -# Copyright 2009-2011 The VOTCA Development Team (http://www.votca.org) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -find_file(CBLAS_LIBRARY NAMES libcblas.so.3 libcblas.so.3gf - PATHS /usr/lib /usr/local/lib /opt/local/lib) -find_library(CBLAS_LIBRARY NAMES cblas gslcblas HINTS $ENV{CBLASDIR}/lib $ENV{CBLASDIR}/lib64 ) - -set(CBLAS_LIBRARIES ${CBLAS_LIBRARY} ) - -include(FindPackageHandleStandardArgs) -# handle the QUIETLY and REQUIRED arguments and set FFTW3_FOUND to TRUE -# if all listed variables are TRUE - -find_package_handle_standard_args(CBLAS DEFAULT_MSG CBLAS_LIBRARY ) - -if (CBLAS_FOUND) - include(CheckLibraryExists) - check_library_exists("${CBLAS_LIBRARY}" cblas_dsyrk "" FOUND_DSYRK) - if(NOT FOUND_DSYRK) - message(FATAL_ERROR "Could not find cblas_dsyrk in ${CBLAS_LIBRARY}, take a look at the error message in ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log to find out what was going wrong. If you don't have pkg-config installed you will most likely have to set CBLAS_LIBRARY by hand (i.e. 
-DCBLAS_LIBRARY='/path/to/libcblas.so') !") - endif(NOT FOUND_DSYRK) -endif (CBLAS_FOUND) - -mark_as_advanced( CBLAS_LIBRARY ) diff --git a/cmake/FindMetaExamples.cmake b/cmake/FindMetaExamples.cmake index 054f746a0b7..760d07ee53b 100644 --- a/cmake/FindMetaExamples.cmake +++ b/cmake/FindMetaExamples.cmake @@ -30,7 +30,10 @@ function(get_excluded_meta_examples) IF(NOT USE_GPL_SHOGUN) LIST(APPEND EXCLUDED_META_EXAMPLES gaussian_processes/gaussian_process_regression.sg + gaussian_processes/gaussian_process_classifier.sg multiclass_classifier/multiclass_logisticregression.sg + statistical_testing/linear_time_mmd.sg + statistical_testing/quadratic_time_mmd.sg ) ENDIF() diff --git a/cmake/FindPythonLibs.cmake b/cmake/FindPythonLibs.cmake deleted file mode 100644 index 45712a35b33..00000000000 --- a/cmake/FindPythonLibs.cmake +++ /dev/null @@ -1,279 +0,0 @@ -# - Find python libraries -# This module finds if Python is installed and determines where the -# include files and libraries are. It also determines what the name of -# the library is. This code sets the following variables: -# -# PYTHONLIBS_FOUND - have the Python libs been found -# PYTHON_LIBRARIES - path to the python library -# PYTHON_INCLUDE_PATH - path to where Python.h is found (deprecated) -# PYTHON_INCLUDE_DIRS - path to where Python.h is found -# PYTHON_DEBUG_LIBRARIES - path to the debug library (deprecated) -# PYTHONLIBS_VERSION_STRING - version of the Python libs found (since CMake 2.8.8) -# -# The Python_ADDITIONAL_VERSIONS variable can be used to specify a list of -# version numbers that should be taken into account when searching for Python. -# You need to set this variable before calling find_package(PythonLibs). 
-# -# If you'd like to specify the installation of Python to use, you should modify -# the following cache variables: -# PYTHON_LIBRARY - path to the python library -# PYTHON_INCLUDE_DIR - path to where Python.h is found - -#============================================================================= -# Copyright 2001-2009 Kitware, Inc. -# -# Distributed under the OSI-approved BSD License (the "License"); -# see accompanying file Copyright.txt for details. -# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. -#============================================================================= -# (To distribute this file outside of CMake, substitute the full -# License text for the above reference.) - -include(CMakeFindFrameworks) -# Search for the python framework on Apple. -CMAKE_FIND_FRAMEWORKS(Python) - -set(_PYTHON1_VERSIONS 1.6 1.5) -set(_PYTHON2_VERSIONS 2.7 2.6 2.5 2.4 2.3 2.2 2.1 2.0) -set(_PYTHON3_VERSIONS 3.3 3.2 3.1 3.0) - -if(PythonLibs_FIND_VERSION) - if(PythonLibs_FIND_VERSION MATCHES "^[0-9]+\\.[0-9]+(\\.[0-9]+.*)?$") - string(REGEX REPLACE "^([0-9]+\\.[0-9]+).*" "\\1" _PYTHON_FIND_MAJ_MIN "${PythonLibs_FIND_VERSION}") - string(REGEX REPLACE "^([0-9]+).*" "\\1" _PYTHON_FIND_MAJ "${_PYTHON_FIND_MAJ_MIN}") - unset(_PYTHON_FIND_OTHER_VERSIONS) - if(PythonLibs_FIND_VERSION_EXACT) - if(_PYTHON_FIND_MAJ_MIN STREQUAL PythonLibs_FIND_VERSION) - set(_PYTHON_FIND_OTHER_VERSIONS "${PythonLibs_FIND_VERSION}") - else() - set(_PYTHON_FIND_OTHER_VERSIONS "${PythonLibs_FIND_VERSION}" "${_PYTHON_FIND_MAJ_MIN}") - endif() - else() - foreach(_PYTHON_V ${_PYTHON${_PYTHON_FIND_MAJ}_VERSIONS}) - if(NOT _PYTHON_V VERSION_LESS _PYTHON_FIND_MAJ_MIN) - list(APPEND _PYTHON_FIND_OTHER_VERSIONS ${_PYTHON_V}) - endif() - endforeach() - endif() - unset(_PYTHON_FIND_MAJ_MIN) - unset(_PYTHON_FIND_MAJ) - else() - set(_PYTHON_FIND_OTHER_VERSIONS 
${_PYTHON${PythonLibs_FIND_VERSION}_VERSIONS}) - endif() -else() - set(_PYTHON_FIND_OTHER_VERSIONS ${_PYTHON3_VERSIONS} ${_PYTHON2_VERSIONS} ${_PYTHON1_VERSIONS}) -endif() - -# Set up the versions we know about, in the order we will search. Always add -# the user supplied additional versions to the front. -# If FindPythonInterp has already found the major and minor version, -# insert that version between the user supplied versions and the stock -# version list. -set(_Python_VERSIONS ${Python_ADDITIONAL_VERSIONS}) -if(DEFINED PYTHON_VERSION_MAJOR AND DEFINED PYTHON_VERSION_MINOR) - list(APPEND _Python_VERSIONS ${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}) -endif() -list(APPEND _Python_VERSIONS ${_PYTHON_FIND_OTHER_VERSIONS}) - -unset(_PYTHON_FIND_OTHER_VERSIONS) -unset(_PYTHON1_VERSIONS) -unset(_PYTHON2_VERSIONS) -unset(_PYTHON3_VERSIONS) - -foreach(_CURRENT_VERSION ${_Python_VERSIONS}) - string(REPLACE "." "" _CURRENT_VERSION_NO_DOTS ${_CURRENT_VERSION}) - if(WIN32) - find_library(PYTHON_DEBUG_LIBRARY - NAMES python${_CURRENT_VERSION_NO_DOTS}_d python - PATHS - [HKEY_LOCAL_MACHINE\\SOFTWARE\\Python\\PythonCore\\${_CURRENT_VERSION}\\InstallPath]/libs/Debug - [HKEY_CURRENT_USER\\SOFTWARE\\Python\\PythonCore\\${_CURRENT_VERSION}\\InstallPath]/libs/Debug - [HKEY_LOCAL_MACHINE\\SOFTWARE\\Python\\PythonCore\\${_CURRENT_VERSION}\\InstallPath]/libs - [HKEY_CURRENT_USER\\SOFTWARE\\Python\\PythonCore\\${_CURRENT_VERSION}\\InstallPath]/libs - ) - endif() - - find_library(PYTHON_LIBRARY - NAMES - python${_CURRENT_VERSION_NO_DOTS} - python${_CURRENT_VERSION}mu - python${_CURRENT_VERSION}m - python${_CURRENT_VERSION}u - python${_CURRENT_VERSION} - PATHS - [HKEY_LOCAL_MACHINE\\SOFTWARE\\Python\\PythonCore\\${_CURRENT_VERSION}\\InstallPath]/libs - [HKEY_CURRENT_USER\\SOFTWARE\\Python\\PythonCore\\${_CURRENT_VERSION}\\InstallPath]/libs - # Avoid finding the .dll in the PATH. We want the .lib. 
- NO_SYSTEM_ENVIRONMENT_PATH - ) - # Look for the static library in the Python config directory - find_library(PYTHON_LIBRARY - NAMES python${_CURRENT_VERSION_NO_DOTS} python${_CURRENT_VERSION} - # Avoid finding the .dll in the PATH. We want the .lib. - NO_SYSTEM_ENVIRONMENT_PATH - # This is where the static library is usually located - PATH_SUFFIXES python${_CURRENT_VERSION}/config - ) - - # For backward compatibility, honour value of PYTHON_INCLUDE_PATH, if - # PYTHON_INCLUDE_DIR is not set. - if(DEFINED PYTHON_INCLUDE_PATH AND NOT DEFINED PYTHON_INCLUDE_DIR) - set(PYTHON_INCLUDE_DIR "${PYTHON_INCLUDE_PATH}" CACHE PATH - "Path to where Python.h is found" FORCE) - endif() - - set(PYTHON_FRAMEWORK_INCLUDES) - if(Python_FRAMEWORKS AND NOT PYTHON_INCLUDE_DIR) - foreach(dir ${Python_FRAMEWORKS}) - set(PYTHON_FRAMEWORK_INCLUDES ${PYTHON_FRAMEWORK_INCLUDES} - ${dir}/Versions/${_CURRENT_VERSION}/include/python${_CURRENT_VERSION}) - endforeach() - endif() - - find_path(PYTHON_INCLUDE_DIR - NAMES Python.h - PATHS - ${PYTHON_FRAMEWORK_INCLUDES} - [HKEY_LOCAL_MACHINE\\SOFTWARE\\Python\\PythonCore\\${_CURRENT_VERSION}\\InstallPath]/include - [HKEY_CURRENT_USER\\SOFTWARE\\Python\\PythonCore\\${_CURRENT_VERSION}\\InstallPath]/include - PATH_SUFFIXES - python${_CURRENT_VERSION}mu - python${_CURRENT_VERSION}m - python${_CURRENT_VERSION}u - python${_CURRENT_VERSION} - ) - - # For backward compatibility, set PYTHON_INCLUDE_PATH. 
- set(PYTHON_INCLUDE_PATH "${PYTHON_INCLUDE_DIR}") - - if(PYTHON_INCLUDE_DIR AND EXISTS "${PYTHON_INCLUDE_DIR}/patchlevel.h") - file(STRINGS "${PYTHON_INCLUDE_DIR}/patchlevel.h" python_version_str - REGEX "^#define[ \t]+PY_VERSION[ \t]+\"[^\"]+\"") - string(REGEX REPLACE "^#define[ \t]+PY_VERSION[ \t]+\"([^\"]+)\".*" "\\1" - PYTHONLIBS_VERSION_STRING "${python_version_str}") - unset(python_version_str) - endif() - - if(PYTHON_LIBRARY AND PYTHON_INCLUDE_DIR) - break() - endif() -endforeach() - -mark_as_advanced( - PYTHON_DEBUG_LIBRARY - PYTHON_LIBRARY - PYTHON_INCLUDE_DIR -) - -# We use PYTHON_INCLUDE_DIR, PYTHON_LIBRARY and PYTHON_DEBUG_LIBRARY for the -# cache entries because they are meant to specify the location of a single -# library. We now set the variables listed by the documentation for this -# module. -set(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}") -set(PYTHON_DEBUG_LIBRARIES "${PYTHON_DEBUG_LIBRARY}") - -# These variables have been historically named in this module different from -# what SELECT_LIBRARY_CONFIGURATIONS() expects. -set(PYTHON_LIBRARY_DEBUG "${PYTHON_DEBUG_LIBRARY}") -set(PYTHON_LIBRARY_RELEASE "${PYTHON_LIBRARY}") -include(SelectLibraryConfigurations) -SELECT_LIBRARY_CONFIGURATIONS(PYTHON) -# SELECT_LIBRARY_CONFIGURATIONS() sets ${PREFIX}_FOUND if it has a library. -# Unset this, this prefix doesn't match the module prefix, they are different -# for historical reasons. -unset(PYTHON_FOUND) - -include(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(PythonLibs - REQUIRED_VARS PYTHON_LIBRARIES PYTHON_INCLUDE_DIRS - VERSION_VAR PYTHONLIBS_VERSION_STRING) - -# PYTHON_ADD_MODULE( src1 src2 ... srcN) is used to build modules for python. 
-# PYTHON_WRITE_MODULES_HEADER() writes a header file you can include -# in your sources to initialize the static python modules -function(PYTHON_ADD_MODULE _NAME ) - get_property(_TARGET_SUPPORTS_SHARED_LIBS - GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS) - option(PYTHON_ENABLE_MODULE_${_NAME} "Add module ${_NAME}" TRUE) - option(PYTHON_MODULE_${_NAME}_BUILD_SHARED - "Add module ${_NAME} shared" ${_TARGET_SUPPORTS_SHARED_LIBS}) - - # Mark these options as advanced - mark_as_advanced(PYTHON_ENABLE_MODULE_${_NAME} - PYTHON_MODULE_${_NAME}_BUILD_SHARED) - - if(PYTHON_ENABLE_MODULE_${_NAME}) - if(PYTHON_MODULE_${_NAME}_BUILD_SHARED) - set(PY_MODULE_TYPE MODULE) - else() - set(PY_MODULE_TYPE STATIC) - set_property(GLOBAL APPEND PROPERTY PY_STATIC_MODULES_LIST ${_NAME}) - endif() - - set_property(GLOBAL APPEND PROPERTY PY_MODULES_LIST ${_NAME}) - add_library(${_NAME} ${PY_MODULE_TYPE} ${ARGN}) -# target_link_libraries(${_NAME} ${PYTHON_LIBRARIES}) - - if(PYTHON_MODULE_${_NAME}_BUILD_SHARED) - set_target_properties(${_NAME} PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}") - if(WIN32 AND NOT CYGWIN) - set_target_properties(${_NAME} PROPERTIES SUFFIX ".pyd") - endif() - endif() - - endif() -endfunction() - -function(PYTHON_WRITE_MODULES_HEADER _filename) - - get_property(PY_STATIC_MODULES_LIST GLOBAL PROPERTY PY_STATIC_MODULES_LIST) - - get_filename_component(_name "${_filename}" NAME) - string(REPLACE "." 
"_" _name "${_name}") - string(TOUPPER ${_name} _nameUpper) - set(_filename ${CMAKE_CURRENT_BINARY_DIR}/${_filename}) - - set(_filenameTmp "${_filename}.in") - file(WRITE ${_filenameTmp} "/*Created by cmake, do not edit, changes will be lost*/\n") - file(APPEND ${_filenameTmp} -"#ifndef ${_nameUpper} -#define ${_nameUpper} - -#include - -#ifdef __cplusplus -extern \"C\" { -#endif /* __cplusplus */ - -") - - foreach(_currentModule ${PY_STATIC_MODULES_LIST}) - file(APPEND ${_filenameTmp} "extern void init${PYTHON_MODULE_PREFIX}${_currentModule}(void);\n\n") - endforeach() - - file(APPEND ${_filenameTmp} -"#ifdef __cplusplus -} -#endif /* __cplusplus */ - -") - - - foreach(_currentModule ${PY_STATIC_MODULES_LIST}) - file(APPEND ${_filenameTmp} "int ${_name}_${_currentModule}(void) \n{\n static char name[]=\"${PYTHON_MODULE_PREFIX}${_currentModule}\"; return PyImport_AppendInittab(name, init${PYTHON_MODULE_PREFIX}${_currentModule});\n}\n\n") - endforeach() - - file(APPEND ${_filenameTmp} "void ${_name}_LoadAllPythonModules(void)\n{\n") - foreach(_currentModule ${PY_STATIC_MODULES_LIST}) - file(APPEND ${_filenameTmp} " ${_name}_${_currentModule}();\n") - endforeach() - file(APPEND ${_filenameTmp} "}\n\n") - file(APPEND ${_filenameTmp} "#ifndef EXCLUDE_LOAD_ALL_FUNCTION\nvoid CMakeLoadAllPythonModules(void)\n{\n ${_name}_LoadAllPythonModules();\n}\n#endif\n\n#endif\n") - -# with configure_file() cmake complains that you may not use a file created using file(WRITE) as input file for configure_file() - execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different "${_filenameTmp}" "${_filename}" OUTPUT_QUIET ERROR_QUIET) - -endfunction() diff --git a/cmake/FindScala.cmake b/cmake/FindScala.cmake index 46c683efe1b..b56b493b3ea 100644 --- a/cmake/FindScala.cmake +++ b/cmake/FindScala.cmake @@ -6,13 +6,15 @@ set(_SCALA_PATHS /sw /usr /usr/share/java + /usr/share/scala + /usr/share/scala/lib ) find_package(Java COMPONENTS Runtime) if(JAVA_FOUND) include(UseJava) else() - 
message(WARNING "JAVA count not be found!" "\nIt is required for Scala Modular Interface!!!") + message(WARNING "Java is not found." "\nIt is required for the Scala Interface.") endif() find_program(Scala_SCALA_EXECUTABLE @@ -25,29 +27,31 @@ find_program(Scala_SCALAC_EXECUTABLE PATHS ${_SCALA_PATHS} ) -find_jar(Scala_JAR_EXECUTABLE "scala-library") +find_jar(Scala_JAR_EXECUTABLE + NAMES "scala-library" + PATHS ${_SCALA_PATHS} + ) -if(Scala_SCALA_EXECUTABLE) - execute_process(COMMAND ${Scala_SCALA_EXECUTABLE} -version - RESULT_VARIABLE SCALA_SEARCH_SUCCESS - OUTPUT_VARIABLE SCALA_VERSION - ERROR_VARIABLE SCALA_VERSION - OUTPUT_STRIP_TRAILING_WHITESPACE - ERROR_STRIP_TRAILING_WHITESPACE) - if( SCALA_SEARCH_SUCCESS ) - message( FATAL_ERROR "Error executing scala -version" ) - else() - string(TOLOWER ${SCALA_VERSION} SCALA_VERSION) - string( REGEX REPLACE ".*([0-9]+\\.[0-9]+\\.[0-9_.]+.*)" "\\1" SCALA_VERSION "${SCALA_VERSION}" ) - string( REGEX REPLACE "([0-9]+\\.[0-9]+\\.[0-9_.]).*" "\\1" SCALA_VERSION ${SCALA_VERSION} ) - endif() +if( "${Scala_SCALA_EXECUTABLE}" MATCHES "Scala_SCALA_EXECUTABLE-NOTFOUND" ) + message( FATAL_ERROR "Error executing scala -version" ) +else() + execute_process(COMMAND ${Scala_SCALA_EXECUTABLE} -version + RESULT_VARIABLE SCALA_SEARCH_SUCCESS + OUTPUT_VARIABLE SCALA_VERSION + ERROR_VARIABLE SCALA_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_STRIP_TRAILING_WHITESPACE + ) + string(TOLOWER ${SCALA_VERSION} SCALA_VERSION) + string( REGEX REPLACE ".*([0-9]+\\.[0-9]+\\.[0-9_.]+.*)" "\\1" SCALA_VERSION "${SCALA_VERSION}" ) + string( REGEX REPLACE "([0-9]+\\.[0-9]+\\.[0-9_.]).*" "\\1" SCALA_VERSION ${SCALA_VERSION} ) endif() include(FindPackageHandleStandardArgs) if (CMAKE_VERSION LESS 2.8.3) find_package_handle_standard_args(SCALA DEFAULT_MSG Scala_SCALA_EXECUTABLE) else () - find_package_handle_standard_args(SCALA + find_package_handle_standard_args(SCALA REQUIRED_VARS Scala_SCALA_EXECUTABLE Scala_SCALAC_EXECUTABLE Scala_JAR_EXECUTABLE 
VERSION_VAR SCALA_VERSION) endif () diff --git a/cmake/FindSpinlock.cmake b/cmake/FindSpinlock.cmake deleted file mode 100644 index 8c590c0945c..00000000000 --- a/cmake/FindSpinlock.cmake +++ /dev/null @@ -1,23 +0,0 @@ -IF (CMAKE_USE_PTHREADS_INIT) - SET(_bindir "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}") - IF (DARWIN) - TRY_COMPILE(HAVE_SPINLOCK "${_bindir}" "${CMAKE_MODULE_PATH}/spinlock-test-darwin.cpp") - ELSE () - TRY_COMPILE(HAVE_SPINLOCK "${_bindir}" - "${CMAKE_MODULE_PATH}/spinlock-test.cpp" - CMAKE_FLAGS "-DLINK_LIBRARIES:STRING=${CMAKE_THREAD_LIBS_INIT}" - ) - ENDIF () - - IF (HAVE_SPINLOCK) - MESSAGE(STATUS "Spinlock support found") - SET(SPINLOCK_FOUND TRUE) - ELSE (HAVE_SPINLOCK) - MESSAGE(STATUS "Spinlock support not found") - SET(SPINLOCK_FOUND FALSE) - ENDIF (HAVE_SPINLOCK) -ELSE () - MESSAGE(STATUS "Spinlock support not found due to no pthreads available") - SET(SPINLOCK_FOUND FALSE) -ENDIF () - diff --git a/cmake/Findrxcpp.cmake b/cmake/Findrxcpp.cmake new file mode 100644 index 00000000000..2577e3b6a35 --- /dev/null +++ b/cmake/Findrxcpp.cmake @@ -0,0 +1,30 @@ +# Copyright Gonzalo Brito Gadeschi 2015 +# Copyright Kirk Shoop 2016 +# Distributed under the Boost Software License, Version 1.0. +# (See accompanying file LICENSE.md or copy at http://boost.org/LICENSE_1_0.txt) +# +# Find the rxcpp include directory +# The following variables are set if rxcpp is found. +# rxcpp_FOUND - True when the rxcpp include directory is found. +# rxcpp_INCLUDE_DIR - The path to where the meta include files are. +# If rxcpp is not found, rxcpp_FOUND is set to false. 
+ +# https://github.com/gnzlbg/ndtree/blob/master/cmake/Findrange-v3.cmake + +find_package(PkgConfig) + +if(NOT EXISTS "${rxcpp_INCLUDE_DIR}") + find_path(rxcpp_INCLUDE_DIR + NAMES rxcpp/rx.hpp + DOC "rxcpp library header files" + ) +endif() + +include(FindPackageHandleStandardArgs) +mark_as_advanced(rxcpp_INCLUDE_DIR) + +if(EXISTS "${rxcpp_INCLUDE_DIR}") + set(rxcpp_FOUND 1) +else() + set(rxcpp_FOUND 0) +endif() \ No newline at end of file diff --git a/cmake/ShogunFindLAPACK.cmake b/cmake/ShogunFindLAPACK.cmake new file mode 100644 index 00000000000..fe3d6db9c24 --- /dev/null +++ b/cmake/ShogunFindLAPACK.cmake @@ -0,0 +1,118 @@ +# Enable Eigen to use Lapack backend +OPTION(ENABLE_EIGEN_LAPACK "Enable Eigen to use detected BLAS and LAPACK backend" ON) + +FIND_PACKAGE(LAPACK QUIET) +IF (LAPACK_FOUND) + SET(HAVE_LAPACK 1) + + # find out the type of Lapack/BLAS implementation we are dealing with + IF("${LAPACK_LIBRARIES}" MATCHES ".*/Accelerate.framework$") + # Accelerate.framework we found for LaPack/BLAS + SET(HAVE_MVEC 1) + SET(HAVE_CATLAS 1) + MESSAGE(STATUS "Found Accelerate.framework using as BLAS/LAPACK backend.") + + if (ENABLE_EIGEN_LAPACK) + SET(EIGEN_USE_BLAS 1) + MESSAGE(STATUS "Enabling Accelerate.framework as BLAS backend for Eigen.") + find_library(LAPACKE_LIBRARY + NAMES lapacke + PATHS /usr/lib /usr/local/lib $ENV{LAPACKE_PATH}) + if (LAPACKE_LIBRARY) + MESSAGE(STATUS "Enabling Accelerate.framework as LAPACK backend for Eigen.") + SET(EIGEN_USE_LAPACKE_STRICT 1) + LIST(APPEND LAPACK_LIBRARIES ${LAPACKE_LIBRARY}) + endif() + endif() + ELSEIF("${LAPACK_LIBRARIES}" MATCHES ".*/.*mkl_.*") + # in case MKL is available enable Eigen to use it. 
+ # for more fine grained control and details see: + # https://eigen.tuxfamily.org/dox/TopicUsingIntelMKL.html + # this is supported since Eigen version 3.1 and later + MESSAGE(STATUS "Found MKL using as BLAS/LAPACK backend.") + SET(HAVE_MKL 1) + + # trying to use the new Single Dynamic Library of MKL + # https://software.intel.com/en-us/articles/a-new-linking-model-single-dynamic-library-mkl_rt-since-intel-mkl-103 + IF (NOT "${LAPACK_LIBRARIES}" MATCHES ".*/.*mkl_rt.*") + # just use the mkl_rt and let the user specify/decide all the MKL specific + # optimisation runtime + SET(MKL_LIBRARIES ${LAPACK_LIBRARIES}) + LIST(FILTER MKL_LIBRARIES INCLUDE REGEX ".*/.*mkl_core.*") + get_filename_component(MKL_PATH ${MKL_LIBRARIES} DIRECTORY) + find_library(MKL_RT mkl_rt PATHS ${MKL_PATH}) + IF (MKL_RT) + IF (MSVC) + SET(LAPACK_LIBRARIES ${MKL_RT}) + ELSEIF(CMAKE_USE_PTHREADS_INIT) + SET(LAPACK_LIBRARIES ${MKL_RT}) + LIST(APPEND LAPACK_LIBRARIES ${CMAKE_THREAD_LIBS_INIT} -lm) + ENDIF() + ENDIF() + ENDIF() + + IF (ENABLE_EIGEN_LAPACK) + FIND_PATH(MKL_INCLUDE_DIR mkl.h) + IF(NOT MKL_INCLUDE_DIR) + MESSAGE(STATUS "Found MKL, but not mkl.h. 
Make sure that mkl headers are available in order to use MKL as BLAS/Lapack backend for Eigen.") + SET(ENABLE_EIGEN_LAPACK OFF CACHE BOOL "Use ${ENABLE_EIGEN_LAPACK}" FORCE) + ELSE() + MESSAGE(STATUS "Enabling MKL as BLAS/Lapack backend for Eigen.") + SET(EIGEN_USE_MKL_ALL 1) + target_include_directories(shogun PUBLIC ${MKL_INCLUDE_DIR}) + IF (LIBSHOGUN_BUILD_STATIC) + target_include_directories(shogun-static PUBLIC ${MKL_INCLUDE_DIR}) + ENDIF() + ENDIF() + ENDIF() + ELSE() + include(CheckLibraryExists) + # test whether we have cblas.h in the header paths and the detected + # LAPACK_LIBRARIES contains all the libraries to compile even with cblas_* functions + check_library_exists("${LAPACK_LIBRARIES}" cblas_dgemv "" FOUND_CBLAS_DGEMV) + + # detect if the detected Lapack is atlas + # clapack_* functions are implemented in atlas + check_library_exists("${LAPACK_LIBRARIES}" clapack_dpotrf "" FOUND_CLAPACK_DPOTRF) + IF (FOUND_CLAPACK_DPOTRF OR NOT FOUND_CBLAS_DGEMV) + FIND_PACKAGE(Atlas QUIET) + IF(Atlas_FOUND) + MESSAGE(STATUS "Found Atlas using as BLAS/LAPACK backend.") + SET(HAVE_ATLAS 1) + SHOGUN_INCLUDE_DIRS(SCOPE PUBLIC ${Atlas_INCLUDE_DIRS}) + IF (NOT FOUND_CBLAS_DGEMV) + # this usually happens on RHEL/CentOS; usually having Atlas is good + SET(LAPACK_LIBRARIES ${Atlas_LIBRARIES}) + ENDIF() + ELSEIF(NOT FOUND_CBLAS_DGEMV) + UNSET(LAPACK_FOUND CACHE) + UNSET(LAPACK_LIBRARIES) + UNSET(HAVE_LAPACK) + ENDIF() + ENDIF() + + # if LaPack is detected and Eigen is 3.3 or later + # use the lapack/blas backend in Eigen + IF(${EIGEN_VERSION} VERSION_GREATER 3.3.0 AND ENABLE_EIGEN_LAPACK AND HAVE_LAPACK) + SET(EIGEN_USE_BLAS 1) + MESSAGE(STATUS "Enabling detected BLAS library as backend for Eigen") + + find_library(LAPACKE_LIBRARY NAMES lapacke PATHS /usr/lib /usr/local/lib $ENV{LAPACKE_PATH}) + IF (LAPACKE_LIBRARY) + MESSAGE(STATUS "Enabling detected LAPACK backend for Eigen") + SET(EIGEN_USE_LAPACKE_STRICT 1) + LIST(APPEND LAPACK_LIBRARIES ${LAPACKE_LIBRARY}) + ENDIF() + 
ENDIF() + ENDIF() + + IF (ENABLE_EIGEN_LAPACK) + SET (LAPACK_SCOPE PUBLIC) + ELSE() + SET (LAPACK_SCOPE PRIVATE) + ENDIF() + target_link_libraries(shogun ${LAPACK_SCOPE} ${LAPACK_LIBRARIES}) + if (LIBSHOGUN_BUILD_STATIC) + target_link_libraries(shogun-static ${LAPACK_SCOPE} ${LAPACK_LIBRARIES}) + endif() +ENDIF() diff --git a/cmake/ShogunInterfaces.cmake b/cmake/ShogunInterfaces.cmake new file mode 100644 index 00000000000..3730a71088d --- /dev/null +++ b/cmake/ShogunInterfaces.cmake @@ -0,0 +1,105 @@ +MACRO(GENERATE_INTERFACE_TARGET INTERFACE_NAME INTERFACE_DIR INTERFACE_LIBRARIES) + +get_target_property(ShogunIncludes shogun::shogun INTERFACE_INCLUDE_DIRECTORIES) +INCLUDE_DIRECTORIES(${ShogunIncludes}) + +# set compiler SWIG generated cxx compiler flags +SET(CMAKE_CXX_FLAGS ${SWIG_CXX_COMPILER_FLAGS}) +# unset any release or distribution flags +# we don't want them when compiling SWIG generated source +SET(CMAKE_CXX_FLAGS_RELEASE "") +SET(CMAKE_CXX_FLAGS_DISTRIBUTION "") +SET(CMAKE_CXX_FLAGS_DEBUG "") + +if(${INTERFACE_NAME} STREQUAL "python") + SET(PREPEND_TARGET "_") +endif() + +SET(INTERFACE_FILES) +FILE(GLOB_RECURSE INTERFACE_FILES ${COMMON_INTERFACE_SRC_DIR}/*.i) +FILE(GLOB_RECURSE CUSTOM_INTERFACE_FILES ${INTERFACE_DIR}/*.i) +LIST(APPEND INTERFACE_FILES ${CUSTOM_INTERFACE_FILES}) +FOREACH(file ${INTERFACE_FILES}) + get_filename_component(fname ${file} NAME) + list(APPEND INTERFACE_FILES ${fname}) + ADD_CUSTOM_COMMAND(OUTPUT ${fname} + DEPENDS ${file} + COMMAND "${CMAKE_COMMAND}" -E copy_if_different ${file} ${fname} + COMMENT "" + ) +ENDFOREACH() + +SET(INTERFACE_TARGET interface_${INTERFACE_NAME}) +SET(INTERFACE_TARGET_SRC ${INTERFACE_TARGET}_src) + +ADD_CUSTOM_TARGET(${INTERFACE_TARGET_SRC} + DEPENDS shogun::shogun ${INTERFACE_FILES} + COMMENT "copying SWIG files") + +INCLUDE(${SWIG_USE_FILE}) +SET_SOURCE_FILES_PROPERTIES(shogun.i PROPERTIES CPLUSPLUS ON) +IF(DEFINED TARGET_SWIGFLAGS) + SET_SOURCE_FILES_PROPERTIES(shogun.i PROPERTIES SWIG_FLAGS 
${TARGET_SWIGFLAGS})
+ENDIF()
+SET(SWIG_MODULE_${INTERFACE_NAME}_EXTRA_DEPS ${INTERFACE_FILES})
+SWIG_ADD_MODULE(${INTERFACE_TARGET} ${INTERFACE_NAME} shogun.i sg_print_functions.cpp)
+SWIG_LINK_LIBRARIES(${INTERFACE_TARGET} shogun::shogun ${INTERFACE_LIBRARIES})
+
+
+#get_cmake_property(_variableNames VARIABLES)
+#foreach (_variableName ${_variableNames})
+	# message(STATUS "${_variableName}=${${_variableName}}")
+	# endforeach()
+
+SET(INTERFACE_REAL_NAME ${SWIG_MODULE_interface_${INTERFACE_NAME}_REAL_NAME})
+SET_TARGET_PROPERTIES(${INTERFACE_REAL_NAME} PROPERTIES OUTPUT_NAME ${PREPEND_TARGET}shogun)
+ADD_DEPENDENCIES(${INTERFACE_REAL_NAME} ${INTERFACE_TARGET_SRC})
+
+#ADD_CUSTOM_COMMAND(TARGETS ${PREPEND_TARGET}interface_${INTERFACE_NAME}
+#	POST_BUILD
+#	COMMAND ${PYTHON_EXECUTABLE}
+#	ARGS ${CMAKE_SOURCE_DIR}/src/.scrub_docstrings.py )
+
+IF(DOXYGEN_FOUND)
+	configure_file(${COMMON_INTERFACE_SRC_DIR}/shogun.doxy.in shogun.doxy)
+
+	ADD_CUSTOM_COMMAND(
+		OUTPUT shogun_doxygen
+		COMMAND ${DOXYGEN_EXECUTABLE}
+		ARGS shogun.doxy
+		DEPENDS shogun::shogun
+		COMMENT "Generating doxygen doc"
+	)
+
+	ADD_CUSTOM_COMMAND(
+		OUTPUT shogun_doxygen.i
+		COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/src/.doxy2swig.py
+		ARGS --quiet --no-function-definition shogun_doxygen/xml/index.xml shogun_doxygen.i
+		DEPENDS shogun_doxygen
+	)
+	ADD_CUSTOM_TARGET(${INTERFACE_NAME}_doxy2swig DEPENDS shogun_doxygen.i)
+	ADD_DEPENDENCIES(${INTERFACE_REAL_NAME} ${INTERFACE_NAME}_doxy2swig)
+ELSE()
+	#TODO add scrubbing
+ENDIF()
+
+# Make sure all interfaces are built single-threaded to reduce
+# excessive memory consumption during build.
+IF(SWIG_SINGLE_THREADED)
+	FOREACH(SG_INTERFACE_TARGET ${SG_INTERFACE_TARGETS})
+		ADD_DEPENDENCIES(${INTERFACE_REAL_NAME}
+			${SG_INTERFACE_TARGET})
+	ENDFOREACH(SG_INTERFACE_TARGET ${SG_INTERFACE_TARGETS})
+	SET(SG_INTERFACE_TARGETS
+		"${INTERFACE_REAL_NAME};${SG_INTERFACE_TARGETS}"
+		CACHE STRING "List of modular-interfaces being built." 
FORCE) +ENDIF(SWIG_SINGLE_THREADED) + +CONFIGURE_FILE(${COMMON_INTERFACE_SRC_DIR}/swig_config.h.in swig_config.h) + +UNSET(INTERFACE_TARGET) +UNSET(INTERFACE_TARGET_SRC) +UNSET(INTERFACE_REAL_NAME) +UNSET(INTERFACE_FILES) + +ENDMACRO() diff --git a/cmake/ShogunUtils.cmake b/cmake/ShogunUtils.cmake index abc30353869..a133bd420ce 100644 --- a/cmake/ShogunUtils.cmake +++ b/cmake/ShogunUtils.cmake @@ -57,7 +57,7 @@ macro(DetectSystemName) list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/lib" isSystemDir) if(${isSystemDir} STREQUAL "-1") set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") - endif(${isSystemDir} STREQUAL "-1") + endif(${isSystemDir} STREQUAL "-1") ELSEIF(${CMAKE_SYSTEM_NAME} MATCHES "Linux") SET(LINUX 1) ELSEIF(${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") @@ -70,11 +70,15 @@ MACRO(PrintInterfaceStatus INTERFACE_NAME INTERFACE_FLAG) message(STATUS " ${INTERFACE_NAME} is ON") ELSE() STRING(LENGTH ${INTERFACE_NAME} IFACE_NAME_LENGTH) - IF (IFACE_NAME_LENGTH LESS 10) - message(STATUS " ${INTERFACE_NAME} is OFF \t\t - enable with -D${INTERFACE_FLAG}=ON") + IF (IFACE_NAME_LENGTH LESS 3) + SET(INDENT "\t\t\t") + ELSEIF (IFACE_NAME_LENGTH LESS 10) + SET(INDENT "\t\t") ELSE () - message(STATUS " ${INTERFACE_NAME} is OFF \t - enable with -D${INTERFACE_FLAG}=ON") + SET(INDENT "\t") ENDIF () + message(STATUS " ${INTERFACE_NAME} is OFF ${INDENT} enable with -D${INTERFACE_FLAG}=ON") + UNSET(INDENT) ENDIF() ENDMACRO() @@ -107,6 +111,30 @@ MACRO(AddMetaIntegrationTest META_TARGET CONDITION) ENDIF() ENDMACRO() +MACRO(AddLibShogunExample EXAMPLE_CPP) + STRING(REGEX REPLACE ".cpp\$" "" EXAMPLE "${EXAMPLE_CPP}") + + add_executable(${EXAMPLE} ${CMAKE_CURRENT_SOURCE_DIR}/${EXAMPLE_CPP}) + if(WIN32) + target_link_libraries(${EXAMPLE} shogun::shogun-static ${SANITIZER_LIBRARY}) + else() + target_link_libraries(${EXAMPLE} shogun::shogun ${SANITIZER_LIBRARY}) + endif() + IF(SANITIZER_FLAGS) + set_target_properties(${EXAMPLE} PROPERTIES COMPILE_FLAGS 
${SANITIZER_FLAGS}) + ENDIF() + add_test(libshogun-${EXAMPLE} ${CMAKE_CURRENT_BINARY_DIR}/${EXAMPLE}) + + # Add examples to the dependencies of modular interfaces to make sure + # nothing will infer with them being build single-threaded. + IF(SWIG_SINGLE_THREADED) + FOREACH(SG_INTERFACE_TARGET ${SG_INTERFACE_TARGETS}) + ADD_DEPENDENCIES(${SG_INTERFACE_TARGET} ${EXAMPLE}) + ENDFOREACH(SG_INTERFACE_TARGET ${SG_INTERFACE_TARGETS}) + ENDIF(SWIG_SINGLE_THREADED) +ENDMACRO() + + function(PrintLine) message(STATUS "===================================================================================================================") endfunction() @@ -151,7 +179,7 @@ macro(ADD_LIBRARY_DEPENDENCY) if (${TARGET_TYPE} STREQUAL INTERFACE_LIBRARY) target_include_directories(${element} INTERFACE ${LIBRARY_HEADER}) target_link_libraries(${element} INTERFACE ${${LIBRARY_PREFIX}_LIBRARIES}) - else() + else() if (NOT ${TARGET_TYPE} STREQUAL OBJECT_LIBRARY) target_link_libraries(${element} ${ADD_LIBRARY_DEPENDENCY_SCOPE} ${${LIBRARY_PREFIX}_LIBRARIES}) endif() @@ -245,9 +273,8 @@ function(GET_META_EXAMPLE_VARS META_EXAMPLE EX_NAME REL_DIR NAME_WITH_DIR) set(${NAME_WITH_DIR} ${EXAMPLE_NAME_WITH_DIR} PARENT_SCOPE) endfunction() - function(GET_INTERFACE_VARS INTERFACE DIRECTORY EXTENSION) - string(REGEX MATCH "([a-zA-Z]+)Modular" _dir ${INTERFACE}) + string(REGEX MATCH "INTERFACE_([a-zA-Z]+)" _dir ${INTERFACE}) STRING(TOLOWER "${CMAKE_MATCH_1}" _dir) SET(${DIRECTORY} ${_dir} PARENT_SCOPE) diff --git a/cmake/external/Eigen3.cmake b/cmake/external/Eigen3.cmake index fb9b698ab33..f947b0e7b1d 100644 --- a/cmake/external/Eigen3.cmake +++ b/cmake/external/Eigen3.cmake @@ -1,13 +1,17 @@ GetCompilers() set(EIGEN_INCLUDE_DIR ${THIRD_PARTY_INCLUDE_DIR}/eigen) +set(EIGEN_VERSION 3.2.8) include(ExternalProject) ExternalProject_Add( Eigen3 PREFIX ${CMAKE_BINARY_DIR}/Eigen3 DOWNLOAD_DIR ${THIRD_PARTY_DIR}/Eigen3 - URL https://bitbucket.org/eigen/eigen/get/3.2.8.tar.bz2 + URL 
https://bitbucket.org/eigen/eigen/get/${EIGEN_VERSION}.tar.bz2
 	URL_MD5 9e3bfaaab3db18253cfd87ea697b3ab1
+# FIXME: http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1445
+#	URL https://bitbucket.org/eigen/eigen/get/3.3.4.tar.bz2
+#	URL_MD5 a7aab9f758249b86c93221ad417fbe18
 	CMAKE_ARGS -DEIGEN_INCLUDE_INSTALL_DIR:PATH=${EIGEN_INCLUDE_DIR}
 		-DCMAKE_C_FLAGS:STRING=${CMAKE_C_FLAGS}${CMAKE_DEFINITIONS}
 		-DCMAKE_CXX_FLAGS:STRING=${CMAKE_CXX_FLAGS}${CMAKE_DEFINITIONS}
diff --git a/cmake/external/GoogleTestNMock.cmake b/cmake/external/GoogleTestNMock.cmake
index 4f296b09028..05e093f2323 100644
--- a/cmake/external/GoogleTestNMock.cmake
+++ b/cmake/external/GoogleTestNMock.cmake
@@ -23,16 +23,27 @@ ELSE ()
 	)
 ENDIF()
-ExternalProject_Add(
-	GoogleMock
-	URL https://github.com/google/googletest/archive/release-1.8.0.tar.gz
-	URL_MD5 16877098823401d1bf2ed7891d7dce36
-	TIMEOUT 10
-	PREFIX ${CMAKE_BINARY_DIR}/GoogleMock
-	DOWNLOAD_DIR ${THIRD_PARTY_DIR}/GoogleMock
-	INSTALL_COMMAND ""
-	CMAKE_ARGS ${CUSTOM_CMAKE_ARGS}
-)
+IF(EXISTS /usr/src/googletest)
+	ExternalProject_Add(
+		GoogleMock
+		DOWNLOAD_COMMAND ""
+		SOURCE_DIR /usr/src/googletest
+		PREFIX ${CMAKE_BINARY_DIR}/GoogleMock
+		INSTALL_COMMAND ""
+		CMAKE_ARGS ${CUSTOM_CMAKE_ARGS}
+	)
+ELSE()
+	ExternalProject_Add(
+		GoogleMock
+		URL https://github.com/google/googletest/archive/release-1.8.0.tar.gz
+		URL_MD5 16877098823401d1bf2ed7891d7dce36
+		TIMEOUT 10
+		PREFIX ${CMAKE_BINARY_DIR}/GoogleMock
+		DOWNLOAD_DIR ${THIRD_PARTY_DIR}/GoogleMock
+		INSTALL_COMMAND ""
+		CMAKE_ARGS ${CUSTOM_CMAKE_ARGS}
+	)
+ENDIF()
 UNSET(C_COMPILER)
 UNSET(CXX_COMPILER)
\ No newline at end of file
diff --git a/cmake/external/TFLogger.cmake b/cmake/external/TFLogger.cmake
new file mode 100644
index 00000000000..50e4daa3ebe
--- /dev/null
+++ b/cmake/external/TFLogger.cmake
@@ -0,0 +1,21 @@
+GetCompilers()
+
+include(ExternalProject)
+ExternalProject_Add(
+	tflogger
+	PREFIX ${CMAKE_BINARY_DIR}/tflogger
+	DOWNLOAD_DIR ${THIRD_PARTY_DIR}/tflogger
+	URL 
https://github.com/shogun-toolbox/tflogger/archive/master.zip + CMAKE_ARGS + -DCMAKE_INSTALL_PREFIX:STRING=${CMAKE_BINARY_DIR}/src/shogun/lib/external + -DCMAKE_C_COMPILER:STRING=${C_COMPILER} + -DCMAKE_CXX_COMPILER:STRING=${CXX_COMPILER} + BUILD_COMMAND "" + ) + +add_dependencies(libshogun tflogger) + +set(TFLogger_INCLUDE_DIR ${THIRD_PARTY_INCLUDE_DIR}) + +UNSET(C_COMPILER) +UNSET(CXX_COMPILER) diff --git a/cmake/external/rxcpp.cmake b/cmake/external/rxcpp.cmake new file mode 100644 index 00000000000..8a25b2a5ca7 --- /dev/null +++ b/cmake/external/rxcpp.cmake @@ -0,0 +1,23 @@ +GetCompilers() + +include(ExternalProject) +ExternalProject_Add( + rxcpp + PREFIX ${CMAKE_BINARY_DIR}/rxcpp + DOWNLOAD_DIR ${THIRD_PARTY_DIR}/rxcpp + URL https://github.com/Reactive-Extensions/RxCpp/archive/v4.0.0.tar.gz + URL_MD5 feb89934f465bb5ac513c9adce8d3b1b + CMAKE_ARGS + -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo + -DCMAKE_INSTALL_PREFIX:STRING=${CMAKE_BINARY_DIR}/src/shogun/lib/external + -DCMAKE_C_COMPILER:STRING=${C_COMPILER} + -DCMAKE_CXX_COMPILER:STRING=${CXX_COMPILER} + BUILD_COMMAND "" + ) + +add_dependencies(libshogun rxcpp) + +set(rxcpp_INCLUDE_DIR ${THIRD_PARTY_INCLUDE_DIR}) + +UNSET(C_COMPILER) +UNSET(CXX_COMPILER) \ No newline at end of file diff --git a/cmake/spinlock-test-darwin.cpp b/cmake/spinlock-test-darwin.cpp deleted file mode 100644 index 7a042d14f5e..00000000000 --- a/cmake/spinlock-test-darwin.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include - -int main() -{ - volatile OSSpinLock lock; - OSSpinLockTry(&lock); - return 0; -} diff --git a/cmake/spinlock-test.cpp b/cmake/spinlock-test.cpp deleted file mode 100644 index e871211f13e..00000000000 --- a/cmake/spinlock-test.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include - -int main() -{ - volatile pthread_spinlock_t spinlock; - pthread_spin_init(&spinlock, 0); - return 0; -} diff --git a/configs/shogun-sdk/Dockerfile b/configs/shogun-sdk/Dockerfile index 8300927b1e2..17b2a580628 100644 --- a/configs/shogun-sdk/Dockerfile +++ 
b/configs/shogun-sdk/Dockerfile @@ -1,41 +1,45 @@ -FROM ubuntu:14.04 +FROM debian:stretch MAINTAINER shogun@shogun-toolbox.org -RUN apt-get update && apt-get install -qq software-properties-common lsb-release -RUN add-apt-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc) multiverse" -RUN add-apt-repository -y ppa:rosmo/swig3.0.7 -RUN add-apt-repository -y ppa:webupd8team/java -RUN add-apt-repository -y ppa:george-edison55/cmake-3.x -RUN apt-get update -qq -RUN apt-get upgrade -y - -RUN echo debconf shared/accepted-oracle-license-v1-1 select true | debconf-set-selections -RUN echo debconf shared/accepted-oracle-license-v1-1 seen true | debconf-set-selections - -# install dependencies -RUN apt-get install -qq --force-yes --no-install-recommends make gcc g++ \ +RUN apt-get update -qq && apt-get upgrade -y && \ + apt-get install -qq --force-yes --no-install-recommends make gcc g++ \ libc6-dev libbz2-dev libjson-c-dev ccache libarpack2-dev libatlas-base-dev \ - libblas-dev libglpk-dev libhdf5-serial-dev zlib1g-dev liblapack-dev cmake \ + libblas-dev libglpk-dev libhdf5-serial-dev zlib1g-dev liblapacke-dev cmake \ libnlopt-dev liblpsolve55-dev libxml2-dev libsnappy-dev liblzo2-dev \ liblzma-dev libeigen3-dev swig3.0 python-dev python-numpy python-matplotlib python-scipy \ - python-jinja2 git-core wget jblas mono-devel mono-gmcs cli-common-dev \ + python-jinja2 python-setuptools git-core wget jblas mono-devel mono-dmcs cli-common-dev \ lua5.1 liblua5.1-0-dev octave liboctave-dev r-base-core clang \ - oracle-java8-installer ruby ruby-dev python-ply sphinx-doc python-pip \ - exuberant-ctags - -RUN pip install sphinx ply sphinxcontrib-bibtex sphinx_bootstrap_theme + openjdk-8-jdk ruby ruby-dev python-ply sphinx-doc python-pip \ + exuberant-ctags clang-format-3.8 libcereal-dev libcolpack-dev lcov \ + protobuf-compiler libprotobuf-dev scala googletest +RUN pip install sphinx ply sphinxcontrib-bibtex sphinx_bootstrap_theme codecov RUN gem install narray RUN cd 
/usr/bin && ln -s swig3.0 swig && ln -s ccache-swig3.0 ccache-swig -ADD http://crd.lbl.gov/~dhbailey/mpdist/arprec-2.2.18.tar.gz /tmp/ -RUN cd /tmp;\ - tar zxpf arprec-2.2.18.tar.gz;\ - cd arprec-2.2.18;\ - ./configure --enable-shared;\ - make install; ldconfig -ADD http://www.scala-lang.org/files/archive/scala-2.11.7.deb /tmp/scala.deb -RUN sudo dpkg -i /tmp/scala.deb +ADD http://crd.lbl.gov/~dhbailey/mpdist/arprec-2.2.19.tar.gz /tmp/ +RUN cd /tmp && \ + tar zxpf arprec-2.2.19.tar.gz && \ + cd arprec && ./configure --enable-shared && \ + make install && ldconfig ADD http://dl.bintray.com/sbt/debian/sbt-0.13.6.deb /tmp/sbt.deb -RUN sudo dpkg -i /tmp/sbt.deb +RUN dpkg -i /tmp/sbt.deb + +ADD https://github.com/Reactive-Extensions/RxCpp/archive/v4.0.0.tar.gz /tmp/ +RUN cd /tmp;\ + tar -xvf v4.0.0.tar.gz;\ + cd RxCpp-4.0.0/projects/;\ + mkdir build;\ + cd build;\ + cmake ../../;\ + make install; + +ADD https://github.com/shogun-toolbox/tflogger/archive/v0.1.0.tar.gz /tmp/ +RUN cd /tmp;\ + tar -xvf v0.1.0.tar.gz;\ + cd tflogger-0.1.0;\ + mkdir build;\ + cd build;\ + cmake ../;\ + make install; diff --git a/configs/shogun-sdk/travis.env.in b/configs/shogun-sdk/travis.env.in new file mode 100644 index 00000000000..39f20153848 --- /dev/null +++ b/configs/shogun-sdk/travis.env.in @@ -0,0 +1,15 @@ +TRAVIS=$TRAVIS +CI=$CI +TRAVIS_BRANCH=$TRAVIS_BRANCH +TRAVIS_COMMIT=$TRAVIS_COMMIT +TRAVIS_JOB_NUMBER=$TRAVIS_JOB_NUMBER +TRAVIS_PULL_REQUEST=$TRAVIS_PULL_REQUEST +TRAVIS_JOB_ID=$TRAVIS_JOB_ID +TRAVIS_REPO_SLUG=$TRAVIS_REPO_SLUG +TRAVIS_TAG=$TRAVIS_TAG +TRAVIS_OS_NAME=$TRAVIS_OS_NAME +TRAVIS_BUILD_DIR=/opt/shogun +TRAVIS_PULL_REQUEST_BRANCH=$TRAVIS_PULL_REQUEST_BRANCH +JAVA_HOME=/usr/lib/jvm/java-8-oracle +CC=$CC +CXX=$CXX diff --git a/configs/shogun/Dockerfile b/configs/shogun/Dockerfile index 66ab9fcd07d..ee0281c04cb 100644 --- a/configs/shogun/Dockerfile +++ b/configs/shogun/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:14.04 +FROM ubuntu:16.04 MAINTAINER shogun@shogun-toolbox.org 
RUN apt-get update && apt-get install -qq software-properties-common lsb-release @@ -8,4 +8,4 @@ RUN apt-get update -qq RUN apt-get upgrade -y # install shogun -RUN apt-get install -qq --force-yes --no-install-recommends libshogun17 +RUN apt-get install -qq --force-yes --no-install-recommends libshogun18 diff --git a/data b/data index 7854e3b08ab..4b85bd7faa0 160000 --- a/data +++ b/data @@ -1 +1 @@ -Subproject commit 7854e3b08ab99594dcba4c6e94d7cd48eb2d0556 +Subproject commit 4b85bd7faa0b2bbcbbc4458655e6f35f438f1e91 diff --git a/debian b/debian index add2d5b2eeb..e47abf2ac2c 160000 --- a/debian +++ b/debian @@ -1 +1 @@ -Subproject commit add2d5b2eebb7631023cb26a3ff5614c396befbe +Subproject commit e47abf2ac2c8741abf1352804d4726d471789717 diff --git a/doc/cookbook/source/examples/converter/ica_fast.rst b/doc/cookbook/source/examples/converter/ica_fast.rst new file mode 100644 index 00000000000..3683c5e4043 --- /dev/null +++ b/doc/cookbook/source/examples/converter/ica_fast.rst @@ -0,0 +1,38 @@ +============================== +Independent Component Analysis +============================== + +Independent component analysis (ICA) separates a multivariate signal into additive subcomponents that are maximally independent. +It is typically used for separating superimposed signals. + +The ICA algorithm presented here is fastICA, see :cite:`hyvarinen2000independent` for details. +There are many other ICA implementations, all based on :sgclass:`CICAConverter` + +------- +Example +------- + +Given a dataset which we assume consists of linearly mixed signals, we create CDenseFeatures +(RealFeatures, here 64 bit float values). + +.. sgexample:: ica_fast.sg:create_features + +We create the :sgclass:`CFastICA` instance, and set its parameter for the iterative optimization. + +.. sgexample:: ica_fast.sg:set_parameters + +Then we apply ICA, which gives the unmixed signals. + +.. sgexample:: ica_fast.sg:apply_convert + +We can also extract the estimated mixing matrix. + +.. 
sgexample:: ica_fast.sg:extract + +---------- +References +---------- +:wiki:`Independent_component_analysis` + +.. bibliography:: ../../references.bib + :filter: docname in docnames diff --git a/doc/cookbook/source/examples/evaluation/cross_validation_mkl_weights_storage.rst b/doc/cookbook/source/examples/evaluation/cross_validation_mkl_weights_storage.rst new file mode 100644 index 00000000000..7ca656b437a --- /dev/null +++ b/doc/cookbook/source/examples/evaluation/cross_validation_mkl_weights_storage.rst @@ -0,0 +1,53 @@ +============================================ +Cross Validation on Multiple Kernel Learning +============================================ + +Cross Validation is a model validation technique whose purpose is to give an insight +on how the model we are testing will generalize to an independent dataset. Essentially, +it is based on training and testing the model many times on different complementary partitions of the original +training dataset and then combining the validation results (e.g. average) to estimate +the performance of the final predictive model. + +------- +Example +------- +We'll use as an example a classification problem solvable by using :sgclass:`CMKLClassification`. +For the sake of brevity, we'll skip the initialization of features, kernels and so on +(see :doc:`../regression/multiple_kernel_learning` for a more complete example of MKL usage). + +.. sgexample:: cross_validation_mkl_weight_storage.sg:create_classifier + +Firstly, we initialize a splitting strategy :sgclass:`CStratifiedCrossValidationSplitting`, which is needed +to divide the dataset into folds, and an evaluation criterion :sgclass:`CAccuracyMeasure`, to evaluate the +performance of the trained models. Secondly, we create the :sgclass:`CCrossValidation` instance. +We also set the number of cross validation runs. + +.. 
sgexample:: cross_validation_mkl_weight_storage.sg:create_cross_validation + +To also observe the partial folds' results, we create a cross validation observer :sgclass:`CParameterObserverCV` +and then we register it into the :sgclass:`CCrossValidation` instance. + +.. sgexample:: cross_validation_mkl_weight_storage.sg:create_observer + +Finally, we evaluate the model and get the results (aka a :sgclass:`CCrossValidationResult` instance). + +.. sgexample:: cross_validation_mkl_weight_storage.sg:evaluate_and_get_result + +We get the :math:`mean` of all the evaluation results and their standard deviation :math:`stddev`. + +.. sgexample:: cross_validation_mkl_weight_storage.sg:get_results + +We can get more information about the single cross validation runs and folds by using the observer we set before, like the kernels' weights. +We get the :sgclass:`CMKLClassification` machine used during the first run and trained on the first fold. + +.. sgexample:: cross_validation_mkl_weight_storage.sg:get_fold_machine + +Then, from the trained machine, we get the weights :math:`\mathbf{w}` of its kernels. + +.. sgexample:: cross_validation_mkl_weight_storage.sg:get_weights + +---------- +References +---------- + +:wiki:`Cross-validation_(statistics)` \ No newline at end of file diff --git a/doc/cookbook/source/index.rst b/doc/cookbook/source/index.rst index 1b979cb0399..4225acf2ec4 100644 --- a/doc/cookbook/source/index.rst +++ b/doc/cookbook/source/index.rst @@ -56,15 +56,15 @@ Statistical Testing examples/statistical_testing/** -Kernels ------- +Converter +--------- .. toctree:: :maxdepth: 1 :glob: - :caption: Kernels + :caption: Converter - examples/kernel/** + examples/converter/** Gaussian Processes ------------------ @@ -105,3 +105,13 @@ Neural Networks :caption: Neural Netwroks examples/neural_nets/** + +Evaluation +---------- + +.. 
toctree:: + :maxdepth: 1 + :glob: + :caption: Evaluation + + examples/evaluation/** \ No newline at end of file diff --git a/doc/cookbook/source/references.bib b/doc/cookbook/source/references.bib index b32bec37852..073d5a70290 100644 --- a/doc/cookbook/source/references.bib +++ b/doc/cookbook/source/references.bib @@ -153,3 +153,13 @@ @article{Quinonero-Candela2005 year = {2005}, pages = {1939--1959}, } + +@article{hyvarinen2000independent, + title={Independent component analysis: algorithms and applications}, + author={Hyv{\"a}rinen, A. and Oja, E.}, + journal={Neural networks}, + volume={13}, + number={4}, + pages={411--430}, + year={2000} +} diff --git a/doc/cookbook/source/static/shogun-style.css b/doc/cookbook/source/static/shogun-style.css index 194363f9ff3..2329d699def 100644 --- a/doc/cookbook/source/static/shogun-style.css +++ b/doc/cookbook/source/static/shogun-style.css @@ -1,5 +1,5 @@ .bs-sidenav { - background-color: #f5f5f5 !important; + background-color: #fff !important; padding-top: 0; margin-top: 20pt; } @@ -13,9 +13,11 @@ } #sidebar > ul > p.caption { - background-color: #cecece; padding: 10px; - border-radius: 5px; + text-shadow: none !important; + font-weight: 300; + font-size: 1.5em; + padding-left: 0px; } #sidebar li.toctree-l1:last-child { @@ -24,4 +26,34 @@ div.toctree-wrapper > p.caption { display: none; -} \ No newline at end of file +} + +#sidebar li.toctree-l1 > a { + padding-left: 0px; + font-size: 100%; +} + +#sidebar li.toctree-l1 { + text-shadow: none !important; + border: none; +} + +#sidebar li.toctree-l1 .current { + color: #000; +} + +#sidebar li.toctree-l1 :hover { + background-color: transparent; + border: none; + color: #000; +} + +.section > h1 { + font-weight: 300; + margin-bottom: 0.5em; +} + +.section > h2 { + font-weight: 300; + margin-bottom: 0.5em; +} diff --git a/doc/cookbook/source/templates/layout.html b/doc/cookbook/source/templates/layout.html index 94df908c4ab..1c16d0a1683 100644 --- 
a/doc/cookbook/source/templates/layout.html +++ b/doc/cookbook/source/templates/layout.html @@ -2,6 +2,11 @@ {% set script_files = script_files + ["_static/mathconf.js"] %} {% set bootswatch_css_custom = ['_static/shogun-style.css'] %} +{# + bootswatch_css_custom is removed in sphinx-bootstrap-theme v0.5.0 + css_files should be used instead +#} +{% set css_files = (css_files or []) + ['_static/shogun-style.css'] %} {% block rootrellink %}
  • Shogun »
  • diff --git a/doc/ipython-notebooks/classification/Classification.ipynb b/doc/ipython-notebooks/classification/Classification.ipynb index 8cf310c7280..e9364f1b4a5 100644 --- a/doc/ipython-notebooks/classification/Classification.ipynb +++ b/doc/ipython-notebooks/classification/Classification.ipynb @@ -48,7 +48,7 @@ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import os\nSHOGUN_DATA_DIR=os.getenv('SHOGUN_DATA_DIR', '../../../data')\n", - "from modshogun import *" + "from shogun import *" ] }, { diff --git a/doc/ipython-notebooks/classification/HashedDocDotFeatures.ipynb b/doc/ipython-notebooks/classification/HashedDocDotFeatures.ipynb index a413c70e7d1..34395685bbb 100644 --- a/doc/ipython-notebooks/classification/HashedDocDotFeatures.ipynb +++ b/doc/ipython-notebooks/classification/HashedDocDotFeatures.ipynb @@ -77,7 +77,7 @@ "The response to that is to read our collection as it is and compute the hash of every token only when it's required, on-the-fly.
    \n", "

    On-the-fly Hashing with Shogun

    \n", "We will now have a look at how the above idea is represented in the Shogun Toolbox. That is we will see how we can load our document collection in memory and consider a hashed document-term matrix with the hashing of every document (or token more specifically) happening on-the-fly, only when it's required to be computed. Altough it may sound a bit tricky, it's actually pretty straightforward and here is how.

    \n", - "First of all we import the required components from the modshogun library." + "First of all we import the required components from the shogun library." ] }, { @@ -90,7 +90,7 @@ "source": [ "%matplotlib inline\n", "import os\nSHOGUN_DATA_DIR=os.getenv('SHOGUN_DATA_DIR', '../../../data')\n", - "from modshogun import StringCharFeatures, RAWBYTE, HashedDocDotFeatures, NGramTokenizer" + "from shogun import StringCharFeatures, RAWBYTE, HashedDocDotFeatures, NGramTokenizer" ] }, { @@ -133,7 +133,7 @@ }, "outputs": [], "source": [ - "from modshogun import BinaryLabels\n", + "from shogun import BinaryLabels\n", "from numpy import array\n", "\n", "labels = BinaryLabels(array([-1, 1, 1]))" @@ -199,7 +199,7 @@ }, "outputs": [], "source": [ - "from modshogun import SVMOcas\n", + "from shogun import SVMOcas\n", "\n", "C = 0.1\n", "epsilon = 0.01\n", diff --git a/doc/ipython-notebooks/classification/MKL.ipynb b/doc/ipython-notebooks/classification/MKL.ipynb index f429c88fec4..d7d8bf5bffa 100644 --- a/doc/ipython-notebooks/classification/MKL.ipynb +++ b/doc/ipython-notebooks/classification/MKL.ipynb @@ -48,7 +48,7 @@ "%matplotlib inline\n", "import os\nSHOGUN_DATA_DIR=os.getenv('SHOGUN_DATA_DIR', '../../../data')\n", "# import all shogun classes\n", - "from modshogun import *" + "from shogun import *" ] }, { diff --git a/doc/ipython-notebooks/classification/SupportVectorMachines.ipynb b/doc/ipython-notebooks/classification/SupportVectorMachines.ipynb index 702a013bc53..a5b5bb51a2c 100644 --- a/doc/ipython-notebooks/classification/SupportVectorMachines.ipynb +++ b/doc/ipython-notebooks/classification/SupportVectorMachines.ipynb @@ -119,7 +119,7 @@ "import os\nSHOGUN_DATA_DIR=os.getenv('SHOGUN_DATA_DIR', '../../../data')\n", "import matplotlib.patches as patches\n", "#To import all shogun classes\n", - "import modshogun as sg\n", + "import shogun as sg\n", "import numpy as np\n", "\n", "#Generate some random data\n", diff --git 
a/doc/ipython-notebooks/clustering/GMM.ipynb b/doc/ipython-notebooks/clustering/GMM.ipynb index 884399a3b77..28b9f7cd373 100644 --- a/doc/ipython-notebooks/clustering/GMM.ipynb +++ b/doc/ipython-notebooks/clustering/GMM.ipynb @@ -114,7 +114,7 @@ "%matplotlib inline\n", "import os\nSHOGUN_DATA_DIR=os.getenv('SHOGUN_DATA_DIR', '../../../data')\n", "# import all Shogun classes\n", - "from modshogun import *" + "from shogun import *" ] }, { diff --git a/doc/ipython-notebooks/clustering/KMeans.ipynb b/doc/ipython-notebooks/clustering/KMeans.ipynb index eacb76706ff..83c070e0247 100644 --- a/doc/ipython-notebooks/clustering/KMeans.ipynb +++ b/doc/ipython-notebooks/clustering/KMeans.ipynb @@ -111,7 +111,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import *\n", + "from shogun import *\n", "\n", "train_features = RealFeatures(rectangle)" ], diff --git a/doc/ipython-notebooks/computer_vision/Scene_classification.ipynb b/doc/ipython-notebooks/computer_vision/Scene_classification.ipynb index cb773bddc6d..cff09beb824 100644 --- a/doc/ipython-notebooks/computer_vision/Scene_classification.ipynb +++ b/doc/ipython-notebooks/computer_vision/Scene_classification.ipynb @@ -107,7 +107,7 @@ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", - "from modshogun import *\n", + "from shogun import *\n", "\n", "# get the list of all jpg images from the path provided\n", "import os\n", diff --git a/doc/ipython-notebooks/converter/Tapkee.ipynb b/doc/ipython-notebooks/converter/Tapkee.ipynb index ace962f0905..307c024eb54 100644 --- a/doc/ipython-notebooks/converter/Tapkee.ipynb +++ b/doc/ipython-notebooks/converter/Tapkee.ipynb @@ -130,7 +130,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import Isomap, RealFeatures, MultidimensionalScaling\n", + "from shogun import Isomap, RealFeatures, MultidimensionalScaling\n", "\n", "# wrap data into Shogun features\n", "data, colors = generate_data('swissroll')\n", 
@@ -175,7 +175,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import StochasticProximityEmbedding\n", + "from shogun import StochasticProximityEmbedding\n", "\n", "# wrap data into Shogun features\n", "data, colors = generate_data('helix')\n", diff --git a/doc/ipython-notebooks/distributions/KernelDensity.ipynb b/doc/ipython-notebooks/distributions/KernelDensity.ipynb index e28226916e3..f93c15563ab 100644 --- a/doc/ipython-notebooks/distributions/KernelDensity.ipynb +++ b/doc/ipython-notebooks/distributions/KernelDensity.ipynb @@ -123,7 +123,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import KernelDensity, RealFeatures, K_GAUSSIAN, D_EUCLIDEAN, EM_KDTREE_SINGLE\n", + "from shogun import KernelDensity, RealFeatures, K_GAUSSIAN, D_EUCLIDEAN, EM_KDTREE_SINGLE\n", "\n", "def get_kde_result(bandwidth,samples):\n", " # set model parameters\n", @@ -352,7 +352,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import KernelDensity, RealFeatures, K_GAUSSIAN, D_EUCLIDEAN, EM_BALLTREE_DUAL\n", + "from shogun import KernelDensity, RealFeatures, K_GAUSSIAN, D_EUCLIDEAN, EM_BALLTREE_DUAL\n", "import scipy.interpolate as interpolate\n", "\n", "def get_kde(samples):\n", diff --git a/doc/ipython-notebooks/evaluation/xval_modelselection.ipynb b/doc/ipython-notebooks/evaluation/xval_modelselection.ipynb index cb88db11067..9a786d8bdf5 100644 --- a/doc/ipython-notebooks/evaluation/xval_modelselection.ipynb +++ b/doc/ipython-notebooks/evaluation/xval_modelselection.ipynb @@ -69,7 +69,7 @@ "%matplotlib inline\n", "# include all Shogun classes\n", "import os\nSHOGUN_DATA_DIR=os.getenv('SHOGUN_DATA_DIR', '../../../data')\n", - "from modshogun import *\n", + "from shogun import *\n", "# generate some ultra easy training data\n", "gray()\n", "n=20\n", diff --git a/doc/ipython-notebooks/gaussian_process/gaussian_processes.ipynb b/doc/ipython-notebooks/gaussian_process/gaussian_processes.ipynb index 
a74e9d118fa..374b9a10093 100644 --- a/doc/ipython-notebooks/gaussian_process/gaussian_processes.ipynb +++ b/doc/ipython-notebooks/gaussian_process/gaussian_processes.ipynb @@ -31,7 +31,7 @@ "source": [ "%matplotlib inline\n", "# import all shogun classes\n", - "from modshogun import *\n", + "from shogun import *\n", "import random\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", diff --git a/doc/ipython-notebooks/gaussian_process/variational_classifier.ipynb b/doc/ipython-notebooks/gaussian_process/variational_classifier.ipynb index f8e5db47a6a..6031bf5ad52 100644 --- a/doc/ipython-notebooks/gaussian_process/variational_classifier.ipynb +++ b/doc/ipython-notebooks/gaussian_process/variational_classifier.ipynb @@ -90,7 +90,7 @@ "%matplotlib inline\n", "# import all shogun classes\n", "import os\nSHOGUN_DATA_DIR=os.getenv('SHOGUN_DATA_DIR', '../../../data')\n", - "from modshogun import *\n", + "from shogun import *\n", "\n", "# import all required libraries\n", "import scipy\n", diff --git a/doc/ipython-notebooks/ica/bss_image.ipynb b/doc/ipython-notebooks/ica/bss_image.ipynb index 7d92ceb1feb..a843d7816be 100644 --- a/doc/ipython-notebooks/ica/bss_image.ipynb +++ b/doc/ipython-notebooks/ica/bss_image.ipynb @@ -161,8 +161,8 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import RealFeatures\n", - "from modshogun import Jade\n", + "from shogun import RealFeatures\n", + "from shogun import Jade\n", "\n", "mixed_signals = RealFeatures(X)\n", "\n", diff --git a/doc/ipython-notebooks/intro/Introduction.ipynb b/doc/ipython-notebooks/intro/Introduction.ipynb index f0a8e41fecf..f939e1b52ef 100644 --- a/doc/ipython-notebooks/intro/Introduction.ipynb +++ b/doc/ipython-notebooks/intro/Introduction.ipynb @@ -61,7 +61,7 @@ "%matplotlib inline\n", "import os\nSHOGUN_DATA_DIR=os.getenv('SHOGUN_DATA_DIR', '../../../data')\n", "#To import all Shogun classes\n", - "from modshogun import *" + "from shogun import *" ] }, { diff --git 
a/doc/ipython-notebooks/logdet/logdet.ipynb b/doc/ipython-notebooks/logdet/logdet.ipynb index 48d502246f7..c9e1e4d9391 100644 --- a/doc/ipython-notebooks/logdet/logdet.ipynb +++ b/doc/ipython-notebooks/logdet/logdet.ipynb @@ -93,7 +93,7 @@ }, "outputs": [], "source": [ - "from modshogun import RealSparseMatrixOperator, LanczosEigenSolver\n", + "from shogun import RealSparseMatrixOperator, LanczosEigenSolver\n", "\n", "op = RealSparseMatrixOperator(A.tocsc())\n", "\n", @@ -125,7 +125,7 @@ "source": [ "# We can specify the power of the sparse-matrix that is to be used for coloring, default values will apply a\n", "# 2-distance greedy graph coloring algorithm on the sparse-matrix itself. Matrix-power, if specified, is computed in O(lg p)\n", - "from modshogun import ProbingSampler\n", + "from shogun import ProbingSampler\n", "\n", "trace_sampler = ProbingSampler(op)\n", "# apply the graph coloring algorithm and generate the number of colors, i.e. number of trace samples\n", @@ -150,7 +150,7 @@ }, "outputs": [], "source": [ - "from modshogun import SerialComputationEngine, CGMShiftedFamilySolver, LogRationalApproximationCGM\n", + "from shogun import SerialComputationEngine, CGMShiftedFamilySolver, LogRationalApproximationCGM\n", "\n", "engine = SerialComputationEngine()\n", "cgm = CGMShiftedFamilySolver()\n", @@ -182,7 +182,7 @@ "outputs": [], "source": [ "import numpy as np\n", - "from modshogun import LogDetEstimator\n", + "from shogun import LogDetEstimator\n", "\n", "# number of log-det samples (use a higher number to get better estimates)\n", "# (this is 5 times number of colors estimate in practice, so usually 1 probing estimate is enough)\n", @@ -213,7 +213,7 @@ "# the following method requires massive amount of memory, for demonstration purpose\n", "# the following code is commented out and direct value obtained from running it once is used\n", "\n", - "# from modshogun import Statistics\n", + "# from shogun import Statistics\n", "# actual_logdet = 
Statistics.log_det(A)\n", "\n", "actual_logdet = 7120357.73878\n", @@ -282,7 +282,7 @@ "probing_estimates = log_det_estimator.sample(num_probing_estimates)\n", "\n", "# computing log-det estimates using Gaussian sampler\n", - "from modshogun import NormalSampler, Statistics\n", + "from shogun import NormalSampler, Statistics\n", "\n", "num_colors = probing_sampler.get_num_samples()\n", "normal_sampler = NormalSampler(op.get_dimension())\n", @@ -447,7 +447,7 @@ }, "outputs": [], "source": [ - "from modshogun import RealSparseMatrixOperator, ComplexDenseMatrixOperator\n", + "from shogun import RealSparseMatrixOperator, ComplexDenseMatrixOperator\n", "\n", "dim = 5\n", "np.random.seed(10)\n", @@ -487,7 +487,7 @@ "source": [ "from scipy.sparse import csc_matrix\n", "from scipy.sparse import identity\n", - "from modshogun import ConjugateGradientSolver\n", + "from shogun import ConjugateGradientSolver\n", "\n", "# creating a random spd matrix\n", "dim = 5\n", @@ -532,8 +532,8 @@ }, "outputs": [], "source": [ - "from modshogun import ComplexSparseMatrixOperator\n", - "from modshogun import ConjugateOrthogonalCGSolver\n", + "from shogun import ComplexSparseMatrixOperator\n", + "from shogun import ConjugateOrthogonalCGSolver\n", "\n", "# creating a random spd matrix\n", "dim = 5\n", @@ -580,7 +580,7 @@ }, "outputs": [], "source": [ - "from modshogun import CGMShiftedFamilySolver\n", + "from shogun import CGMShiftedFamilySolver\n", "\n", "cgm = CGMShiftedFamilySolver()\n", "\n", @@ -652,7 +652,7 @@ }, "outputs": [], "source": [ - "from modshogun import DirectSparseLinearSolver\n", + "from shogun import DirectSparseLinearSolver\n", "\n", "# creating a random spd matrix\n", "dim = 5\n", @@ -691,7 +691,7 @@ }, "outputs": [], "source": [ - "from modshogun import DirectLinearSolverComplex\n", + "from shogun import DirectLinearSolverComplex\n", "\n", "# creating a random spd matrix\n", "dim = 5\n", diff --git a/doc/ipython-notebooks/metric/LMNN.ipynb 
b/doc/ipython-notebooks/metric/LMNN.ipynb index 3ee07c04ba6..112ea1abc93 100644 --- a/doc/ipython-notebooks/metric/LMNN.ipynb +++ b/doc/ipython-notebooks/metric/LMNN.ipynb @@ -177,7 +177,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import RealFeatures, MulticlassLabels\n", + "from shogun import RealFeatures, MulticlassLabels\n", "\n", "features = RealFeatures(x.T)\n", "labels = MulticlassLabels(y.astype(numpy.float64))" @@ -197,7 +197,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import LMNN\n", + "from shogun import LMNN\n", "\n", "# number of target neighbours per example\n", "k = 1\n", @@ -384,7 +384,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import KNN, EuclideanDistance, LMNN, RealFeatures, MulticlassLabels\n", + "from shogun import KNN, EuclideanDistance, LMNN, RealFeatures, MulticlassLabels\n", "\n", "def plot_neighborhood_graph(x, nn, axis=pyplot, cols=['r', 'b', 'g', 'm', 'k', 'y']):\n", "\tfor i in xrange(x.shape[0]):\n", @@ -480,7 +480,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import CSVFile, RealFeatures, MulticlassLabels\n", + "from shogun import CSVFile, RealFeatures, MulticlassLabels\n", "\n", "ape_features = RealFeatures(CSVFile(os.path.join(SHOGUN_DATA_DIR, 'multiclass/fm_ape_gut.dat')))\n", "ape_labels = MulticlassLabels(CSVFile(os.path.join(SHOGUN_DATA_DIR, 'multiclass/label_ape_gut.dat')))" @@ -518,7 +518,7 @@ "collapsed": false, "input": [ "def visualize_tdsne(features, labels):\n", - " from modshogun import TDistributedStochasticNeighborEmbedding\n", + " from shogun import TDistributedStochasticNeighborEmbedding\n", " \n", " converter = TDistributedStochasticNeighborEmbedding()\n", " converter.set_target_dim(2)\n", @@ -561,9 +561,9 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import KNN, EuclideanDistance\n", - "from modshogun import StratifiedCrossValidationSplitting, CrossValidation\n", - 
"from modshogun import CrossValidationResult, MulticlassAccuracy\n", + "from shogun import KNN, EuclideanDistance\n", + "from shogun import StratifiedCrossValidationSplitting, CrossValidation\n", + "from shogun import CrossValidationResult, MulticlassAccuracy\n", "\n", "# set up the classifier\n", "knn = KNN()\n", @@ -622,7 +622,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import LMNN\n", + "from shogun import LMNN\n", "import numpy\n", "\n", "# to make training faster, use a portion of the features\n", @@ -712,7 +712,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import CSVFile, RealFeatures, MulticlassLabels\n", + "from shogun import CSVFile, RealFeatures, MulticlassLabels\n", "\n", "wine_features = RealFeatures(CSVFile(os.path.join(SHOGUN_DATA_DIR, 'uci/wine/fm_wine.dat')))\n", "wine_labels = MulticlassLabels(CSVFile(os.path.join(SHOGUN_DATA_DIR, 'uci/wine/label_wine.dat')))\n", @@ -736,9 +736,9 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import KNN, EuclideanDistance\n", - "from modshogun import StratifiedCrossValidationSplitting, CrossValidation\n", - "from modshogun import CrossValidationResult, MulticlassAccuracy\n", + "from shogun import KNN, EuclideanDistance\n", + "from shogun import StratifiedCrossValidationSplitting, CrossValidation\n", + "from shogun import CrossValidationResult, MulticlassAccuracy\n", "import numpy\n", "\n", "# kNN classifier\n", @@ -775,7 +775,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import LMNN\n", + "from shogun import LMNN\n", "\n", "# train LMNN\n", "lmnn = LMNN(wine_features, wine_labels, k)\n", @@ -838,7 +838,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import RescaleFeatures\n", + "from shogun import RescaleFeatures\n", "\n", "# preprocess features so that all of them vary within [0,1]\n", "preprocessor = RescaleFeatures()\n", diff --git 
a/doc/ipython-notebooks/multiclass/KNN.ipynb b/doc/ipython-notebooks/multiclass/KNN.ipynb index c5de58b52ab..68b2b174973 100644 --- a/doc/ipython-notebooks/multiclass/KNN.ipynb +++ b/doc/ipython-notebooks/multiclass/KNN.ipynb @@ -136,8 +136,8 @@ }, "outputs": [], "source": [ - "from modshogun import MulticlassLabels, RealFeatures\n", - "from modshogun import KNN, EuclideanDistance\n", + "from shogun import MulticlassLabels, RealFeatures\n", + "from shogun import KNN, EuclideanDistance\n", "\n", "labels = MulticlassLabels(Ytrain)\n", "feats = RealFeatures(Xtrain)\n", @@ -151,7 +151,7 @@ "print \"Predictions\", pred[:5]\n", "print \"Ground Truth\", Ytest[:5]\n", "\n", - "from modshogun import MulticlassAccuracy\n", + "from shogun import MulticlassAccuracy\n", "evaluator = MulticlassAccuracy()\n", "accuracy = evaluator.evaluate(pred, labels_test)\n", "\n", @@ -249,7 +249,7 @@ }, "outputs": [], "source": [ - "from modshogun import Time, KNN_COVER_TREE, KNN_BRUTE\n", + "from shogun import Time, KNN_COVER_TREE, KNN_BRUTE\n", "start = Time.get_curtime()\n", "knn.set_k(3)\n", "knn.set_knn_solver_type(KNN_BRUTE)\n", @@ -280,7 +280,7 @@ "outputs": [], "source": [ "def evaluate(labels, feats, use_cover_tree=False):\n", - " from modshogun import MulticlassAccuracy, CrossValidationSplitting\n", + " from shogun import MulticlassAccuracy, CrossValidationSplitting\n", " import time\n", " split = CrossValidationSplitting(labels, Nsplit)\n", " split.build_subsets()\n", @@ -422,7 +422,7 @@ }, "outputs": [], "source": [ - "from modshogun import GaussianKernel, GMNPSVM\n", + "from shogun import GaussianKernel, GMNPSVM\n", "\n", "width=80\n", "C=1\n", diff --git a/doc/ipython-notebooks/multiclass/Tree/DecisionTrees.ipynb b/doc/ipython-notebooks/multiclass/Tree/DecisionTrees.ipynb index 81ebc07ef7a..02f35db823f 100644 --- a/doc/ipython-notebooks/multiclass/Tree/DecisionTrees.ipynb +++ b/doc/ipython-notebooks/multiclass/Tree/DecisionTrees.ipynb @@ -199,7 +199,7 @@ }, "outputs": [], 
"source": [ - "from modshogun import ID3ClassifierTree, RealFeatures, MulticlassLabels\n", + "from shogun import ID3ClassifierTree, RealFeatures, MulticlassLabels\n", "from numpy import array, concatenate\n", "\n", "# encoding dictionary\n", @@ -561,7 +561,7 @@ }, "outputs": [], "source": [ - "from modshogun import MulticlassAccuracy\n", + "from shogun import MulticlassAccuracy\n", "\n", "# Shogun object for calculating multiclass accuracy\n", "accuracy = MulticlassAccuracy()\n", @@ -698,7 +698,7 @@ "source": [ "import matplotlib.pyplot as plt\n", "from numpy import ones, zeros, random, concatenate\n", - "from modshogun import RealFeatures, MulticlassLabels\n", + "from shogun import RealFeatures, MulticlassLabels\n", "% matplotlib inline\n", "\n", "def create_toy_classification_dataset(ncat,do_plot):\n", @@ -780,7 +780,7 @@ "outputs": [], "source": [ "from numpy import array\n", - "from modshogun import C45ClassifierTree\n", + "from shogun import C45ClassifierTree\n", "\n", "# steps in C4.5 Tree training bundled together in a python method\n", "def train_tree(feats,types,labels):\n", @@ -1028,7 +1028,7 @@ }, "outputs": [], "source": [ - "from modshogun import RealFeatures, MulticlassLabels\n", + "from shogun import RealFeatures, MulticlassLabels\n", "\n", "# training data\n", "feats_train = RealFeatures(feats_train)\n", @@ -1138,7 +1138,7 @@ }, "outputs": [], "source": [ - "from modshogun import MulticlassAccuracy\n", + "from shogun import MulticlassAccuracy\n", "\n", "# Shogun object for calculating multiclass accuracy\n", "accuracy = MulticlassAccuracy()\n", @@ -1258,7 +1258,7 @@ }, "outputs": [], "source": [ - "from modshogun import PT_MULTICLASS, CARTree\n", + "from shogun import PT_MULTICLASS, CARTree\n", "from numpy import array\n", "\n", "def train_carttree(feat_types,problem_type,num_folds,use_cv_pruning,labels,features):\n", @@ -1342,7 +1342,7 @@ }, "outputs": [], "source": [ - "from modshogun import RegressionLabels, RealFeatures\n", + "from shogun import 
RegressionLabels, RealFeatures\n", "from numpy import random, sin, linspace\n", "import matplotlib.pyplot as plt\n", "% matplotlib inline\n", @@ -1400,7 +1400,7 @@ }, "outputs": [], "source": [ - "from modshogun import PT_REGRESSION\n", + "from shogun import PT_REGRESSION\n", "from numpy import array\n", "\n", "# feature type - continuous\n", @@ -1539,7 +1539,7 @@ }, "outputs": [], "source": [ - "from modshogun import CARTree, PT_MULTICLASS\n", + "from shogun import CARTree, PT_MULTICLASS\n", "\n", "# set attribute types - all continuous\n", "feature_types = array([False, False, False, False])\n", @@ -1568,8 +1568,8 @@ }, "outputs": [], "source": [ - "from modshogun import RealFeatures, MulticlassLabels\n", - "from modshogun import CrossValidation, MulticlassAccuracy, CrossValidationSplitting, CrossValidationResult\n", + "from shogun import RealFeatures, MulticlassLabels\n", + "from shogun import CrossValidation, MulticlassAccuracy, CrossValidationSplitting, CrossValidationResult\n", "\n", "# training features\n", "feats_train = RealFeatures(feat)\n", @@ -1673,8 +1673,8 @@ }, "outputs": [], "source": [ - "from modshogun import CARTree, RegressionLabels, PT_REGRESSION, MeanSquaredError\n", - "from modshogun import CrossValidation, CrossValidationSplitting, CrossValidationResult\n", + "from shogun import CARTree, RegressionLabels, PT_REGRESSION, MeanSquaredError\n", + "from shogun import CrossValidation, CrossValidationSplitting, CrossValidationResult\n", "\n", "# form training features\n", "feats_train = RealFeatures(feat)\n", @@ -1818,7 +1818,7 @@ }, "outputs": [], "source": [ - "from modshogun import PT_MULTICLASS, CHAIDTree\n", + "from shogun import PT_MULTICLASS, CHAIDTree\n", "from numpy import array, dtype, int32\n", "\n", "def train_chaidtree(dependent_var_type,feature_types,num_bins,features,labels):\n", @@ -2002,7 +2002,7 @@ }, "outputs": [], "source": [ - "from modshogun import CSVFile, RealFeatures, MulticlassLabels\n", + "from shogun import CSVFile, 
RealFeatures, MulticlassLabels\n", "\n", "train_feats=RealFeatures(CSVFile( os.path.join(SHOGUN_DATA_DIR, 'uci/wine/fm_wine.dat')))\n", "train_labels=MulticlassLabels(CSVFile( os.path.join(SHOGUN_DATA_DIR, 'uci/wine/label_wine.dat')))" @@ -2028,7 +2028,7 @@ }, "outputs": [], "source": [ - "from modshogun import CHAIDTree, MulticlassLabels\n", + "from shogun import CHAIDTree, MulticlassLabels\n", "\n", "# set attribute types - all attributes are continuous(2)\n", "feature_types = array([2 for i in range(13)],dtype=int32) \n", @@ -2059,7 +2059,7 @@ "source": [ "# set up cross validation class\n", "\n", - "from modshogun import CrossValidation, CrossValidationSplitting, CrossValidationResult, MulticlassAccuracy\n", + "from shogun import CrossValidation, CrossValidationSplitting, CrossValidationResult, MulticlassAccuracy\n", "\n", "# set evaluation criteria - multiclass accuracy\n", "accuracy = MulticlassAccuracy()\n", @@ -2106,7 +2106,7 @@ }, "outputs": [], "source": [ - "from modshogun import CSVFile, RealFeatures, RegressionLabels\n", + "from shogun import CSVFile, RealFeatures, RegressionLabels\n", "from numpy import ptp\n", "\n", "train_feats=RealFeatures(CSVFile( os.path.join(SHOGUN_DATA_DIR, 'uci/housing/fm_housing.dat')))\n", @@ -2136,8 +2136,8 @@ }, "outputs": [], "source": [ - "from modshogun import CHAIDTree, MeanSquaredError\n", - "from modshogun import CrossValidation, CrossValidationSplitting, CrossValidationResult\n", + "from shogun import CHAIDTree, MeanSquaredError\n", + "from shogun import CrossValidation, CrossValidationSplitting, CrossValidationResult\n", "from numpy import array, dtype, int32\n", "\n", "def get_cv_error(max_depth):\n", diff --git a/doc/ipython-notebooks/multiclass/Tree/TreeEnsemble.ipynb b/doc/ipython-notebooks/multiclass/Tree/TreeEnsemble.ipynb index 5ee0e83915b..249e14b0ff1 100644 --- a/doc/ipython-notebooks/multiclass/Tree/TreeEnsemble.ipynb +++ b/doc/ipython-notebooks/multiclass/Tree/TreeEnsemble.ipynb @@ -66,7 +66,7 @@ 
"collapsed": false, "input": [ "import os\nSHOGUN_DATA_DIR=os.getenv('SHOGUN_DATA_DIR', '../../../../data')\n", - "from modshogun import CSVFile,RealFeatures,MulticlassLabels\n", + "from shogun import CSVFile,RealFeatures,MulticlassLabels\n", "\n", "def load_file(feat_file,label_file):\n", " feats=RealFeatures(CSVFile(feat_file))\n", @@ -92,7 +92,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import RandomForest, MajorityVote\n", + "from shogun import RandomForest, MajorityVote\n", "from numpy import array\n", "\n", "def setup_random_forest(num_trees,rand_subset_size,combination_rule,feature_types):\n", @@ -149,7 +149,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import CARTree, PT_MULTICLASS\n", + "from shogun import CARTree, PT_MULTICLASS\n", "\n", "def train_cart(train_feats,train_labels,feature_types,problem_type):\n", " c=CARTree(feature_types,problem_type,2,False)\n", @@ -180,7 +180,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import MulticlassAccuracy\n", + "from shogun import MulticlassAccuracy\n", "\n", "accuracy=MulticlassAccuracy()\n", "\n", diff --git a/doc/ipython-notebooks/multiclass/multiclass_reduction.ipynb b/doc/ipython-notebooks/multiclass/multiclass_reduction.ipynb index 3f36e306e7f..22e3c21d651 100644 --- a/doc/ipython-notebooks/multiclass/multiclass_reduction.ipynb +++ b/doc/ipython-notebooks/multiclass/multiclass_reduction.ipynb @@ -185,10 +185,10 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import RealFeatures, MulticlassLabels\n", - "from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine\n", - "from modshogun import MulticlassOneVsOneStrategy, MulticlassOneVsRestStrategy\n", - "from modshogun import MulticlassAccuracy\n", + "from shogun import RealFeatures, MulticlassLabels\n", + "from shogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine\n", + "from shogun import MulticlassOneVsOneStrategy, 
MulticlassOneVsRestStrategy\n", + "from shogun import MulticlassAccuracy\n", "\n", "import time\n", "\n", @@ -273,7 +273,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import MulticlassLibLinear\n", + "from shogun import MulticlassLibLinear\n", "mcsvm = MulticlassLibLinear(5.0, feats_train, lab_train)\n", "mcsvm.set_use_bias(True)\n", "\n", @@ -449,7 +449,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import ECOCStrategy, ECOCRandomDenseEncoder, ECOCLLBDecoder\n", + "from shogun import ECOCStrategy, ECOCRandomDenseEncoder, ECOCLLBDecoder\n", "\n", "print \"\\nRandom Dense Encoder + Margin Loss based Decoder\"\n", "print \"=\"*60\n", @@ -482,7 +482,7 @@ "collapsed": false, "input": [ "def evaluate_multiclass_kernel(strategy):\n", - " from modshogun import KernelMulticlassMachine, LibSVM, GaussianKernel\n", + " from shogun import KernelMulticlassMachine, LibSVM, GaussianKernel\n", " width=2.1\n", " epsilon=1e-5\n", " \n", @@ -532,7 +532,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import *\n", + "from shogun import *\n", "from numpy import *\n", "\n", "num=1000;\n", @@ -624,7 +624,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import KernelMulticlassMachine, LibSVM, GaussianKernel\n", + "from shogun import KernelMulticlassMachine, LibSVM, GaussianKernel\n", "\n", "width=2.1\n", "epsilon=1e-5\n", diff --git a/doc/ipython-notebooks/multiclass/naive_bayes.ipynb b/doc/ipython-notebooks/multiclass/naive_bayes.ipynb index 9a833ef583f..661a4963ed1 100644 --- a/doc/ipython-notebooks/multiclass/naive_bayes.ipynb +++ b/doc/ipython-notebooks/multiclass/naive_bayes.ipynb @@ -140,9 +140,9 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import GaussianNaiveBayes\n", - "from modshogun import RealFeatures\n", - "from modshogun import MulticlassLabels\n", + "from shogun import GaussianNaiveBayes\n", + "from shogun import RealFeatures\n", + "from 
shogun import MulticlassLabels\n", "\n", "X_train, Y_train = gen_samples(n_train)\n", "\n", diff --git a/doc/ipython-notebooks/neuralnets/autoencoders.ipynb b/doc/ipython-notebooks/neuralnets/autoencoders.ipynb index 64949fdab5d..8444fc9e03e 100644 --- a/doc/ipython-notebooks/neuralnets/autoencoders.ipynb +++ b/doc/ipython-notebooks/neuralnets/autoencoders.ipynb @@ -55,7 +55,7 @@ "%matplotlib inline\n", "import os\nSHOGUN_DATA_DIR=os.getenv('SHOGUN_DATA_DIR', '../../../data')\n", "from scipy.io import loadmat\n", - "from modshogun import RealFeatures, MulticlassLabels, Math\n", + "from shogun import RealFeatures, MulticlassLabels, Math\n", "\n", "# load the dataset\n", "dataset = loadmat(os.path.join(SHOGUN_DATA_DIR, 'multiclass/usps.mat'))\n", @@ -101,7 +101,7 @@ }, "outputs": [], "source": [ - "from modshogun import NeuralLayers, DeepAutoencoder\n", + "from shogun import NeuralLayers, DeepAutoencoder\n", "\n", "layers = NeuralLayers()\n", "layers = layers.input(256).rectified_linear(512).rectified_linear(128).rectified_linear(512).linear(256).done()\n", @@ -137,7 +137,7 @@ }, "outputs": [], "source": [ - "from modshogun import AENT_DROPOUT, NNOM_GRADIENT_DESCENT\n", + "from shogun import AENT_DROPOUT, NNOM_GRADIENT_DESCENT\n", "\n", "ae.pt_noise_type.set_const(AENT_DROPOUT) # use dropout noise\n", "ae.pt_noise_parameter.set_const(0.5) # each input has a 50% chance of being set to zero\n", @@ -150,7 +150,7 @@ "ae.pt_epsilon.set_const(0.0) # disable automatic convergence testing\n", "\n", "# uncomment this line to allow the training progress to be printed on the console\n", - "#from modshogun import MSG_INFO; ae.io.set_loglevel(MSG_INFO)\n", + "#from shogun import MSG_INFO; ae.io.set_loglevel(MSG_INFO)\n", "\n", "# start pre-training. 
this might take some time\n", "ae.pre_train(Xtrain)" @@ -287,7 +287,7 @@ }, "outputs": [], "source": [ - "from modshogun import NeuralSoftmaxLayer\n", + "from shogun import NeuralSoftmaxLayer\n", "\n", "nn = ae.convert_to_neural_network(NeuralSoftmaxLayer(10))\n", "\n", @@ -312,7 +312,7 @@ }, "outputs": [], "source": [ - "from modshogun import MulticlassAccuracy\n", + "from shogun import MulticlassAccuracy\n", "\n", "predictions = nn.apply_multiclass(Xtest)\n", "accuracy = MulticlassAccuracy().evaluate(predictions, Ytest) * 100\n", @@ -344,7 +344,7 @@ }, "outputs": [], "source": [ - "from modshogun import DynamicObjectArray, NeuralInputLayer, NeuralConvolutionalLayer, CMAF_RECTIFIED_LINEAR\n", + "from shogun import DynamicObjectArray, NeuralInputLayer, NeuralConvolutionalLayer, CMAF_RECTIFIED_LINEAR\n", "\n", "conv_layers = DynamicObjectArray()\n", "# 16x16 single channel images\n", diff --git a/doc/ipython-notebooks/neuralnets/neuralnets_digits.ipynb b/doc/ipython-notebooks/neuralnets/neuralnets_digits.ipynb index 98c0bc14061..02e056b18cd 100644 --- a/doc/ipython-notebooks/neuralnets/neuralnets_digits.ipynb +++ b/doc/ipython-notebooks/neuralnets/neuralnets_digits.ipynb @@ -51,7 +51,7 @@ "%matplotlib inline\n", "import os\nSHOGUN_DATA_DIR=os.getenv('SHOGUN_DATA_DIR', '../../../data')\n", "from scipy.io import loadmat\n", - "from modshogun import RealFeatures, MulticlassLabels, Math\n", + "from shogun import RealFeatures, MulticlassLabels, Math\n", "\n", "# load the dataset\n", "dataset = loadmat(os.path.join(SHOGUN_DATA_DIR, 'multiclass/usps.mat'))\n", @@ -109,8 +109,8 @@ }, "outputs": [], "source": [ - "from modshogun import NeuralNetwork, NeuralInputLayer, NeuralLogisticLayer, NeuralSoftmaxLayer\n", - "from modshogun import DynamicObjectArray\n", + "from shogun import NeuralNetwork, NeuralInputLayer, NeuralLogisticLayer, NeuralSoftmaxLayer\n", + "from shogun import DynamicObjectArray\n", "\n", "# setup the layers\n", "layers = DynamicObjectArray()\n", @@ -215,7 
+215,7 @@ }, "outputs": [], "source": [ - "from modshogun import MulticlassAccuracy\n", + "from shogun import MulticlassAccuracy\n", "\n", "def compute_accuracy(net, X, Y):\n", " predictions = net.apply_multiclass(X)\n", @@ -246,7 +246,7 @@ "net_no_reg.set_max_num_epochs(600)\n", "\n", "# uncomment this line to allow the training progress to be printed on the console\n", - "#from modshogun import MSG_INFO; net_no_reg.io.set_loglevel(MSG_INFO)\n", + "#from shogun import MSG_INFO; net_no_reg.io.set_loglevel(MSG_INFO)\n", "\n", "net_no_reg.set_labels(Ytrain)\n", "net_no_reg.train(Xtrain) # this might take a while, depending on your machine\n", @@ -336,7 +336,7 @@ }, "outputs": [], "source": [ - "from modshogun import NNOM_GRADIENT_DESCENT\n", + "from shogun import NNOM_GRADIENT_DESCENT\n", "\n", "# set the dropout probabilty for neurons in the hidden layers\n", "net_dropout.set_dropout_hidden(0.5)\n", @@ -391,7 +391,7 @@ }, "outputs": [], "source": [ - "from modshogun import NeuralConvolutionalLayer, CMAF_RECTIFIED_LINEAR\n", + "from shogun import NeuralConvolutionalLayer, CMAF_RECTIFIED_LINEAR\n", "\n", "# prepere the layers\n", "layers_conv = DynamicObjectArray()\n", diff --git a/doc/ipython-notebooks/neuralnets/rbms_dbns.ipynb b/doc/ipython-notebooks/neuralnets/rbms_dbns.ipynb index 3d2e8c16589..0e7a05fe47b 100644 --- a/doc/ipython-notebooks/neuralnets/rbms_dbns.ipynb +++ b/doc/ipython-notebooks/neuralnets/rbms_dbns.ipynb @@ -174,7 +174,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import RBM, RBMVUT_BINARY, Math\n", + "from shogun import RBM, RBMVUT_BINARY, Math\n", "\n", "# initialize the random number generator with a fixed seed, for repeatability\n", "Math.init_random(10)\n", @@ -223,10 +223,10 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import RealFeatures, RBMMM_PSEUDO_LIKELIHOOD\n", + "from shogun import RealFeatures, RBMMM_PSEUDO_LIKELIHOOD\n", "\n", "# uncomment this line to allow the training 
progress to be printed on the console\n", - "#from modshogun import MSG_INFO; rbms[0].io.set_loglevel(MSG_INFO)\n", + "#from shogun import MSG_INFO; rbms[0].io.set_loglevel(MSG_INFO)\n", "\n", "for i in range(10):\n", " # obtain the data for digit i\n", @@ -305,7 +305,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import DeepBeliefNetwork\n", + "from shogun import DeepBeliefNetwork\n", "\n", "dbn = DeepBeliefNetwork(256) # 256 visible units\n", "dbn.add_hidden_layer(200) # 200 units in the first hidden layer\n", @@ -386,7 +386,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import NeuralSoftmaxLayer, MulticlassLabels\n", + "from shogun import NeuralSoftmaxLayer, MulticlassLabels\n", "\n", "# get the neural network\n", "nn = dbn.convert_to_neural_network(NeuralSoftmaxLayer(10))\n", @@ -413,7 +413,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import MulticlassAccuracy\n", + "from shogun import MulticlassAccuracy\n", "\n", "predictions = nn.apply_multiclass(RealFeatures(Xtest))\n", "accuracy = MulticlassAccuracy().evaluate(predictions, MulticlassLabels(Ytest)) * 100\n", diff --git a/doc/ipython-notebooks/pca/pca_notebook.ipynb b/doc/ipython-notebooks/pca/pca_notebook.ipynb index 1e378b1b120..8d29e3f66a5 100644 --- a/doc/ipython-notebooks/pca/pca_notebook.ipynb +++ b/doc/ipython-notebooks/pca/pca_notebook.ipynb @@ -33,7 +33,7 @@ "%matplotlib inline\n", "import os\nSHOGUN_DATA_DIR=os.getenv('SHOGUN_DATA_DIR', '../../../data')\n", "# import all shogun classes\n", - "from modshogun import *" + "from shogun import *" ] }, { @@ -941,7 +941,7 @@ " imgplot.axes.get_xaxis().set_visible(False)\n", " imgplot.axes.get_yaxis().set_visible(False)\n", " \n", - "import Image\n", + "from PIL import Image\n", "from scipy import misc\n", "\n", "# to get a hang of the data, lets see some part of the dataset images.\n", diff --git a/doc/ipython-notebooks/regression/Regression.ipynb 
b/doc/ipython-notebooks/regression/Regression.ipynb index 8d69552897a..e7ebc689932 100644 --- a/doc/ipython-notebooks/regression/Regression.ipynb +++ b/doc/ipython-notebooks/regression/Regression.ipynb @@ -106,7 +106,7 @@ "import os\nSHOGUN_DATA_DIR=os.getenv('SHOGUN_DATA_DIR', '../../../data')\n", "from cycler import cycler\n", "# import all shogun classes\n", - "from modshogun import *\n", + "from shogun import *\n", "slope = 3\n", "\n", "X_train = rand(30)*10\n", diff --git a/doc/ipython-notebooks/statistical_testing/mmd_two_sample_testing.ipynb b/doc/ipython-notebooks/statistical_testing/mmd_two_sample_testing.ipynb index ebc165f1a19..2bedd3a02a9 100644 --- a/doc/ipython-notebooks/statistical_testing/mmd_two_sample_testing.ipynb +++ b/doc/ipython-notebooks/statistical_testing/mmd_two_sample_testing.ipynb @@ -41,7 +41,7 @@ "%pylab inline\n", "%matplotlib inline\n", "import os\nSHOGUN_DATA_DIR=os.getenv('SHOGUN_DATA_DIR', '../../../data')\n", - "import modshogun as sg\n", + "import shogun as sg\n", "import numpy as np" ] }, diff --git a/doc/ipython-notebooks/structure/Binary_Denoising.ipynb b/doc/ipython-notebooks/structure/Binary_Denoising.ipynb index 292b9245ba3..9b3d2039356 100644 --- a/doc/ipython-notebooks/structure/Binary_Denoising.ipynb +++ b/doc/ipython-notebooks/structure/Binary_Denoising.ipynb @@ -262,10 +262,10 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import Factor, TableFactorType, FactorGraph\n", - "from modshogun import FactorGraphObservation, FactorGraphLabels, FactorGraphFeatures\n", - "from modshogun import FactorGraphModel, GRAPH_CUT, LP_RELAXATION\n", - "from modshogun import MAPInference" + "from shogun import Factor, TableFactorType, FactorGraph\n", + "from shogun import FactorGraphObservation, FactorGraphLabels, FactorGraphFeatures\n", + "from shogun import FactorGraphModel, GRAPH_CUT, LP_RELAXATION\n", + "from shogun import MAPInference" ], "language": "python", "metadata": {}, @@ -518,7 +518,7 @@ 
"cell_type": "code", "collapsed": false, "input": [ - "from modshogun import StochasticSOSVM\n", + "from shogun import StochasticSOSVM\n", "import time\n", "\n", "# Training with Stocastic Gradient Descent\n", diff --git a/doc/ipython-notebooks/structure/FGM.ipynb b/doc/ipython-notebooks/structure/FGM.ipynb index a89cae5d57f..8d724e7ba6e 100644 --- a/doc/ipython-notebooks/structure/FGM.ipynb +++ b/doc/ipython-notebooks/structure/FGM.ipynb @@ -248,7 +248,7 @@ }, "outputs": [], "source": [ - "from modshogun import TableFactorType\n", + "from shogun import TableFactorType\n", "\n", "# unary, type_id = 0\n", "cards_u = np.array([n_stats], np.int32)\n", @@ -294,8 +294,8 @@ "source": [ "def prepare_data(x, y, ftype, num_samples):\n", " \"\"\"prepare FactorGraphFeatures and FactorGraphLabels \"\"\"\n", - " from modshogun import Factor, TableFactorType, FactorGraph\n", - " from modshogun import FactorGraphObservation, FactorGraphLabels, FactorGraphFeatures\n", + " from shogun import Factor, TableFactorType, FactorGraph\n", + " from shogun import FactorGraphObservation, FactorGraphLabels, FactorGraphFeatures\n", "\n", " samples = FactorGraphFeatures(num_samples)\n", " labels = FactorGraphLabels(num_samples)\n", @@ -443,7 +443,7 @@ }, "outputs": [], "source": [ - "from modshogun import FactorGraphModel, TREE_MAX_PROD\n", + "from shogun import FactorGraphModel, TREE_MAX_PROD\n", "\n", "# create model and register factor types\n", "model = FactorGraphModel(samples, labels, TREE_MAX_PROD)\n", @@ -468,8 +468,8 @@ }, "outputs": [], "source": [ - "from modshogun import DualLibQPBMSOSVM\n", - "from modshogun import BmrmStatistics\n", + "from shogun import DualLibQPBMSOSVM\n", + "from shogun import BmrmStatistics\n", "import pickle\n", "import time\n", "\n", @@ -577,7 +577,7 @@ }, "outputs": [], "source": [ - "from modshogun import StochasticSOSVM\n", + "from shogun import StochasticSOSVM\n", "\n", "# the 3rd parameter is do_weighted_averaging, by turning this on, \n", "# a possibly 
faster convergence rate may be achieved.\n", @@ -760,7 +760,7 @@ }, "outputs": [], "source": [ - "from modshogun import FactorGraphFeatures, FactorGraphObservation, TREE_MAX_PROD, MAPInference\n", + "from shogun import FactorGraphFeatures, FactorGraphObservation, TREE_MAX_PROD, MAPInference\n", "\n", "# get a factor graph instance from test data\n", "fg0 = samples_ts.get_sample(100)\n", @@ -800,7 +800,7 @@ }, "outputs": [], "source": [ - "from modshogun import LabelsFactory, SOSVMHelper\n", + "from shogun import LabelsFactory, SOSVMHelper\n", "\n", "# training error of BMRM method\n", "bmrm.set_w(w_bmrm)\n", diff --git a/doc/ipython-notebooks/structure/multilabel_structured_prediction.ipynb b/doc/ipython-notebooks/structure/multilabel_structured_prediction.ipynb index 6e73a76082f..7eb42989a03 100644 --- a/doc/ipython-notebooks/structure/multilabel_structured_prediction.ipynb +++ b/doc/ipython-notebooks/structure/multilabel_structured_prediction.ipynb @@ -165,14 +165,14 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import RealFeatures, MultilabelSOLabels, MultilabelModel\n", + "from shogun import RealFeatures, MultilabelSOLabels, MultilabelModel\n", "\n", "def create_features(X, constant):\n", " features = RealFeatures(\n", " np.c_[X, constant * np.ones(X.shape[0])].T)\n", " \n", " return features\n", - "from modshogun import MultilabelSOLabels\n", + "from shogun import MultilabelSOLabels\n", "\n", "def create_labels(Y, n_classes):\n", " try:\n", @@ -236,7 +236,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import StochasticSOSVM, DualLibQPBMSOSVM, StructuredAccuracy, LabelsFactory\n", + "from shogun import StochasticSOSVM, DualLibQPBMSOSVM, StructuredAccuracy, LabelsFactory\n", "from time import time\n", "\n", "sgd = StochasticSOSVM(model, labels)\n", @@ -386,7 +386,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from modshogun import SparseMultilabel_obtain_from_generic\n", + "from shogun import 
SparseMultilabel_obtain_from_generic\n", "\n", "def plot_decision_plane(machine,\n", " title,\n", diff --git a/doc/ipython-notebooks/template.ipynb b/doc/ipython-notebooks/template.ipynb index 678beeca981..8a3c7846fbe 100644 --- a/doc/ipython-notebooks/template.ipynb +++ b/doc/ipython-notebooks/template.ipynb @@ -108,4 +108,4 @@ "metadata": {} } ] -} \ No newline at end of file +} diff --git a/doc/license/LICENSE.md b/doc/license/LICENSE.md index 94a9ed024d3..8a19f3af801 100644 --- a/doc/license/LICENSE.md +++ b/doc/license/LICENSE.md @@ -1,674 +1,28 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. 
Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. 
To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. 
If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. 
For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. 
Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. 
This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. 
- - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. 
If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. 
If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. 
- - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. 
For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - - Copyright (C) - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - Copyright (C) - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -. +Copyright (c) 2017, Shogun Machine Learning Toolbox developers +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/doc/license/README_gpl.md b/doc/license/README_gpl.md new file mode 100644 index 00000000000..c5b323d423b --- /dev/null +++ b/doc/license/README_gpl.md @@ -0,0 +1,5 @@ +Shogun can be built (optionally) with GPL codes enabled. +In order to allow for a less restrictive BSD compatible distribution of the +main repository, these have been moved to a separate repository, shogun-gpl. +The build automatically integrates these codes if it is checked out. 
+Please refer to the submodule for exact licensing text and code details. diff --git a/doc/readme/ABOUT.md b/doc/readme/ABOUT.md index 327eee2f9d0..269f764d5cf 100644 --- a/doc/readme/ABOUT.md +++ b/doc/readme/ABOUT.md @@ -1,12 +1,10 @@ -The Shogun Machine learning toolbox offers a wide range of efficient and unified Machine Learning methods. - +Shogun is an open-source machine learning library that offers a wide range of efficient and unified machine learning methods. #### Shogun is accessible * Supports many languages (Python, Octave, R, Java/Scala, Lua, C#, Ruby, etc) and platforms (Linux/Unix, MacOS and Windows) and integrates with their scientific computing environments. * Try Shogun in the [cloud](https://cloud.shogun.ml) from your browser. - #### Shogun is state-of-the-art * Efficient implementation (from standard to cutting edge algorithms), modern software architecture in C++. diff --git a/doc/readme/DEVELOPING.md b/doc/readme/DEVELOPING.md index d9b684f8de7..5e5368b290f 100644 --- a/doc/readme/DEVELOPING.md +++ b/doc/readme/DEVELOPING.md @@ -49,7 +49,6 @@ The steps are Read error messages and use the internet to find solutions. Compile errors are the easiest to fix! If all that does not help, ask us. - 7. Commit locally, using neat and informative commit messages, grouping commits, potentially iterate over more changes to the code, git commit FILENAME(S) -m "Fix issue #1234" @@ -63,22 +62,29 @@ The steps are git reset --soft HEAD~3 git commit -m 'Clear commit message' +8. Check if the code was written in conformity with the [Shogun Code Style Guidelines](https://github.com/shogun-toolbox/shogun/wiki/Code-style), or the Continuous Integration will fail. +Shogun has a custom script called `check_format.sh` which can be used to verify the code formatting. + + ./scripts/check_format.sh "feature/BRANCH_NAME" "develop" -8. [Rebase](https://git-scm.com/book/en/v2/Git-Branching-Rebasing) against shogun's develop branch. 
+ The script will provide you with the necessary information to fix potential style errors. All of this is done by + using [clang-format](https://clang.llvm.org/docs/ClangFormat.html). Make sure to have it installed + on your local machine, or the above script won't work. Update the commit once you have fixed the errors. +9. [Rebase](https://git-scm.com/book/en/v2/Git-Branching-Rebasing) against shogun's develop branch. This might cause rebase errors, which you need to [solve](https://help.github.com/articles/resolving-merge-conflicts-after-a-git-rebase/) git pull --rebase upstream develop -9. Push your commits to your fork +10. Push your commits to your fork git push origin feature/BRANCH_NAME If you squashed or amended commits after you had pushed already, you might be required to force push via using the `git push -f` option **with care**. -10. Send a [pull request](https://help.github.com/articles/about-pull-requests/) (PR) via GitHub. +11. Send a [pull request](https://help.github.com/articles/about-pull-requests/) (PR) via GitHub. As described above, you can always **update** a pull request using the the `git push -f` option. Please **do not** close and send new ones instead, always update. -11. Once the PR is merged, keep an eye on the [buildfarm](#buildfarm) to see whether your patch broke something. +12. Once the PR is merged, keep an eye on the [buildfarm](#buildfarm) to see whether your patch broke something. ## Requirements for merging your PR * Read some [tips](http://blog.ploeh.dk/2015/01/15/10-tips-for-better-pull-requests/) on how to write good pull requests. @@ -127,7 +133,7 @@ You can execute single tests via `ctest`, or via directly executing the unit tes Note that wildcards are allowed. Running single sub-tests is sometimes useful (i.e. 
for bug hunting) - ./bin/shogun-unit-test --gtest_filter=GaussianProcessRegression.apply_apply_regression + ./bin/shogun-unit-test --gtest_filter=GaussianProcessRegression.apply_regression ### Debugging and Memory leaks **All your C++ code and unit tests must be checked to not leak memory!** @@ -136,8 +142,8 @@ If you do that, you might want to compile with debugging symbols and without com Then - valgrind ./shogun-unit-test --gtest_filter=GaussianProcessRegression.apply_apply_regression - gdb ./shogun-unit-test --gtest_filter=GaussianProcessRegression.apply_apply_regression + valgrind ./bin/shogun-unit-test --gtest_filter=GaussianProcessRegression.apply_regression + gdb --args ./bin/shogun-unit-test --gtest_filter=GaussianProcessRegression.apply_regression The option `--leak-check=full` for valgrind might be useful. In addition to manually running valgrind on your tests, you can use `ctest` to check multiple tests. diff --git a/doc/readme/INSTALL.md b/doc/readme/INSTALL.md index be284527e43..b66fa038505 100644 --- a/doc/readme/INSTALL.md +++ b/doc/readme/INSTALL.md @@ -7,6 +7,7 @@ For other cases, we describe how to build Shogun from source code. # Quicklinks * [Ready-to-install packages](#binaries) + - [Anaconda](#anaconda) - [Ubuntu](#ubuntu) - [Debian](#debian) - [Fedora](#fedora) @@ -23,10 +24,22 @@ For other cases, we describe how to build Shogun from source code. - [Problems](#manual-problems) - [CMake tips](#manual-cmake) - [Customized Python](#manual-python) - - [Winows](#manual-windows) + - [Windows](#manual-windows) ## Ready-to-install packages +### Anaconda packages +The base shogun library and its Python interface are available through the conda package manager, via conda-forge. 
+To install both: + + conda install -c conda-forge shogun + +or to get just the library: + + conda install -c conda-forge shogun-cpp + +These packages include most of the optional dependencies and are currently available for Linux and MacOS; we're [working on a Windows build](https://github.com/conda-forge/shogun-cpp-feedstock/issues/1). + ### Ubuntu ppa We are working on integrating Shogun with Debian/Ubuntu. In the meantime, we offer a [prepackaged ppa](https://launchpad.net/~shogun-toolbox/+archive/ubuntu/stable). @@ -38,7 +51,7 @@ Add this to your system as Then, install as - sudo apt-get install libshogun17 + sudo apt-get install libshogun18 The Python (2) bindings can be installed as @@ -47,19 +60,19 @@ The Python (2) bindings can be installed as In addition to the latest stable release, we offer [nightly builds](https://launchpad.net/~shogun-toolbox/+archive/ubuntu/nightly) of our development branch. ### Debian -Latest packages for Debian jessie are available in our own repository at [http://apt.shogun.ml](http://apt.shogun.ml). -We provide both the stable and nightly packages, currenlty only for amd64 architecture. +Latest packages for Debian `jessie` and `stretch` are available in our own repository at [http://apt.shogun.ml](http://apt.shogun.ml). +We provide both the stable and nightly packages, currently only for amd64 architecture. In order to add the stable packages to your system, simply run the following commands sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 3DD2174ACAB30365 - echo "deb http://apt.shogun.ml/ jessie main" | sudo tee /etc/apt/sources.list.d/shogun-toolbox.list > /dev/null + echo "deb http://apt.shogun.ml/ stretch main" | sudo tee /etc/apt/sources.list.d/shogun-toolbox.list > /dev/null sudo apt-get update After this just simply install the shogun library - sudo apt-get install libshogun17 + sudo apt-get install libshogun18 -The nightly packages are available in the `nightly` component, i.e. 
`deb http://apt.shogun.ml/ jessie nightly` +The nightly packages are available in the `nightly` component, i.e. `deb http://apt.shogun.ml/ stretch nightly` ### Fedora Shogun is part of [Fedora 25](https://admin.fedoraproject.org/pkgdb/package/rpms/shogun/). @@ -72,10 +85,7 @@ Install as Shogun is part of [homebrew-science](https://github.com/Homebrew/homebrew-science). Install the latest stable version as - sudo brew install shogun - -Note: Shogun in homebrew is outdated. -Contact us if this changed or if you want to help changing it. + sudo brew install homebrew/science/shogun ### Windows Shogun natively compiles under Windows using MSVC, see the [AppVeyor CI build](https://ci.appveyor.com/project/vigsterkr/shogun) and the [Windows section](#manual-windows) @@ -89,11 +99,9 @@ You can run Shogun in [our own cloud](cloud.shogun.ml) or set up your own using sudo docker pull shogun/shogun:master sudo docker run -it shogun/shogun:master bash -We offer images for both the latest release and nightly development builds. - -For the [developer version](https://hub.docker.com/r/shogun/shogun-dev/), replace `shogun/shogun:master` with `shogun/shogun-dev`. +The docker image follows both the `master` and the `develop` branch of the repository, just specify the desired branch name as tag for the image. For example in order to use the develop version of shogun simply pull the `shogun/shogun:develop` docker image. -Check the "details" tab before downloading to check if the latest build was successful (otherwise you might run into errors when running the docker image)." +There's an [SDK docker image](https://hub.docker.com/r/shogun/shogun-dev/) for shogun development as well, which we use to run our [Travis CI](https://travis-ci.org/shogun-toolbox/shogun/) jobs. Sometimes mounting a local folder into the docker image is useful. 
You can do this via passing an additional option @@ -178,10 +186,10 @@ In both cases, it is necessary to set a number of system libraries for using Sho ## Interfaces The native C++ interface is always included. -The cmake options for building interfaces are `-DPythonModular -DOctaveModular -DRModular -DJavaModular -DRubyModular -DLuaModular -DCSharpModular` etc. +The cmake options for building interfaces are `-DINTERFACE_PYTHON=ON -DINTERFACE_R ..` etc. For example, replace the cmake step above by ``` -cmake -DPythonModular=ON [potentially more options] .. +cmake -DINTERFACE_PYTHON=ON [potentially more options] .. ``` The required packages (here debian/Ubuntu package names) for each interface are @@ -228,10 +236,6 @@ Make sure to read the [docs](https://cmake.org/documentation/) and [CMake_Useful Make sure to understand the concept of [out of source builds](https://cmake.org/Wiki/CMake_FAQ#Out-of-source_build_trees). Here are some tips on common options that are useful -Getting a list of possible interfaces to enable: - - grep -E "OPTION.*(Modular)" CMakeLists.txt - Specify a different swig executable: cmake -DSWIG_EXECUTABLE=/usr/bin/swig_custom @@ -256,16 +260,16 @@ For that, you need to do something similar to For example, for `brew` installed Python under MacOS, use something like: - cmake -DPYTHON_INCLUDE_DIR=/usr/local/Cellar/python/2.7.5/Frameworks/Python.framework/Headers -DPYTHON_LIBRARY=/usr/local/Cellar/python/2.7.5/Frameworks/Python.framework/Versions/2.7/lib/libpython2.7.dylib -DPythonModular=ON .. + cmake -DPYTHON_INCLUDE_DIR=/usr/local/Cellar/python/2.7.5/Frameworks/Python.framework/Headers -DPYTHON_LIBRARY=/usr/local/Cellar/python/2.7.5/Frameworks/Python.framework/Versions/2.7/lib/libpython2.7.dylib -DINTERFACE_PYTHON=ON .. 
Under Linux, where you want to use Python 3, which is not the system's default: - cmake -DPYTHON_INCLUDE_DIR=/usr/include/python3.3 -DPYTHON_EXECUTABLE:FILEPATH=/usr/bin/python3 -DPYTHON_PACKAGES_PATH=/usr/local/lib/python3.3/dist-packages -DPythonModular=ON .. + cmake -DPYTHON_INCLUDE_DIR=/usr/include/python3.3 -DPYTHON_EXECUTABLE:FILEPATH=/usr/bin/python3 -DPYTHON_PACKAGES_PATH=/usr/local/lib/python3.3/dist-packages -DINTERFACE_PYTHON=ON .. On a Linux cluster without root access, using [Anaconda](https://www.continuum.io/downloads) (note you will need to activate your environment everytime you want to run Shogun): source path/to/anaconda/bin/activate - cmake -DCMAKE_INSTALL_PREFIX=path/to/shogun/install/dir -DPYTHON_INCLUDE_DIR=path/to/anaconda/include/python2.7/ -DPYTHON_LIBRARY=path/to/anaconda/lib/libpython2.7.so -DPYTHON_EXECUTABLE=path/to/anaconda/bin/python -DPythonModular=On .. + cmake -DCMAKE_INSTALL_PREFIX=path/to/shogun/install/dir -DPYTHON_INCLUDE_DIR=path/to/anaconda/include/python2.7/ -DPYTHON_LIBRARY=path/to/anaconda/lib/libpython2.7.so -DPYTHON_EXECUTABLE=path/to/anaconda/bin/python -DINTERFACE_PYTHON=ON .. ## Windows build @@ -273,7 +277,7 @@ Please see our [AppVeyor](https://ci.appveyor.com/project/vigsterkr/shogun) buil It is recommended to use "Visual Studio 14 2015" or "MSBuild". You will need to adjust all path names to the Windows style, e.g. 
- git clone https://github.com/shogun-toolbox/shogun.git C:\projects\shogun + git clone https://github.com/shogun-toolbox/shogun.git C:\projects\shogun git submodule -q update --init cd C:\projects\shogun md build && cd build diff --git a/doc/readme/INTERFACES.md b/doc/readme/INTERFACES.md index b348a5b3217..b5e2aef34ca 100644 --- a/doc/readme/INTERFACES.md +++ b/doc/readme/INTERFACES.md @@ -39,53 +39,53 @@ Running it: ./native_example ### Python -This needs `modshogun.py` to be visible, which is either in `path/to/build/src/interfaces/python_modular/` or in something similar to `path/to/shogun-install/lib/python2.7/dist-packages/` +This needs `shogun.py` to be visible, which is either in `path/to/build/src/interfaces/python_modular/` or in something similar to `path/to/shogun-install/lib/python2.7/dist-packages/` - export PYTHONPATH="path/to/modshogun.py:$PYTHONPATH" + export PYTHONPATH="path/to/shogun.py:$PYTHONPATH" Running an example: python path/to/python_example.py ### Octave -This needs `modshogun.oct` to be visible, which is either in `path/to/build/src/interfaces/octave_modular/` or in something similar to `path/to/shogun-install/lib/x86_64-linux-gnu/octave/site/oct/api-v50+/x86_64-pc-linux-gnu/shogun/` +This needs `shogun.oct` to be visible, which is either in `path/to/build/src/interfaces/octave_modular/` or in something similar to `path/to/shogun-install/lib/x86_64-linux-gnu/octave/site/oct/api-v50+/x86_64-pc-linux-gnu/shogun/` - export OCTAVE_PATH="path/to/modshogun.oct:$OCTAVE_PATH" + export OCTAVE_PATH="path/to/shogun.oct:$OCTAVE_PATH" Running an example: python path/to/octave_example.py ### Ruby -This needs `modshogun.rb` to be visible, which is either in `path/to/build/src/interfaces/ruby_modular/` or in something similar to `path/to/shogun-install/lib/x86_64-linux-gnu/site_ruby` - export RUBYLIB="path/to/modshogun.rb:$RUBYLIB" +This needs `shogun.rb` to be visible, which is either in `path/to/build/src/interfaces/ruby_modular/` or in something 
similar to `path/to/shogun-install/lib/x86_64-linux-gnu/site_ruby` + export RUBYLIB="path/to/shogun.rb:$RUBYLIB" Running an example: ruby path/to/ruby_example.rb ### R -This needs `modshogun.R` to be visible, which is either in `path/to/build/src/interfaces/r_modular/` or in something similar to `path/to/shogun-install/lib/R/site-library` - export R_LIBS_USER="path/to/modshogun.R:$R_LIBS_USER" +This needs `shogun.R` to be visible, which is either in `path/to/build/src/interfaces/r_modular/` or in something similar to `path/to/shogun-install/lib/R/site-library` + export R_LIBS_USER="path/to/shogun.R:$R_LIBS_USER" Running an example: R --no-restore --no-save --no-readline --slave -f path/to/r_example.rb ### Lua -This needs `libmodshogun.so` (this is the interface file, not the shared library file `libshogun.so`) to be visible, which is either in `path/to/build/src/interfaces/lua_modular/` or in something similar to `path/to/shogun-install/lib/lua/5.1/` +This needs `libshogun.so` (this is the interface file, not the shared library file `libshogun.so`) to be visible, which is either in `path/to/build/src/interfaces/lua_modular/` or in something similar to `path/to/shogun-install/lib/lua/5.1/` - export LUA_CPATH="path/to/libmodshogun.so:$LUA_CPATH" + export LUA_CPATH="path/to/libshogun.so:$LUA_CPATH" Running an example: R --no-restore --no-save --no-readline --slave -f path/to/r_example.R ### CSharp -This needs `modshogun.dll` to be visible, which is either in `path/to/build/src/interfaces/csharp_modular` or in something similar to `path/to/shogun-install/lib/cli/shogun/` +This needs `shogun.dll` to be visible, which is either in `path/to/build/src/interfaces/csharp_modular` or in something similar to `path/to/shogun-install/lib/cli/shogun/` Compiling code works with the mono C# compiler and passing location of the above file - mcs path/to/csharp_example.cs /lib:path/to/modshogun.dll/r:modshogun -out:csharp_example.exe + mcs path/to/csharp_example.cs 
/lib:path/to/shogun.dll/r:shogun -out:csharp_example.exe Running requires setting the mono path @@ -103,7 +103,7 @@ usually in `/usr/share/java/`. Compiling code works with the java compiler and passing location of `shogun.jar`, `jblas.jar`, and the example itself in the class path - javac -cp /path/to/jblas.jar:/path/to/modshogun.jar:path/to/java_example.java -d /path/to/output/ /path/to/java_example.java + javac -cp /path/to/jblas.jar:/path/to/shogun.jar:path/to/java_example.java -d /path/to/output/ /path/to/java_example.java Running it again requires the above class path and some more options diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index a82e3a06382..0b3d5780f52 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -2,8 +2,8 @@ IF (NOT TRAVIS_DISABLE_LIBSHOGUN_TESTS AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/un add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/undocumented/libshogun) ENDIF() -IF (PythonModular AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/undocumented/python_modular) - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/undocumented/python_modular) +IF (INTERFACE_PYTHON AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/undocumented/python) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/undocumented/python) ENDIF() IF(BUILD_META_EXAMPLES) diff --git a/examples/meta/CMakeLists.txt b/examples/meta/CMakeLists.txt index 2ffca780f55..9fd5374bdc6 100644 --- a/examples/meta/CMakeLists.txt +++ b/examples/meta/CMakeLists.txt @@ -27,7 +27,7 @@ LIST(APPEND GENERATOR_DEPENDENCIES ${TARGET_LANGUAGES}) file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/parser_files) # list of interfaces for which we dont generate meta examples -SET(DISABLED_INTERFACES PerlModular) +SET(DISABLED_INTERFACES INTERFACE_PERL) FOREACH(META_EXAMPLE ${META_EXAMPLES}) # assume a structure //listing.sg @@ -40,7 +40,7 @@ FOREACH(META_EXAMPLE ${META_EXAMPLES}) # FIXME # Hack to avoid generation of scala meta examples dependencies that will # not be generated by generate.py script (since 
there is no scala.json) - LIST(REMOVE_ITEM AVAILABLE_INTERFACES "ScalaModular") + LIST(REMOVE_ITEM AVAILABLE_INTERFACES INTERFACE_SCALA) FOREACH(interface ${AVAILABLE_INTERFACES}) list(FIND DISABLED_INTERFACES ${interface} disabled) if (${disabled} STREQUAL "-1") @@ -48,7 +48,7 @@ FOREACH(META_EXAMPLE ${META_EXAMPLES}) LIST(APPEND EXAMPLE_LISTINGS ${CMAKE_CURRENT_BINARY_DIR}/${DIRECTORY}/${BASENAME}.${EXTENSION}) endif() ENDFOREACH() - LIST(APPEND AVAILABLE_INTERFACES "ScalaModular") + LIST(APPEND AVAILABLE_INTERFACES INTERFACE_SCALA) # Set generate.py flags SET(GENERATOR_FLAGS @@ -75,7 +75,7 @@ FOREACH(META_EXAMPLE ${META_EXAMPLES}) ENDFOREACH() SET_SOURCE_FILES_PROPERTIES(${TRANSLATED_META_EXAMPLES} PROPERTIES GENERATED TRUE) -add_custom_target(meta_examples +add_custom_target(meta_examples ALL DEPENDS ${TRANSLATED_META_EXAMPLES}) INSTALL(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/cpp @@ -104,11 +104,11 @@ ENDIF() # FIXME: these are the interfaces which has various problems # hence need to disable their testing # temporarily disabled as R and static calls has to be fixed -LIST(APPEND DISABLED_INTERFACES RModular) +LIST(APPEND DISABLED_INTERFACES INTERFACE_R) # temporarily removed since lua modular currently # does not support overloaded c++ methods # see https://github.com/shogun-toolbox/shogun/issues/3018 -LIST(APPEND DISABLED_INTERFACES LuaModular) +LIST(APPEND DISABLED_INTERFACES INTERFACE_LUA) FOREACH(interface ${AVAILABLE_INTERFACES}) diff --git a/examples/meta/cpp/CMakeLists.txt b/examples/meta/cpp/CMakeLists.txt index a6a9e154928..f79be391918 100644 --- a/examples/meta/cpp/CMakeLists.txt +++ b/examples/meta/cpp/CMakeLists.txt @@ -32,9 +32,9 @@ FOREACH(META_EXAMPLE ${META_EXAMPLES}) # Add examples to the dependencies of modular interfaces to make sure # nothing will infer with them being build single-threaded. 
IF(SWIG_SINGLE_THREADED) - FOREACH(SG_MODULAR_INTERFACE_TARGET ${SG_MODULAR_INTERFACE_TARGETS}) - ADD_DEPENDENCIES(${SG_MODULAR_INTERFACE_TARGET} ${GENERATED_CPP_BINARY}) - ENDFOREACH(SG_MODULAR_INTERFACE_TARGET ${SG_MODULAR_INTERFACE_TARGETS}) + FOREACH(SG_INTERFACE_TARGET ${SG_INTERFACE_TARGETS}) + ADD_DEPENDENCIES(${SG_INTERFACE_TARGET} ${GENERATED_CPP_BINARY}) + ENDFOREACH(SG_INTERFACE_TARGET ${SG_INTERFACE_TARGETS}) ENDIF(SWIG_SINGLE_THREADED) LIST(APPEND GENERATED_CPP_EXAMPLES ${GENERATED_CPP_BINARY}) diff --git a/examples/meta/csharp/CMakeLists.txt b/examples/meta/csharp/CMakeLists.txt index 38a7b28c4f1..7e0d86bfc81 100644 --- a/examples/meta/csharp/CMakeLists.txt +++ b/examples/meta/csharp/CMakeLists.txt @@ -1,4 +1,4 @@ -SET(CSHARP_FLAGS "/lib:${CSHARP_MODULAR_BUILD_DIR};/r:modshogun") +SET(CSHARP_FLAGS "/lib:${INTERFACE_CSHARP_BUILD_DIR};/r:shogun") # add test case for each generated example # (not generated yet so have to fake filenames from META_EXAMPLES list) @@ -11,7 +11,7 @@ FOREACH(META_EXAMPLE ${META_EXAMPLES}) ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${EXAMPLE_REL_DIR}/${EXAMPLE_NAME}.exe COMMAND ${CSHARP_COMPILER} ${EXAMPLE_NAME}.cs ${CSHARP_FLAGS} -out:${EXAMPLE_NAME}.exe - DEPENDS csharp_modular shogun meta_examples ${CSHARP_META_EXAMPLE_SOURCE} + DEPENDS interface_csharp shogun meta_examples ${CSHARP_META_EXAMPLE_SOURCE} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${EXAMPLE_REL_DIR}) ADD_CUSTOM_TARGET(csharp_${EXAMPLE_NAME} ALL @@ -23,7 +23,7 @@ FOREACH(META_EXAMPLE ${META_EXAMPLES}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${EXAMPLE_REL_DIR} COMMAND ${CSHARP_INTERPRETER} ${CMAKE_CURRENT_BINARY_DIR}/${EXAMPLE_REL_DIR}/${EXAMPLE_NAME}.exe) set_property(TEST generated_csharp-${EXAMPLE_NAME_WITH_DIR} PROPERTY - ENVIRONMENT "MONO_PATH=${CSHARP_MODULAR_BUILD_DIR}") + ENVIRONMENT "MONO_PATH=${INTERFACE_CSHARP_BUILD_DIR}") ENDFOREACH() add_custom_target(build_csharp_meta_examples ALL diff --git a/examples/meta/generator/generate.py 
b/examples/meta/generator/generate.py index 908667be078..ba8d1e8d355 100755 --- a/examples/meta/generator/generate.py +++ b/examples/meta/generator/generate.py @@ -88,8 +88,13 @@ def translateExamples(inputDir, outputDir, targetsDir, ctagsFile, extension = target["FileExtension"] # Create directory if it does not exist - if not os.path.exists(directory): - os.makedirs(directory) + try: + os.makedirs(name=directory, exist_ok=True) + except TypeError: + try: + os.makedirs(directory) + except OSError: + pass # Write translation outputFile = os.path.join(directory, @@ -98,9 +103,12 @@ def translateExamples(inputDir, outputDir, targetsDir, ctagsFile, # create subdirectories if they don't exist yet try: - os.makedirs(os.path.dirname(outputFile)) - except OSError: - pass + os.makedirs(name=os.path.dirname(outputFile), exist_ok=True) + except TypeError: + try: + os.makedirs(os.path.dirname(outputFile)) + except OSError: + pass with open(outputFile, "w") as nf: nf.write(translation) diff --git a/examples/meta/generator/parse.py b/examples/meta/generator/parse.py index 2d68ed33a5f..e84247630dc 100644 --- a/examples/meta/generator/parse.py +++ b/examples/meta/generator/parse.py @@ -213,6 +213,10 @@ def p_staticCall(self, p): "staticCall : type COLON identifier LPAREN argumentList RPAREN" p[0] = {"StaticCall": [p[1], p[3], p[5]]} + def p_globalCall(self, p): + "globalCall : identifier LPAREN argumentList RPAREN" + p[0] = {"GlobalCall": [p[1], p[3]]} + def p_indexList(self, p): """ indexList : int @@ -262,6 +266,7 @@ def p_expr(self, p): expr : enum | methodCall | staticCall + | globalCall | elementAccess | string | char diff --git a/examples/meta/generator/targets/cpp.json b/examples/meta/generator/targets/cpp.json index 73a224161e0..2dbcbe2dd03 100644 --- a/examples/meta/generator/targets/cpp.json +++ b/examples/meta/generator/targets/cpp.json @@ -3,6 +3,7 @@ "Dependencies": { "IncludeAllClasses": true, "IncludeEnums": true, + "IncludeGlobalFunctions": true, 
"DependencyListElement": "#include <$includePath>", "DependencyListSeparator": "\n" }, @@ -85,6 +86,7 @@ "FloatLiteral": "${number}f", "MethodCall": "$object->$method($arguments)", "StaticCall": "C$typeName::$method($arguments)", + "GlobalCall": "$method($arguments)", "Identifier": "$identifier", "Enum":"$typeName::$value" }, diff --git a/examples/meta/generator/targets/csharp.json b/examples/meta/generator/targets/csharp.json index 50d40a6baf0..5325014e41c 100644 --- a/examples/meta/generator/targets/csharp.json +++ b/examples/meta/generator/targets/csharp.json @@ -1,5 +1,5 @@ { - "Program": "using System;\n\npublic class classifier_knn_modular {\npublic static void Main() {\nmodshogun.init_shogun_with_defaults();\n\n$program\n}\n}\n", + "Program": "using System;\n\npublic class Application {\npublic static void Main() {\nshogun.init_shogun_with_defaults();\n\n$program\n}\n}\n", "Statement": "$statement;\n", "Comment": "//$comment\n", "Init": { @@ -70,6 +70,7 @@ "FloatLiteral": "${number}f", "MethodCall": "$object.$method($arguments)", "StaticCall": "$typeName.$method($arguments)", + "GlobalCall": "shogun.$method($arguments)", "Identifier": "$identifier", "Enum":"$typeName.$value" }, diff --git a/examples/meta/generator/targets/java.json b/examples/meta/generator/targets/java.json index 59ec7da993c..bf46a84921f 100644 --- a/examples/meta/generator/targets/java.json +++ b/examples/meta/generator/targets/java.json @@ -1,8 +1,9 @@ { - "Program": "import org.jblas.DoubleMatrix;\nimport org.jblas.FloatMatrix;\n\nimport org.shogun.modshogun;\n$dependencies\n\npublic class $programName {\nstatic {\nSystem.loadLibrary(\"modshogun\");\n}\n\npublic static void main(String argv[]) {\nmodshogun.init_shogun_with_defaults();\n\n$program\n}\n}\n", + "Program": "import org.jblas.DoubleMatrix;\nimport org.jblas.FloatMatrix;\n\nimport org.shogun.shogun;\n$dependencies\n\npublic class $programName {\nstatic {\nSystem.loadLibrary(\"shogun\");\n}\n\npublic static void main(String 
argv[]) {\nshogun.init_shogun_with_defaults();\n\n$program\n}\n}\n", "Dependencies": { "IncludeAllClasses": true, "IncludeEnums": true, + "IncludeGlobalFunctions": false, "DependencyListElement": "import org.shogun.$typeName;", "DependencyListSeparator": "\n" }, @@ -75,6 +76,7 @@ "FloatLiteral": "${number}f", "MethodCall": "$object.$method($arguments)", "StaticCall": "$typeName.$method($arguments)", + "GlobalCall": "shogun.$method($arguments)", "Identifier": "$identifier", "Enum":"$typeName.$value" }, diff --git a/examples/meta/generator/targets/lua.json b/examples/meta/generator/targets/lua.json index a5a5171f525..9cef4e54ef1 100644 --- a/examples/meta/generator/targets/lua.json +++ b/examples/meta/generator/targets/lua.json @@ -1,9 +1,9 @@ { - "Program": "require 'modshogun'\n\n$program", + "Program": "require 'shogun'\n\n$program", "Statement": "$statement\n", "Comment": "--$comment\n", "Init": { - "Construct": "$name = modshogun.$typeName($arguments)", + "Construct": "$name = shogun.$typeName($arguments)", "Copy": "$name = $expr" }, "Assign": "$identifier = $expr", @@ -22,8 +22,9 @@ "FloatLiteral": "$number", "MethodCall": "$object:$method($arguments)", "StaticCall": "$typeName:$method($arguments)", + "GlobalCall": "$method($arguments)", "Identifier": "$identifier", - "Enum":"modshogun.$value" + "Enum":"shogun.$value" }, "Element": { "Access": { diff --git a/examples/meta/generator/targets/octave.json b/examples/meta/generator/targets/octave.json index 010aa47f040..07bb58c247a 100644 --- a/examples/meta/generator/targets/octave.json +++ b/examples/meta/generator/targets/octave.json @@ -1,5 +1,5 @@ { - "Program": "modshogun\n\n$program", + "Program": "shogun\n\n$program", "Statement": "$statement;\n", "Comment": "%$comment\n", "Init": { @@ -40,6 +40,7 @@ "FloatLiteral": "$number", "MethodCall": "$object.$method($arguments)", "StaticCall": "$typeName.$method($arguments)", + "GlobalCall": "$method($arguments)", "Identifier": "$identifier", "Enum":"$value" }, diff 
--git a/examples/meta/generator/targets/python.json b/examples/meta/generator/targets/python.json index b83e113b50b..228b2e75bcf 100644 --- a/examples/meta/generator/targets/python.json +++ b/examples/meta/generator/targets/python.json @@ -4,8 +4,9 @@ "IncludeAllClasses": false, "IncludeInterfacedClasses": true, "IncludeEnums": true, - "DependencyListElement": "from modshogun import $typeName", - "DependencyListElementEnum": "from modshogun import $value", + "IncludeGlobalFunctions": true, + "DependencyListElement": "from shogun import $typeName", + "DependencyListElementEnum": "from shogun import $value", "DependencyListSeparator": "\n" }, "Statement": "$statement\n", @@ -50,6 +51,7 @@ "FloatLiteral": "$number", "MethodCall": "$object.$method($arguments)", "StaticCall": "$typeName.$method($arguments)", + "GlobalCall": "$method($arguments)", "Identifier": "$identifier", "Enum":"$value" }, diff --git a/examples/meta/generator/targets/r.json b/examples/meta/generator/targets/r.json index 0c79eed971e..c270184ea6f 100644 --- a/examples/meta/generator/targets/r.json +++ b/examples/meta/generator/targets/r.json @@ -22,6 +22,7 @@ "FloatLiteral": "$number", "MethodCall": "$object$$$method($arguments)", "StaticCall": "$typeName$$$method($arguments)", + "GlobalCall": "$method($arguments)", "Identifier": "$identifier", "Enum":"\"$value\"" }, diff --git a/examples/meta/generator/targets/ruby.json b/examples/meta/generator/targets/ruby.json index d6dd8c9441a..c9d889c6856 100644 --- a/examples/meta/generator/targets/ruby.json +++ b/examples/meta/generator/targets/ruby.json @@ -1,9 +1,9 @@ { - "Program": "require 'modshogun'\n\n$dependencies$program", + "Program": "require 'shogun'\n\n$dependencies$program", "Statement": "$statement\n", "Comment": "#$comment\n", "Init": { - "Construct": "$name = Modshogun::$typeName.new $arguments", + "Construct": "$name = Shogun::$typeName.new $arguments", "Copy": "$name = $expr", "CharVector": "$name = NArray.byte($arguments)", "ByteVector": 
"$name = NArray.byte($arguments)", @@ -41,9 +41,10 @@ "RealLiteral": "$number", "FloatLiteral": "$number", "MethodCall": "$object.$method $arguments", - "StaticCall": "Modshogun::$typeName.$method $arguments", + "StaticCall": "Shogun::$typeName.$method $arguments", + "GlobalCall": "Shogun::$method $arguments", "Identifier": "$identifier", - "Enum":"Modshogun::$value" + "Enum":"Shogun::$value" }, "Element": { "Access": { diff --git a/examples/meta/generator/translate.py b/examples/meta/generator/translate.py index 25477a85241..d6e73551ad2 100644 --- a/examples/meta/generator/translate.py +++ b/examples/meta/generator/translate.py @@ -33,6 +33,7 @@ def getDependencies(program): allClasses = set() interfaceClasses = set() enums = set() + globalFunctions = set() # All classes used for objectType in find("ObjectType", program): @@ -55,13 +56,18 @@ def getDependencies(program): objectKey = list(typeDict.keys())[0] interfaceClasses.add(typeDict[objectKey]) + # All global function calls + for globalCall in find("GlobalCall", program): + identifier = list(globalCall[0].values())[0] + globalFunctions.add(identifier) + # All enums used for enum in find("Enum", program): enumType = enum[0]["Identifier"] enumValue = enum[1]["Identifier"] enums.add((enumType, enumValue)) - return allClasses, interfaceClasses, enums + return allClasses, interfaceClasses, enums, globalFunctions def getBasicTypesToStore(): @@ -160,11 +166,12 @@ def translateProgram(self, program, programName=None, storeVars=False): pass raise e - allClasses, interfacedClasses, enums = getDependencies(program) + allClasses, interfacedClasses, enums, globalFunctions = getDependencies(program) try: dependenciesString = self.dependenciesString(allClasses, interfacedClasses, - enums) + enums, + globalFunctions) except Exception as e: print("Translation of dependencies failed!") raise @@ -239,9 +246,10 @@ def injectVarsStoring(self, statementList, programName, varsToStore): } statementList.append({"Statement": 
storageSerialize}) - def dependenciesString(self, allClasses, interfacedClasses, enums): + def dependenciesString(self, allClasses, interfacedClasses, enums, + globalFunctions): """ Returns dependency import string - e.g. for python: "from modshogun import RealFeatures\n\n" + e.g. for python: "from shogun import RealFeatures\n\n" """ if "Dependencies" not in self.targetDict: @@ -256,13 +264,19 @@ def dependenciesString(self, allClasses, interfacedClasses, enums): dependencies = dependencies.union(interfacedClasses) if self.targetDict["Dependencies"].get("IncludeEnums"): dependencies = dependencies.union(enums) + if self.targetDict["Dependencies"].get("IncludeGlobalFunctions"): + dependencies = dependencies.union(globalFunctions) + + dependencies = list(dependencies) - translations = set(map(self.translateDependencyElement, dependencies)) + translations = list(map(self.translateDependencyElement, dependencies)) + translations.sort() separator = self.targetDict["Dependencies"]["DependencyListSeparator"] - return reduce(lambda l, r: r if l == "" else l+separator+r, + result = reduce(lambda l, r: r if l == "" else l+separator+r, translations, "") + return result def translateDependencyElement(self, dependencyElement): """ Translates a dependency element @@ -288,7 +302,6 @@ def translateDependencyElement(self, dependencyElement): elif "DependencyListElementClass" in dependencyRules: elementTemplate = Template(dependencyRules["DependencyListElementClass"]) - if "$includePath" in elementTemplate.template: includePath = self.getIncludePathForClass(typeName) @@ -465,6 +478,19 @@ def translateExpr(self, expr): method=method, arguments=translatedArgsList) + elif key == "GlobalCall": + template = Template(self.targetDict["Expr"]["GlobalCall"]) + method = expr[key][0]["Identifier"] + argsList = None + try: + argsList = expr[key][2] + except IndexError: + pass + translatedArgsList = self.translateArgumentList(argsList) + + return template.substitute(typeName=type,method=method, 
+ arguments=translatedArgsList) + elif key == "ElementAccess": return self.translateElementAccess(expr[key]) diff --git a/examples/meta/java/CMakeLists.txt b/examples/meta/java/CMakeLists.txt index e97abcfb3b5..dee81196e12 100644 --- a/examples/meta/java/CMakeLists.txt +++ b/examples/meta/java/CMakeLists.txt @@ -1,6 +1,6 @@ SET(CLASSPATH "${JBLAS}:${SHOGUN_JAR}:${CMAKE_CURRENT_BINARY_DIR}") SET(JAVAOPTS "-Xmx1024m") -SET(JAVA_LIB_PATH "${JAVA_MODULAR_BUILD_DIR}") +SET(JAVA_LIB_PATH "${INTERFACE_JAVA_BUILD_DIR}") # add test case for each generated example # (not generated yet so have to fake filenames from META_EXAMPLES list) @@ -15,7 +15,7 @@ FOREACH(META_EXAMPLE ${META_EXAMPLES}) COMMAND ${Java_JAVAC_EXECUTABLE} -cp ${CLASSPATH} -d ${CMAKE_CURRENT_BINARY_DIR}/${EXAMPLE_REL_DIR} ${JAVA_META_EXAMPLE_SOURCE} - DEPENDS java_modular meta_examples shogun ${JAVA_META_EXAMPLE_SOURCE}) + DEPENDS interface_java meta_examples shogun ${JAVA_META_EXAMPLE_SOURCE}) ADD_CUSTOM_TARGET(java_${EXAMPLE_NAME} ALL DEPENDS ${COMPILED_JAVA_EXAMPLE} diff --git a/examples/meta/lua/CMakeLists.txt b/examples/meta/lua/CMakeLists.txt index 88bb1fead64..fa79808184f 100644 --- a/examples/meta/lua/CMakeLists.txt +++ b/examples/meta/lua/CMakeLists.txt @@ -8,5 +8,5 @@ FOREACH(META_EXAMPLE ${META_EXAMPLES}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/{EXAMPLE_REL_DIR} COMMAND ${LUA_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/${EXAMPLE_REL_DIR}/${EXAMPLE_NAME}.lua) set_property(TEST generated_lua-${EXAMPLE_NAME_WITH_DIR} PROPERTY - ENVIRONMENT "LUA_CPATH=${LUA_MODULAR_BUILD_DIR}/libmodshogun.so") + ENVIRONMENT "LUA_CPATH=${INTERFACE_LUA_BUILD_DIR}/libshogun.so") ENDFOREACH() diff --git a/examples/meta/octave/CMakeLists.txt b/examples/meta/octave/CMakeLists.txt index 571414ec640..355ae6b5a72 100644 --- a/examples/meta/octave/CMakeLists.txt +++ b/examples/meta/octave/CMakeLists.txt @@ -8,6 +8,6 @@ FOREACH(META_EXAMPLE ${META_EXAMPLES}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${EXAMPLE_REL_DIR} 
COMMAND ${OCTAVE_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/${EXAMPLE_REL_DIR}/${EXAMPLE_NAME}.m) set_property(TEST generated_octave-${EXAMPLE_NAME_WITH_DIR} PROPERTY - ENVIRONMENT "OCTAVE_PATH=${OCTAVE_MODULAR_BINARY_DIR}") + ENVIRONMENT "OCTAVE_PATH=${INTERFACE_OCTAVE_BINARY_DIR}") ENDFOREACH() diff --git a/examples/meta/ruby/CMakeLists.txt b/examples/meta/ruby/CMakeLists.txt index fd24f5a41fa..ef4d3ef72f3 100644 --- a/examples/meta/ruby/CMakeLists.txt +++ b/examples/meta/ruby/CMakeLists.txt @@ -10,5 +10,5 @@ FOREACH(META_EXAMPLE ${META_EXAMPLES}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${EXAMPLE_REL_DIR} COMMAND ${RUBY_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/${EXAMPLE_REL_DIR}/${EXAMPLE_NAME}.rb) set_property(TEST generated_ruby-${EXAMPLE_NAME_WITH_DIR} PROPERTY - ENVIRONMENT "RUBYLIB=${RUBY_MODULAR_SOURCE_DIR}:${RUBY_MODULAR_BINARY_DIR}:.") + ENVIRONMENT "RUBYLIB=${INTERFACE_RUBY_SOURCE_DIR}:${INTERFACE_RUBY_BINARY_DIR}:.") ENDFOREACH() diff --git a/examples/meta/scala/CMakeLists.txt b/examples/meta/scala/CMakeLists.txt index c7e3a0f3771..f3907d75f7f 100644 --- a/examples/meta/scala/CMakeLists.txt +++ b/examples/meta/scala/CMakeLists.txt @@ -1,6 +1,6 @@ string( REGEX REPLACE "/scala$" "/java" CMAKE_JAVA_DIR "${CMAKE_CURRENT_BINARY_DIR}" ) SET(CLASSPATH "${JBLAS}:${SHOGUN_JAR}:${Scala_JAR_EXECUTABLE}:${CMAKE_CURRENT_BINARY_DIR}") -SET(JAVA_LIB_PATH "${JAVA_MODULAR_BUILD_DIR}") +SET(JAVA_LIB_PATH "${INTERFACE_JAVA_BUILD_DIR}") # add test case for each generated example # (not generated yet so have to fake filenames from META_EXAMPLES list) diff --git a/examples/meta/src/converter/ica_fast.sg b/examples/meta/src/converter/ica_fast.sg new file mode 100644 index 00000000000..616f6767275 --- /dev/null +++ b/examples/meta/src/converter/ica_fast.sg @@ -0,0 +1,24 @@ +CSVFile f_feats("../../data/ica_2_sources.dat") + +Math:init_random(1) + +#![create_features] +RealFeatures features(f_feats) +#![create_features] + +#![set_parameters] +FastICA ica() 
+ica.set_max_iter(200) +ica.set_tol(0.00001) +#![set_parameters] + +#![apply_convert] +Features converted = ica.apply(features) +#![apply_convert] + +#![extract] +RealMatrix mixing_matrix = ica.get_mixing_matrix() +RealFeatures casted = RealFeatures:obtain_from_generic(converted) +RealMatrix unmixed_signal = casted.get_feature_matrix() +#![extract] + diff --git a/examples/meta/src/converter/ica_ff_sep.sg b/examples/meta/src/converter/ica_ff_sep.sg new file mode 100644 index 00000000000..c8d6b47af0d --- /dev/null +++ b/examples/meta/src/converter/ica_ff_sep.sg @@ -0,0 +1,24 @@ +CSVFile f_feats("../../data/ica_2_sources.dat") + +Math:init_random(1) + +#![create_features] +RealFeatures features(f_feats) +#![create_features] + +#![set_parameters] +FFSep ica() +ica.set_max_iter(200) +ica.set_tol(0.00001) +#![set_parameters] + +#![apply_convert] +Features converted = ica.apply(features) +#![apply_convert] + +#![extract] +RealMatrix mixing_matrix = ica.get_mixing_matrix() +RealFeatures casted = RealFeatures:obtain_from_generic(converted) +RealMatrix unmixed_signal = casted.get_feature_matrix() +#![extract] + diff --git a/examples/meta/src/converter/ica_jade.sg b/examples/meta/src/converter/ica_jade.sg new file mode 100644 index 00000000000..633e0eedf45 --- /dev/null +++ b/examples/meta/src/converter/ica_jade.sg @@ -0,0 +1,24 @@ +CSVFile f_feats("../../data/ica_2_sources.dat") + +Math:init_random(1) + +#![create_features] +RealFeatures features(f_feats) +#![create_features] + +#![set_parameters] +Jade ica() +ica.set_max_iter(200) +ica.set_tol(0.00001) +#![set_parameters] + +#![apply_convert] +Features converted = ica.apply(features) +#![apply_convert] + +#![extract] +RealMatrix mixing_matrix = ica.get_mixing_matrix() +RealFeatures casted = RealFeatures:obtain_from_generic(converted) +RealMatrix unmixed_signal = casted.get_feature_matrix() +#![extract] + diff --git a/examples/meta/src/converter/ica_jedi_sep.sg b/examples/meta/src/converter/ica_jedi_sep.sg new file mode 
100644 index 00000000000..68501bc9ab5 --- /dev/null +++ b/examples/meta/src/converter/ica_jedi_sep.sg @@ -0,0 +1,24 @@ +CSVFile f_feats("../../data/ica_2_sources.dat") + +Math:init_random(1) + +#![create_features] +RealFeatures features(f_feats) +#![create_features] + +#![set_parameters] +JediSep ica() +ica.set_max_iter(200) +ica.set_tol(0.00001) +#![set_parameters] + +#![apply_convert] +Features converted = ica.apply(features) +#![apply_convert] + +#![extract] +RealMatrix mixing_matrix = ica.get_mixing_matrix() +RealFeatures casted = RealFeatures:obtain_from_generic(converted) +RealMatrix unmixed_signal = casted.get_feature_matrix() +#![extract] + diff --git a/examples/meta/src/converter/ica_sobi.sg b/examples/meta/src/converter/ica_sobi.sg new file mode 100644 index 00000000000..51ddd07c713 --- /dev/null +++ b/examples/meta/src/converter/ica_sobi.sg @@ -0,0 +1,24 @@ +CSVFile f_feats("../../data/ica_2_sources.dat") + +Math:init_random(1) + +#![create_features] +RealFeatures features(f_feats) +#![create_features] + +#![set_parameters] +SOBI ica() +ica.set_max_iter(200) +ica.set_tol(0.00001) +#![set_parameters] + +#![apply_convert] +Features converted = ica.apply(features) +#![apply_convert] + +#![extract] +RealMatrix mixing_matrix = ica.get_mixing_matrix() +RealFeatures casted = RealFeatures:obtain_from_generic(converted) +RealMatrix unmixed_signal = casted.get_feature_matrix() +#![extract] + diff --git a/examples/meta/src/evaluation/cross_validation_mkl_weight_storage.sg b/examples/meta/src/evaluation/cross_validation_mkl_weight_storage.sg new file mode 100644 index 00000000000..fc78600bd8d --- /dev/null +++ b/examples/meta/src/evaluation/cross_validation_mkl_weight_storage.sg @@ -0,0 +1,62 @@ +CSVFile f_feats("../../data/fm_train_real.dat") +CSVFile f_labels("../../data/label_train_twoclass.dat") + +#![create_features] +RealFeatures features(f_feats) +CombinedFeatures combined_features() +combined_features.append_feature_obj(features) 
+combined_features.append_feature_obj(features) +combined_features.append_feature_obj(features) +BinaryLabels labels(f_labels) +#![create_features] + +#![create_kernels] +CombinedKernel kernel() +GaussianKernel k_1(10, 0.1) +GaussianKernel k_2(10, 1) +GaussianKernel k_3(10, 2) +kernel.append_kernel(k_1) +kernel.append_kernel(k_2) +kernel.append_kernel(k_3) +#![create_kernels] + +#![create_classifier] +LibSVM libsvm() +MKLClassification svm(libsvm) +svm.set_interleaved_optimization_enabled(False) +svm.set_kernel(kernel) +#![create_classifier] + +#![create_cross_validation] +StratifiedCrossValidationSplitting splitting_strategy(labels, 5) +AccuracyMeasure evaluation_criterium() +CrossValidation cross(svm, combined_features, labels, splitting_strategy, evaluation_criterium) +cross.set_autolock(False) +cross.set_num_runs(2) +#![create_cross_validation] + +#![create_observer] +ParameterObserverCV mkl_obs(True) +cross.subscribe_to_parameters(mkl_obs) +#[!create_observer] + +#![evaluate_and_get_result] +CrossValidationResult result() +result = CrossValidationResult:obtain_from_generic(cross.evaluate()) +#![evaluate_and_get_result] + +#![get_results] +real mean = result.get_mean() +real stddev = result.get_std_dev() +#![get_results] + +#![get_fold_machine] +CrossValidationStorage obs = mkl_obs.get_observation(0) +CrossValidationFoldStorage fold = obs.get_fold(0) +MKLClassification machine = MKLClassification:obtain_from_generic(fold.get_trained_machine()) +#![get_fold_machine] + +#![get_weights] +CombinedKernel k = CombinedKernel:obtain_from_generic(machine.get_kernel()) +RealVector w = k.get_subkernel_weights() +#![get_weights] \ No newline at end of file diff --git a/examples/meta/src/meta_api/calls.sg b/examples/meta/src/meta_api/calls.sg new file mode 100644 index 00000000000..14b0ef034e6 --- /dev/null +++ b/examples/meta/src/meta_api/calls.sg @@ -0,0 +1,9 @@ +# static call +Math:init_random(1) + +# global function call +get_global_io() + +# member function call 
+GaussianKernel k() +k.set_width(1) diff --git a/examples/meta/src/multiclass_classifier/multiclass_ecoc_random.sg b/examples/meta/src/multiclass_classifier/multiclass_ecoc_random.sg index 79e79d4a10d..8d9f459ae19 100644 --- a/examples/meta/src/multiclass_classifier/multiclass_ecoc_random.sg +++ b/examples/meta/src/multiclass_classifier/multiclass_ecoc_random.sg @@ -1,8 +1,9 @@ +Math:init_random(1) + CSVFile f_feats_train("../../data/classifier_4class_2d_linear_features_train.dat") CSVFile f_feats_test("../../data/classifier_4class_2d_linear_features_test.dat") CSVFile f_labels_train("../../data/classifier_4class_2d_linear_labels_train.dat") CSVFile f_labels_test("../../data/classifier_4class_2d_linear_labels_test.dat") -Math:init_random(1) #![create_features] RealFeatures features_train(f_feats_train) diff --git a/examples/meta/src/regression/krr_nystrom.sg b/examples/meta/src/regression/krr_nystrom.sg index de84de05f40..8e651753fd0 100644 --- a/examples/meta/src/regression/krr_nystrom.sg +++ b/examples/meta/src/regression/krr_nystrom.sg @@ -27,12 +27,12 @@ RegressionLabels labels_predict = nystrom.apply_regression(features_test) #![train_and_apply] #![extract_alpha] -RealVector alpha = nystrom.get_alphas() +nystrom.get_alphas() #![extract_alpha] #![evaluate_error] MeanSquaredError eval() -real mse = eval.evaluate(labels_predict, labels_test) +eval.evaluate(labels_predict, labels_test) #![evaluate_error] # integration testing variables diff --git a/examples/minimal/CMakeLists.txt b/examples/minimal/CMakeLists.txt new file mode 100644 index 00000000000..c5eb8fa789e --- /dev/null +++ b/examples/minimal/CMakeLists.txt @@ -0,0 +1,11 @@ +FILE(GLOB EXAMPLES_CPP RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp") + +FOREACH(EXAMPLE_CPP ${EXAMPLES_CPP}) + STRING(REGEX REPLACE ".cpp\$" "" EXAMPLE "${EXAMPLE_CPP}") + AddLibShogunExample(${EXAMPLE_CPP}) + LIST(APPEND INSTALL_EXAMPLES ${CMAKE_CURRENT_BINARY_DIR}/${EXAMPLE}) +ENDFOREACH() + 
+INSTALL(PROGRAMS ${INSTALL_EXAMPLES} + DESTINATION share/shogun/examples/libshogun/ + COMPONENT minimal-example) diff --git a/examples/undocumented/libshogun/basic_minimal.cpp b/examples/minimal/minimal.cpp similarity index 100% rename from examples/undocumented/libshogun/basic_minimal.cpp rename to examples/minimal/minimal.cpp diff --git a/examples/undocumented/libshogun/CMakeLists.txt b/examples/undocumented/libshogun/CMakeLists.txt index fe114912e0c..92bedd0631c 100644 --- a/examples/undocumented/libshogun/CMakeLists.txt +++ b/examples/undocumented/libshogun/CMakeLists.txt @@ -4,6 +4,16 @@ IF (NOT USE_MOSEK) LIST(REMOVE_ITEM EXAMPLES_CPP "so_fg_model.cpp" "so_multiclass.cpp" "so_multilabel.cpp") ENDIF () +IF (NOT USE_GPL_SHOGUN) + LIST(REMOVE_ITEM EXAMPLES_CPP + "modelselection_model_selection_parameters_test.cpp" + "modelselection_parameter_tree.cpp" + "modelselection_grid_search_string_kernel.cpp" + "parameter_modsel_parameters.cpp" + "classifier_svmlight_string_features_precomputed_kernel.cpp" + ) +ENDIF() + LIST(REMOVE_ITEM EXAMPLES_CPP "classifier_bagging_liblinear.cpp" "classifier_larank.cpp" @@ -37,23 +47,8 @@ LIST(REMOVE_ITEM EXAMPLES_CPP FOREACH(EXAMPLE_CPP ${EXAMPLES_CPP}) STRING(REGEX REPLACE ".cpp\$" "" EXAMPLE "${EXAMPLE_CPP}") - - add_executable(${EXAMPLE} ${CMAKE_CURRENT_SOURCE_DIR}/${EXAMPLE_CPP}) - target_link_libraries(${EXAMPLE} shogun::shogun ${SANITIZER_LIBRARY}) - IF(SANITIZER_FLAGS) - set_target_properties(${EXAMPLE} PROPERTIES COMPILE_FLAGS ${SANITIZER_FLAGS}) - ENDIF() - add_test(libshogun-${EXAMPLE} ${CMAKE_CURRENT_BINARY_DIR}/${EXAMPLE}) + AddLibShogunExample(${EXAMPLE_CPP}) LIST(APPEND INSTALL_EXAMPLES ${CMAKE_CURRENT_BINARY_DIR}/${EXAMPLE}) - - # Add examples to the dependencies of modular interfaces to make sure - # nothing will infer with them being build single-threaded. 
- IF(SWIG_SINGLE_THREADED) - FOREACH(SG_MODULAR_INTERFACE_TARGET ${SG_MODULAR_INTERFACE_TARGETS}) - ADD_DEPENDENCIES(${SG_MODULAR_INTERFACE_TARGET} ${EXAMPLE}) - ENDFOREACH(SG_MODULAR_INTERFACE_TARGET ${SG_MODULAR_INTERFACE_TARGETS}) - ENDIF(SWIG_SINGLE_THREADED) - ENDFOREACH() INSTALL(PROGRAMS ${INSTALL_EXAMPLES} diff --git a/examples/undocumented/libshogun/classifier_latent_svm.cpp b/examples/undocumented/libshogun/classifier_latent_svm.cpp index 626d7f1e587..18471b8d506 100644 --- a/examples/undocumented/libshogun/classifier_latent_svm.cpp +++ b/examples/undocumented/libshogun/classifier_latent_svm.cpp @@ -1,10 +1,11 @@ -#include +#include +#include +#include #include +#include +#include #include -#include -#include #include -#include #include #include @@ -113,6 +114,7 @@ static void read_dataset(char* fname, CLatentFeatures*& feats, CLatentLabels*& l feats = new CLatentFeatures(num_examples); SG_REF(feats); + auto pb = progress(range(num_examples)); CMath::init_random(); for (int i = 0; (!feof(fd)) && (i < num_examples); ++i) { @@ -150,7 +152,7 @@ static void read_dataset(char* fname, CLatentFeatures*& feats, CLatentLabels*& l CBoundingBox* bb = new CBoundingBox(x,y); labels->add_latent_label(bb); - SG_SPROGRESS(i, 0, num_examples); + pb.print_progress(); CHOGFeatures* hog = new CHOGFeatures(width, height); hog->hog = SG_CALLOC(float64_t**, hog->width); for (int j = 0; j < width; ++j) @@ -176,7 +178,7 @@ static void read_dataset(char* fname, CLatentFeatures*& feats, CLatentLabels*& l labels->set_labels(ys); - SG_SDONE(); + pb.complete(); } int main(int argc, char** argv) diff --git a/examples/undocumented/libshogun/evaluation_cross_validation_mkl_weight_storage.cpp b/examples/undocumented/libshogun/evaluation_cross_validation_mkl_weight_storage.cpp index 7bc3e500f91..dd042371127 100644 --- a/examples/undocumented/libshogun/evaluation_cross_validation_mkl_weight_storage.cpp +++ b/examples/undocumented/libshogun/evaluation_cross_validation_mkl_weight_storage.cpp 
@@ -8,17 +8,16 @@ */ #include -#include -#include -#include -#include #include #include +#include #include -#include -#include #include -#include +#include +#include +#include +#include +#include #include using namespace shogun; @@ -50,6 +49,40 @@ void gen_rand_data(SGVector lab, SGMatrix feat, feat.display_matrix("feat"); } +SGMatrix calculate_weights( + CParameterObserverCV& obs, int32_t folds, int32_t run, int32_t len) +{ + int32_t column = 0; + SGMatrix weights(len, folds * run); + for (int o = 0; o < obs.get_num_observations(); o++) + { + auto obs_storage = obs.get_observation(o); + for (int i = 0; i < obs_storage->get_num_folds(); i++) + { + auto fold = obs_storage->get_fold(i); + CMKLClassification* machine = + (CMKLClassification*)fold->get_trained_machine(); + SG_REF(machine) + CCombinedKernel* k = (CCombinedKernel*)machine->get_kernel(); + auto w = k->get_subkernel_weights(); + + /* Copy the weights inside the matrix */ + /* Each of the columns will represent a set of weights */ + for (auto j = 0; j < w.size(); j++) + { + weights.set_element(w[j], j, column); + } + + SG_UNREF(k) + SG_UNREF(machine) + SG_UNREF(fold) + column++; + } + SG_UNREF(obs_storage) + } + return weights; +} + void test_mkl_cross_validation() { /* generate random data */ @@ -97,16 +130,14 @@ void test_mkl_cross_validation() CCrossValidation* cross=new CCrossValidation(svm, comb_features, labels, split, eval, false); /* add print output listener and mkl storage listener */ - cross->add_cross_validation_output(new CCrossValidationPrintOutput()); - CCrossValidationMKLStorage* mkl_storage=new CCrossValidationMKLStorage(); - cross->add_cross_validation_output(mkl_storage); + CParameterObserverCV mkl_obs{true}; + cross->subscribe_to_parameters(&mkl_obs); - /* perform cross-validation, this will print loads of information - * (caused by the CCrossValidationPrintOutput instance attached to it) */ + /* perform cross-validation, this will print loads of information */ CEvaluationResult* 
result=cross->evaluate(); /* print mkl weights */ - SGMatrix weights=mkl_storage->get_mkl_weights(); + auto weights = calculate_weights(mkl_obs, num_folds, 1, 3); weights.display_matrix("mkl weights"); /* print mean and variance of each kernel weight. These could for example @@ -115,6 +146,8 @@ void test_mkl_cross_validation() CStatistics::matrix_variance(weights, false).display_vector("variance per kernel"); CStatistics::matrix_std_deviation(weights, false).display_vector("std-dev per kernel"); + /* Clear */ + mkl_obs.clear(); SG_UNREF(result); /* again for two runs */ @@ -122,14 +155,17 @@ void test_mkl_cross_validation() result=cross->evaluate(); /* print mkl weights */ - weights=mkl_storage->get_mkl_weights(); - weights.display_matrix("mkl weights"); + SGMatrix weights_2 = calculate_weights(mkl_obs, num_folds, 2, 3); + weights_2.display_matrix("mkl weights"); /* print mean and variance of each kernel weight. These could for example * been used to compute confidence intervals */ - CStatistics::matrix_mean(weights, false).display_vector("mean per kernel"); - CStatistics::matrix_variance(weights, false).display_vector("variance per kernel"); - CStatistics::matrix_std_deviation(weights, false).display_vector("std-dev per kernel"); + CStatistics::matrix_mean(weights_2, false) + .display_vector("mean per kernel"); + CStatistics::matrix_variance(weights_2, false) + .display_vector("variance per kernel"); + CStatistics::matrix_std_deviation(weights_2, false) + .display_vector("std-dev per kernel"); /* clean up */ SG_UNREF(result); diff --git a/examples/undocumented/libshogun/evaluation_cross_validation_multiclass_mkl.cpp b/examples/undocumented/libshogun/evaluation_cross_validation_multiclass_mkl.cpp index eb83866eaf0..39e43638690 100644 --- a/examples/undocumented/libshogun/evaluation_cross_validation_multiclass_mkl.cpp +++ b/examples/undocumented/libshogun/evaluation_cross_validation_multiclass_mkl.cpp @@ -97,11 +97,12 @@ void test_multiclass_mkl_cv() /* perform 
x-val and print result */ CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate(); - SG_SPRINT("mean of %d %d-fold x-val runs: %f\n", n_runs, n_folds, - result->mean); + SG_SPRINT( + "mean of %d %d-fold x-val runs: %f\n", n_runs, n_folds, + result->get_mean()); /* assert high accuracy */ - ASSERT(result->mean>0.81); + ASSERT(result->get_mean() > 0.81); /* clean up */ SG_UNREF(features); diff --git a/examples/undocumented/libshogun/evaluation_cross_validation_regression.cpp b/examples/undocumented/libshogun/evaluation_cross_validation_regression.cpp index abd7490a82f..4b0faf98c33 100644 --- a/examples/undocumented/libshogun/evaluation_cross_validation_regression.cpp +++ b/examples/undocumented/libshogun/evaluation_cross_validation_regression.cpp @@ -105,7 +105,7 @@ void test_cross_validation() result->print_result(); /* same crude assertion as for above evaluation */ - ASSERT(result->mean<2); + ASSERT(result->get_mean() < 2); /* clean up */ SG_UNREF(result); diff --git a/examples/undocumented/libshogun/library_dyn_int.cpp b/examples/undocumented/libshogun/library_dyn_int.cpp deleted file mode 100644 index 82a0d33889f..00000000000 --- a/examples/undocumented/libshogun/library_dyn_int.cpp +++ /dev/null @@ -1,125 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * Written (W) 2009 Soeren Sonnenburg - * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society - */ - -#include -#include -#include -#include -#include -#include - -using namespace shogun; - -void print_message(FILE* target, const char* str) -{ - fprintf(target, "%s", str); -} - -void print_warning(FILE* target, const char* str) -{ - fprintf(target, "%s", str); -} - -void print_error(FILE* target, const char* str) -{ - fprintf(target, "%s", str); -} - -void gen_ints(uint256_t* &a, uint32_t* &b, uint32_t len) -{ - a=SG_MALLOC(uint256_t, len); - b=SG_MALLOC(uint32_t, len); - - CMath::init_random(17); - - for (uint32_t i=0; iset_loglevel(MSG_DEBUG); - - SG_SPRINT("gen data.."); - t.start(); - gen_ints(a,b, LEN); - t.cur_time_diff(true); - - SG_SPRINT("qsort.."); - t.start(); - CMath::qsort_index(a, b, LEN); - t.cur_time_diff(true); - - SG_SPRINT("\n\n"); - for (uint32_t i=0; i<10; i++) - { - SG_SPRINT("a[%d]=", i); - a[i].print_hex(); - SG_SPRINT("\n"); - } - - SG_SPRINT("\n\n"); - - uint64_t val1[4]={1,2,3,4}; - uint64_t val2[4]={5,6,7,8}; - a[0]=val1; - a[1]=val2; - a[2]=a[0]; - CMath::swap(a[0],a[1]); - - printf("a[0]==a[1] %d\n", (int) (a[0] == a[1])); - printf("a[0]a[1] %d\n", (int) (a[0] > a[1])); - printf("a[0]>=a[1] %d\n", (int) (a[0] >= a[1])); - - printf("a[0]==a[0] %d\n", (int) (a[0] == a[0])); - printf("a[0]a[0] %d\n", (int) (a[0] > a[0])); - printf("a[0]>=a[0] %d\n", (int) (a[0] >= a[0])); - - SG_SPRINT("\n\n"); - for (uint32_t i=0; i<10 ; i++) - { - SG_SPRINT("a[%d]=", i); - a[i].print_hex(); - printf("\n"); - } - - SG_FREE(a); - SG_FREE(b); - } - catch(ShogunException & sh) - { - SG_SPRINT("%s",sh.get_exception_string()); - } - - exit_shogun(); - - return 0; -} diff --git a/examples/undocumented/libshogun/library_indirect_object.cpp b/examples/undocumented/libshogun/library_indirect_object.cpp deleted file mode 100644 index edcd3344570..00000000000 --- a/examples/undocumented/libshogun/library_indirect_object.cpp +++ /dev/null @@ 
-1,45 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include - -using namespace shogun; - -const int l=10; - -int main(int argc, char** argv) -{ - init_shogun(); - - // create array a - int32_t* a=SG_MALLOC(int32_t, l); - for (int i=0; i INDIRECT; - // create array of indirect objects pointing to array a - INDIRECT::set_array(&a); - INDIRECT* x = SG_MALLOC(INDIRECT, l); - INDIRECT::init_slice(x, l); - - - printf("created array a and indirect object array x pointing to a.\n\n"); - for (int i=0; i -#include -#include -#include -#include -#include - -using namespace shogun; - -int32_t max=3; -const float64_t initial_value=1; -const float64_t another_value=2; - -void print_message(FILE* target, const char* str) -{ - fprintf(target, "%s", str); -} - -bool test_float_scalar() -{ - bool result=true; - - Parameter* original_parameter_list=new Parameter(); - float64_t original_parameter=initial_value; - original_parameter_list->add(&original_parameter, "param", ""); - - float64_t new_parameter=another_value; - Parameter* new_parameter_list=new Parameter(); - new_parameter_list->add(&new_parameter, "param", ""); - - original_parameter_list->set_from_parameters(new_parameter_list); - - result&=original_parameter==another_value; - - delete original_parameter_list; - delete new_parameter_list; - - return result; -} - -bool test_float_vector() -{ - bool result=true; - - Parameter* original_parameter_list=new Parameter(); - float64_t* original_parameter=SG_MALLOC(float64_t, max); - SGVector::fill_vector(original_parameter, max, initial_value); - - original_parameter_list->add_vector(&original_parameter, &max, "param", ""); - - float64_t* new_parameter=SG_MALLOC(float64_t, max); - SGVector::fill_vector(new_parameter, max, another_value); - - Parameter* new_parameter_list=new Parameter(); - new_parameter_list->add_vector(&new_parameter, &max, "param", ""); - - original_parameter_list->set_from_parameters(new_parameter_list); - - for (int32_t i=0; 
i::fill_vector(original_parameter, max*max, initial_value); - - original_parameter_list->add_matrix(&original_parameter, &max, &max, "param", ""); - - float64_t* new_parameter=SG_MALLOC(float64_t, max*max); - SGVector::fill_vector(new_parameter, max*max, another_value); - - Parameter* new_parameter_list=new Parameter(); - new_parameter_list->add_matrix(&new_parameter, &max, &max, "param", ""); - - original_parameter_list->set_from_parameters(new_parameter_list); - - for (int32_t i=0; iadd(&original_parameter, "kernel", ""); - - CSGObject* new_parameter=new CDistantSegmentsKernel(10, 10, 10); - Parameter* new_parameter_list=new Parameter(); - new_parameter_list->add(&new_parameter, "kernel", ""); - - /* note: old_parameter is SG_UNREF'ed, new one SG_REF'ed */ - original_parameter_list->set_from_parameters(new_parameter_list); - - result&=original_parameter==new_parameter; - - /* old original kernel was deleted by shogun's SG_UNREF */ - SG_UNREF(new_parameter); - delete original_parameter_list; - delete new_parameter_list; - - return result; -} - -bool test_sgobject_vector() -{ - bool result=true; - - Parameter* original_parameter_list=new Parameter(); - CSGObject** original_parameter=SG_MALLOC(CSGObject*, max); - for (int32_t i=0; iadd_vector(&original_parameter, &max, "param", ""); - - CSGObject** new_parameter=SG_MALLOC(CSGObject*, max); - for (int32_t i=0; iadd_vector(&new_parameter, &max, "param", ""); - - /* note: old_parameters are SG_UNREF'ed, new ones SG_REF'ed */ - original_parameter_list->set_from_parameters(new_parameter_list); - - for (int32_t i=0; iadd_matrix(&original_parameter, &max, &max, "param", ""); - - CSGObject** new_parameter=SG_MALLOC(CSGObject*, max*max); - for (int32_t i=0; iadd_matrix(&new_parameter, &max, &max, "param", ""); - - /* note: old_parameters are SG_UNREF'ed, new ones SG_REF'ed */ - original_parameter_list->set_from_parameters(new_parameter_list); - - for (int32_t i=0; i* features_test=new CDenseFeatures( feat_test); - 
CGaussianKernel* kernel=new CGaussianKernel(kernel_cache, rbf_width); + CGaussianKernel* kernel=new CGaussianKernel(rbf_width); kernel->init(features_train, features_train); // also epsilon svr possible here diff --git a/examples/undocumented/python/CMakeLists.txt b/examples/undocumented/python/CMakeLists.txt new file mode 100644 index 00000000000..3ded2f7410e --- /dev/null +++ b/examples/undocumented/python/CMakeLists.txt @@ -0,0 +1,25 @@ +INCLUDE(PythonEnvironment) +GET_PYTHON_ENV() + +FILE(GLOB PYTHON_EXAMPLES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/*.py") + +IF (NOT USE_GPL_SHOGUN) + LIST(REMOVE_ITEM PYTHON_EXAMPLES + "classifier_svmlin.py" + "kernel_distantsegments.py" + "modelselection_parameter_tree.py" + "variational_classifier.py" + "evaluation_cross_validation_multiclass_storage.py" + ) +ENDIF() + +FOREACH(EXAMPLE_PY ${PYTHON_EXAMPLES}) + STRING(REGEX REPLACE ".py\$" "" EXAMPLE "${EXAMPLE_PY}") + MESSAGE(${EXAMPLE_PY}) + + add_test(NAME python_legacy-${EXAMPLE} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMAND ${PYTHON_EXECUTABLE} ${EXAMPLE_PY}) + set_property(TEST python_legacy-${EXAMPLE} PROPERTY + ENVIRONMENT "${PYTHON_ENV_VARS}") +ENDFOREACH() \ No newline at end of file diff --git a/examples/undocumented/python_modular/classifier_custom_kernel_modular.py b/examples/undocumented/python/classifier_custom_kernel.py similarity index 73% rename from examples/undocumented/python_modular/classifier_custom_kernel_modular.py rename to examples/undocumented/python/classifier_custom_kernel.py index 699926b2ba1..89e3ffef759 100644 --- a/examples/undocumented/python_modular/classifier_custom_kernel_modular.py +++ b/examples/undocumented/python/classifier_custom_kernel.py @@ -1,8 +1,8 @@ #!/usr/bin/env python parameter_list = [[1,7],[2,8]] -def classifier_custom_kernel_modular (C=1,dim=7): - from modshogun import RealFeatures, BinaryLabels, CustomKernel, LibSVM +def classifier_custom_kernel (C=1,dim=7): + from shogun import 
RealFeatures, BinaryLabels, CustomKernel, LibSVM from numpy import diag,ones,sign from numpy.random import rand,seed @@ -23,4 +23,4 @@ def classifier_custom_kernel_modular (C=1,dim=7): if __name__=='__main__': print('custom_kernel') - classifier_custom_kernel_modular(*parameter_list[0]) + classifier_custom_kernel(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_domainadaptationsvm_modular.py b/examples/undocumented/python/classifier_domainadaptationsvm.py similarity index 89% rename from examples/undocumented/python_modular/classifier_domainadaptationsvm_modular.py rename to examples/undocumented/python/classifier_domainadaptationsvm.py index 3b28ff00199..a92f7f98f98 100644 --- a/examples/undocumented/python_modular/classifier_domainadaptationsvm_modular.py +++ b/examples/undocumented/python/classifier_domainadaptationsvm.py @@ -1,17 +1,17 @@ #!/usr/bin/env python import numpy -from modshogun import StringCharFeatures, BinaryLabels, DNA -from modshogun import WeightedDegreeStringKernel -from modshogun import MSG_DEBUG +from shogun import StringCharFeatures, BinaryLabels, DNA +from shogun import WeightedDegreeStringKernel +from shogun import MSG_DEBUG try: - from modshogun import DomainAdaptationSVM + from shogun import DomainAdaptationSVM except ImportError: print("DomainAdaptationSVM not available") exit(0) try: - from modshogun import SVMLight + from shogun import SVMLight except ImportError: print("SVMLight not available") exit(0) @@ -67,7 +67,7 @@ testdna2,label_testdna2,1,3],[traindna,testdna,label_traindna,label_testdna,traindna2,label_traindna2, \ testdna2,label_testdna2,2,5]] -def classifier_domainadaptationsvm_modular (fm_train_dna=traindna,fm_test_dna=testdna, \ +def classifier_domainadaptationsvm (fm_train_dna=traindna,fm_test_dna=testdna, \ label_train_dna=label_traindna, \ label_test_dna=label_testdna,fm_train_dna2=traindna2,fm_test_dna2=testdna2, \ 
label_train_dna2=label_traindna2,label_test_dna2=label_testdna2,C=1,degree=3): @@ -102,4 +102,4 @@ def classifier_domainadaptationsvm_modular (fm_train_dna=traindna,fm_test_dna=te if __name__=='__main__': print('SVMLight') - classifier_domainadaptationsvm_modular(*parameter_list[0]) + classifier_domainadaptationsvm(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_featureblock_logistic_regression.py b/examples/undocumented/python/classifier_featureblock_logistic_regression.py similarity index 90% rename from examples/undocumented/python_modular/classifier_featureblock_logistic_regression.py rename to examples/undocumented/python/classifier_featureblock_logistic_regression.py index 2f52c02b1f0..1e4da7837fa 100644 --- a/examples/undocumented/python_modular/classifier_featureblock_logistic_regression.py +++ b/examples/undocumented/python/classifier_featureblock_logistic_regression.py @@ -12,9 +12,9 @@ def classifier_featureblock_logistic_regression (fm_train=traindat,fm_test=testdat,label_train=label_traindat): - from modshogun import BinaryLabels, RealFeatures, IndexBlock, IndexBlockGroup + from shogun import BinaryLabels, RealFeatures, IndexBlock, IndexBlockGroup try: - from modshogun import FeatureBlockLogisticRegression + from shogun import FeatureBlockLogisticRegression except ImportError: print("FeatureBlockLogisticRegression not available") exit(0) diff --git a/examples/undocumented/python_modular/classifier_gmnpsvm_modular.py b/examples/undocumented/python/classifier_gmnpsvm.py similarity index 69% rename from examples/undocumented/python_modular/classifier_gmnpsvm_modular.py rename to examples/undocumented/python/classifier_gmnpsvm.py index a0ca1149f7c..a3a1afb3f07 100644 --- a/examples/undocumented/python_modular/classifier_gmnpsvm_modular.py +++ b/examples/undocumented/python/classifier_gmnpsvm.py @@ -5,9 +5,9 @@ parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]] -def 
classifier_gmnpsvm_modular (train_fname=traindat,test_fname=testdat,label_fname=label_traindat,width=2.1,C=1,epsilon=1e-5): - from modshogun import RealFeatures, MulticlassLabels - from modshogun import GaussianKernel, GMNPSVM, CSVFile +def classifier_gmnpsvm (train_fname=traindat,test_fname=testdat,label_fname=label_traindat,width=2.1,C=1,epsilon=1e-5): + from shogun import RealFeatures, MulticlassLabels + from shogun import GaussianKernel, GMNPSVM, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -23,4 +23,4 @@ def classifier_gmnpsvm_modular (train_fname=traindat,test_fname=testdat,label_fn return out,kernel if __name__=='__main__': print('GMNPSVM') - classifier_gmnpsvm_modular(*parameter_list[0]) + classifier_gmnpsvm(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_gpbtsvm_modular.py b/examples/undocumented/python/classifier_gpbtsvm.py similarity index 69% rename from examples/undocumented/python_modular/classifier_gpbtsvm_modular.py rename to examples/undocumented/python/classifier_gpbtsvm.py index f71b650ca29..f74d7ed26b0 100644 --- a/examples/undocumented/python_modular/classifier_gpbtsvm_modular.py +++ b/examples/undocumented/python/classifier_gpbtsvm.py @@ -5,12 +5,12 @@ parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]] -def classifier_gpbtsvm_modular (train_fname=traindat,test_fname=testdat,label_fname=label_traindat,width=2.1,C=1,epsilon=1e-5): - from modshogun import RealFeatures, BinaryLabels - from modshogun import GaussianKernel - from modshogun import CSVFile +def classifier_gpbtsvm (train_fname=traindat,test_fname=testdat,label_fname=label_traindat,width=2.1,C=1,epsilon=1e-5): + from shogun import RealFeatures, BinaryLabels + from shogun import GaussianKernel + from shogun import CSVFile try: - from modshogun import GPBTSVM + from shogun import GPBTSVM except ImportError: print("GPBTSVM not available") 
exit(0) @@ -30,4 +30,4 @@ def classifier_gpbtsvm_modular (train_fname=traindat,test_fname=testdat,label_fn if __name__=='__main__': print('GPBTSVM') - classifier_gpbtsvm_modular(*parameter_list[0]) + classifier_gpbtsvm(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_larank_modular.py b/examples/undocumented/python/classifier_larank.py similarity index 71% rename from examples/undocumented/python_modular/classifier_larank_modular.py rename to examples/undocumented/python/classifier_larank.py index 893c17884cd..a6a152d0f35 100644 --- a/examples/undocumented/python_modular/classifier_larank_modular.py +++ b/examples/undocumented/python/classifier_larank.py @@ -2,11 +2,11 @@ from numpy import * parameter_list = [[10,3,15,0.9,1,2000,1],[20,4,15,0.9,1,5000,2]] -def classifier_larank_modular (num_vec,num_class,distance,C=0.9,num_threads=1,num_iter=5,seed=1): - from modshogun import RealFeatures, MulticlassLabels - from modshogun import GaussianKernel - from modshogun import LaRank - from modshogun import Math_init_random +def classifier_larank (num_vec,num_class,distance,C=0.9,num_threads=1,num_iter=5,seed=1): + from shogun import RealFeatures, MulticlassLabels + from shogun import GaussianKernel + from shogun import LaRank + from shogun import Math_init_random # reproducible results Math_init_random(seed) @@ -18,8 +18,8 @@ def classifier_larank_modular (num_vec,num_class,distance,C=0.9,num_threads=1,nu fm_train=array(random.randn(num_class,num_vec)) fm_test=array(random.randn(num_class,num_vec)) for i in range(len(label_train)): - fm_train[label_train[i],i]+=distance - fm_test[label_test[i],i]+=distance + fm_train[int(label_train[i]),i]+=distance + fm_test[int(label_test[i]),i]+=distance feats_train=RealFeatures(fm_train) feats_test=RealFeatures(fm_test) @@ -43,4 +43,4 @@ def classifier_larank_modular (num_vec,num_class,distance,C=0.9,num_threads=1,nu if __name__=='__main__': print('LaRank') - [predictions, svm, labels] = 
classifier_larank_modular(*parameter_list[0]) + [predictions, svm, labels] = classifier_larank(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_lda_modular.py b/examples/undocumented/python/classifier_lda.py similarity index 72% rename from examples/undocumented/python_modular/classifier_lda_modular.py rename to examples/undocumented/python/classifier_lda.py index 2b1df46596d..917c1a8b2a7 100644 --- a/examples/undocumented/python_modular/classifier_lda_modular.py +++ b/examples/undocumented/python/classifier_lda.py @@ -5,8 +5,8 @@ parameter_list = [[traindat,testdat,label_traindat,3,1],[traindat,testdat,label_traindat,4,1]] -def classifier_lda_modular (train_fname=traindat,test_fname=testdat,label_fname=label_traindat,gamma=3,num_threads=1): - from modshogun import RealFeatures, BinaryLabels, LDA, CSVFile +def classifier_lda (train_fname=traindat,test_fname=testdat,label_fname=label_traindat,gamma=3,num_threads=1): + from shogun import RealFeatures, BinaryLabels, LDA, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -22,4 +22,4 @@ def classifier_lda_modular (train_fname=traindat,test_fname=testdat,label_fname= if __name__=='__main__': print('LDA') - classifier_lda_modular(*parameter_list[0]) + classifier_lda(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_libsvmoneclass_modular.py b/examples/undocumented/python/classifier_libsvmoneclass.py similarity index 69% rename from examples/undocumented/python_modular/classifier_libsvmoneclass_modular.py rename to examples/undocumented/python/classifier_libsvmoneclass.py index d7aef5a8b8b..43126195910 100644 --- a/examples/undocumented/python_modular/classifier_libsvmoneclass_modular.py +++ b/examples/undocumented/python/classifier_libsvmoneclass.py @@ -4,8 +4,8 @@ parameter_list = [[traindat,testdat,2.2,1,1e-7],[traindat,testdat,2.1,1,1e-5]] -def classifier_libsvmoneclass_modular 
(train_fname=traindat,test_fname=testdat,width=2.1,C=1,epsilon=1e-5): - from modshogun import RealFeatures, GaussianKernel, LibSVMOneClass, CSVFile +def classifier_libsvmoneclass (train_fname=traindat,test_fname=testdat,width=2.1,C=1,epsilon=1e-5): + from shogun import RealFeatures, GaussianKernel, LibSVMOneClass, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -21,4 +21,4 @@ def classifier_libsvmoneclass_modular (train_fname=traindat,test_fname=testdat,w if __name__=='__main__': print('LibSVMOneClass') - classifier_libsvmoneclass_modular(*parameter_list[0]) + classifier_libsvmoneclass(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_mpdsvm_modular.py b/examples/undocumented/python/classifier_mpdsvm.py similarity index 69% rename from examples/undocumented/python_modular/classifier_mpdsvm_modular.py rename to examples/undocumented/python/classifier_mpdsvm.py index 10a7ecc41ca..eb05a6ea52b 100644 --- a/examples/undocumented/python_modular/classifier_mpdsvm_modular.py +++ b/examples/undocumented/python/classifier_mpdsvm.py @@ -5,11 +5,11 @@ parameter_list = [[traindat,testdat,label_traindat,1,1e-5],[traindat,testdat,label_traindat,0.9,1e-5]] -def classifier_mpdsvm_modular (train_fname=traindat,test_fname=testdat,label_fname=label_traindat,C=1,epsilon=1e-5): +def classifier_mpdsvm (train_fname=traindat,test_fname=testdat,label_fname=label_traindat,C=1,epsilon=1e-5): - from modshogun import RealFeatures, BinaryLabels - from modshogun import GaussianKernel - from modshogun import MPDSVM, CSVFile + from shogun import RealFeatures, BinaryLabels + from shogun import GaussianKernel + from shogun import MPDSVM, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -26,4 +26,4 @@ def classifier_mpdsvm_modular (train_fname=traindat,test_fname=testdat,label_fna if __name__=='__main__': print('MPDSVM') - 
classifier_mpdsvm_modular(*parameter_list[0]) + classifier_mpdsvm(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_multiclass_ecoc.py b/examples/undocumented/python/classifier_multiclass_ecoc.py similarity index 85% rename from examples/undocumented/python_modular/classifier_multiclass_ecoc.py rename to examples/undocumented/python/classifier_multiclass_ecoc.py index b80b4a9c557..4c6311c3b1e 100644 --- a/examples/undocumented/python_modular/classifier_multiclass_ecoc.py +++ b/examples/undocumented/python/classifier_multiclass_ecoc.py @@ -12,21 +12,21 @@ def classifier_multiclass_ecoc (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5): - import modshogun - from modshogun import ECOCStrategy, LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine - from modshogun import MulticlassAccuracy - from modshogun import RealFeatures, MulticlassLabels + import shogun + from shogun import ECOCStrategy, LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine + from shogun import MulticlassAccuracy + from shogun import RealFeatures, MulticlassLabels def nonabstract_class(name): try: - getattr(modshogun, name)() + getattr(shogun, name)() except TypeError: return False return True - encoders = [x for x in dir(modshogun) + encoders = [x for x in dir(shogun) if re.match(r'ECOC.+Encoder', x) and nonabstract_class(x)] - decoders = [x for x in dir(modshogun) + decoders = [x for x in dir(shogun) if re.match(r'ECOC.+Decoder', x) and nonabstract_class(x)] fea_train = RealFeatures(fm_train_real) @@ -46,8 +46,8 @@ def nonabstract_class(name): #print((format_str % ('s', 's', 's')) % ('encoder', 'decoder', 'codelen', 'time', 'accuracy')) def run_ecoc(ier, idr): - encoder = getattr(modshogun, encoders[ier])() - decoder = getattr(modshogun, decoders[idr])() + encoder = getattr(shogun, encoders[ier])() + decoder = getattr(shogun, decoders[idr])() # whether encoder is data 
dependent if hasattr(encoder, 'set_labels'): diff --git a/examples/undocumented/python_modular/classifier_multiclassliblinear_modular.py b/examples/undocumented/python/classifier_multiclassliblinear.py similarity index 68% rename from examples/undocumented/python_modular/classifier_multiclassliblinear_modular.py rename to examples/undocumented/python/classifier_multiclassliblinear.py index 1a0d09a0b26..cd6be65156e 100644 --- a/examples/undocumented/python_modular/classifier_multiclassliblinear_modular.py +++ b/examples/undocumented/python/classifier_multiclassliblinear.py @@ -5,9 +5,9 @@ parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1,1e-5],[traindat,testdat,label_traindat,label_testdat,2.2,1,1e-5]] -def classifier_multiclassliblinear_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,width=2.1,C=1,epsilon=1e-5): - from modshogun import RealFeatures, MulticlassLabels - from modshogun import MulticlassLibLinear +def classifier_multiclassliblinear (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,width=2.1,C=1,epsilon=1e-5): + from shogun import RealFeatures, MulticlassLabels + from shogun import MulticlassLibLinear feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) @@ -21,7 +21,7 @@ def classifier_multiclassliblinear_modular (fm_train_real=traindat,fm_test_real= out = label_pred.get_labels() if label_test_multiclass is not None: - from modshogun import MulticlassAccuracy + from shogun import MulticlassAccuracy labels_test = MulticlassLabels(label_test_multiclass) evaluator = MulticlassAccuracy() acc = evaluator.evaluate(label_pred, labels_test) @@ -31,4 +31,4 @@ def classifier_multiclassliblinear_modular (fm_train_real=traindat,fm_test_real= if __name__=='__main__': print('MulticlassLibLinear') - classifier_multiclassliblinear_modular(*parameter_list[0]) + 
classifier_multiclassliblinear(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_multiclassmachine_modular.py b/examples/undocumented/python/classifier_multiclassmachine.py similarity index 67% rename from examples/undocumented/python_modular/classifier_multiclassmachine_modular.py rename to examples/undocumented/python/classifier_multiclassmachine.py index 3c21f9168c2..1f670b084f6 100644 --- a/examples/undocumented/python_modular/classifier_multiclassmachine_modular.py +++ b/examples/undocumented/python/classifier_multiclassmachine.py @@ -5,10 +5,10 @@ parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]] -def classifier_multiclassmachine_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,width=2.1,C=1,epsilon=1e-5): - from modshogun import RealFeatures, MulticlassLabels - from modshogun import GaussianKernel - from modshogun import LibSVM, KernelMulticlassMachine, MulticlassOneVsRestStrategy +def classifier_multiclassmachine (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,width=2.1,C=1,epsilon=1e-5): + from shogun import RealFeatures, MulticlassLabels + from shogun import GaussianKernel + from shogun import LibSVM, KernelMulticlassMachine, MulticlassOneVsRestStrategy feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) @@ -28,4 +28,4 @@ def classifier_multiclassmachine_modular (fm_train_real=traindat,fm_test_real=te if __name__=='__main__': print('MulticlassMachine') - classifier_multiclassmachine_modular(*parameter_list[0]) + classifier_multiclassmachine(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_multiclassocas_modular.py b/examples/undocumented/python/classifier_multiclassocas.py similarity index 71% rename from examples/undocumented/python_modular/classifier_multiclassocas_modular.py rename to 
examples/undocumented/python/classifier_multiclassocas.py index 3f0cbb96b45..f4984854f0d 100644 --- a/examples/undocumented/python_modular/classifier_multiclassocas_modular.py +++ b/examples/undocumented/python/classifier_multiclassocas.py @@ -2,11 +2,11 @@ from numpy import * parameter_list = [[10,3,15,2.1,1,1e-5,1],[20,4,15,2.2,2,1e-5,2]] -def classifier_multiclassocas_modular (num_vec=10,num_class=3,distance=15,width=2.1,C=1,epsilon=1e-5,seed=1): - from modshogun import RealFeatures, MulticlassLabels - from modshogun import Math_init_random +def classifier_multiclassocas (num_vec=10,num_class=3,distance=15,width=2.1,C=1,epsilon=1e-5,seed=1): + from shogun import RealFeatures, MulticlassLabels + from shogun import Math_init_random try: - from modshogun import MulticlassOCAS + from shogun import MulticlassOCAS except ImportError: print("MulticlassOCAS not available") return @@ -21,8 +21,8 @@ def classifier_multiclassocas_modular (num_vec=10,num_class=3,distance=15,width= fm_train=array(random.randn(num_class,num_vec)) fm_test=array(random.randn(num_class,num_vec)) for i in range(len(label_train)): - fm_train[label_train[i],i]+=distance - fm_test[label_test[i],i]+=distance + fm_train[int(label_train[i]),i]+=distance + fm_test[int(label_test[i]),i]+=distance feats_train=RealFeatures(fm_train) feats_test=RealFeatures(fm_test) @@ -39,4 +39,4 @@ def classifier_multiclassocas_modular (num_vec=10,num_class=3,distance=15,width= if __name__=='__main__': print('MulticlassOCAS') - classifier_multiclassocas_modular(*parameter_list[0]) + classifier_multiclassocas(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_multilabeloutputliblinear_modular.py b/examples/undocumented/python/classifier_multilabeloutputliblinear.py similarity index 64% rename from examples/undocumented/python_modular/classifier_multilabeloutputliblinear_modular.py rename to examples/undocumented/python/classifier_multilabeloutputliblinear.py index 9d4cd603fd2..57099cf9ec2 
100644 --- a/examples/undocumented/python_modular/classifier_multilabeloutputliblinear_modular.py +++ b/examples/undocumented/python/classifier_multilabeloutputliblinear.py @@ -5,9 +5,9 @@ parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1,1e-5],[traindat,testdat,label_traindat,label_testdat,2.2,1,1e-5]] -def classifier_multilabeloutputliblinear_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,width=2.1,C=1,epsilon=1e-5): - from modshogun import RealFeatures, MulticlassLabels, MultilabelLabels - from modshogun import MulticlassLibLinear +def classifier_multilabeloutputliblinear (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,width=2.1,C=1,epsilon=1e-5): + from shogun import RealFeatures, MulticlassLabels, MultilabelLabels + from shogun import MulticlassLibLinear feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) @@ -24,4 +24,4 @@ def classifier_multilabeloutputliblinear_modular (fm_train_real=traindat,fm_test if __name__=='__main__': print('MultilabelOutputLibLinear') - classifier_multilabeloutputliblinear_modular(*parameter_list[0]) + classifier_multilabeloutputliblinear(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_perceptron_modular.py b/examples/undocumented/python/classifier_perceptron.py similarity index 81% rename from examples/undocumented/python_modular/classifier_perceptron_modular.py rename to examples/undocumented/python/classifier_perceptron.py index 716903368fd..7d6743f40a5 100644 --- a/examples/undocumented/python_modular/classifier_perceptron_modular.py +++ b/examples/undocumented/python/classifier_perceptron.py @@ -4,9 +4,9 @@ parameter_list = [[100, 2, 5,1.,1000,1,1], [100, 2, 5,1.,1000,1,2]] -def classifier_perceptron_modular (n=100, dim=2, distance=5,learn_rate=1.,max_iter=1000,num_threads=1,seed=1): - from modshogun 
import RealFeatures, BinaryLabels - from modshogun import Perceptron +def classifier_perceptron (n=100, dim=2, distance=5,learn_rate=1.,max_iter=1000,num_threads=1,seed=1): + from shogun import RealFeatures, BinaryLabels + from shogun import Perceptron random.seed(seed) @@ -39,4 +39,4 @@ def classifier_perceptron_modular (n=100, dim=2, distance=5,learn_rate=1.,max_it if __name__=='__main__': print('Perceptron') - classifier_perceptron_modular(*parameter_list[0]) + classifier_perceptron(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_ssk_modular.py b/examples/undocumented/python/classifier_ssk.py similarity index 82% rename from examples/undocumented/python_modular/classifier_ssk_modular.py rename to examples/undocumented/python/classifier_ssk.py index 8c426a3e05a..ccb983a62b7 100644 --- a/examples/undocumented/python_modular/classifier_ssk_modular.py +++ b/examples/undocumented/python/classifier_ssk.py @@ -18,11 +18,11 @@ parameter_list = [[traindat,testdat,label_traindat,1,5,0.9]] -def classifier_ssk_modular (fm_train_dna=traindat,fm_test_dna=testdat, +def classifier_ssk (fm_train_dna=traindat,fm_test_dna=testdat, label_train_dna=label_traindat,C=1,maxlen=1,decay=1): - from modshogun import StringCharFeatures, BinaryLabels - from modshogun import LibSVM, SubsequenceStringKernel, DNA - from modshogun import ErrorRateMeasure + from shogun import StringCharFeatures, BinaryLabels + from shogun import LibSVM, SubsequenceStringKernel, DNA + from shogun import ErrorRateMeasure feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) @@ -45,4 +45,4 @@ def classifier_ssk_modular (fm_train_dna=traindat,fm_test_dna=testdat, if __name__=='__main__': print('SringSubsequenceKernel classification DNA') - classifier_ssk_modular(*parameter_list[0]) + classifier_ssk(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_svmlight_modular.py 
b/examples/undocumented/python/classifier_svmlight.py similarity index 73% rename from examples/undocumented/python_modular/classifier_svmlight_modular.py rename to examples/undocumented/python/classifier_svmlight.py index 08ed0e49300..156c7be45e4 100644 --- a/examples/undocumented/python_modular/classifier_svmlight_modular.py +++ b/examples/undocumented/python/classifier_svmlight.py @@ -8,11 +8,11 @@ parameter_list = [[traindat,testdat,label_traindat,1.1,1e-5,1],[traindat,testdat,label_traindat,1.2,1e-5,1]] -def classifier_svmlight_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,C=1.2,epsilon=1e-5,num_threads=1): - from modshogun import StringCharFeatures, BinaryLabels, DNA - from modshogun import WeightedDegreeStringKernel +def classifier_svmlight (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,C=1.2,epsilon=1e-5,num_threads=1): + from shogun import StringCharFeatures, BinaryLabels, DNA + from shogun import WeightedDegreeStringKernel try: - from modshogun import SVMLight + from shogun import SVMLight except ImportError: print('No support for SVMLight available.') return @@ -37,4 +37,4 @@ def classifier_svmlight_modular (fm_train_dna=traindat,fm_test_dna=testdat,label return kernel if __name__=='__main__': print('SVMLight') - classifier_svmlight_modular(*parameter_list[0]) + classifier_svmlight(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_svmlight_batch_linadd_modular.py b/examples/undocumented/python/classifier_svmlight_batch_linadd.py similarity index 81% rename from examples/undocumented/python_modular/classifier_svmlight_batch_linadd_modular.py rename to examples/undocumented/python/classifier_svmlight_batch_linadd.py index fd907279799..abe957423aa 100644 --- a/examples/undocumented/python_modular/classifier_svmlight_batch_linadd_modular.py +++ b/examples/undocumented/python/classifier_svmlight_batch_linadd.py @@ -9,13 +9,13 @@ parameter_list=[[train_dna, test_dna, 
label, 20, 0.9, 1e-7, 1], [train_dna, test_dna, label, 20, 2.3, 1e-7, 4]] -def classifier_svmlight_batch_linadd_modular (fm_train_dna, fm_test_dna, +def classifier_svmlight_batch_linadd (fm_train_dna, fm_test_dna, label_train_dna, degree, C, epsilon, num_threads): - from modshogun import StringCharFeatures, BinaryLabels, DNA - from modshogun import WeightedDegreeStringKernel, MSG_DEBUG + from shogun import StringCharFeatures, BinaryLabels, DNA + from shogun import WeightedDegreeStringKernel, MSG_DEBUG try: - from modshogun import SVMLight + from shogun import SVMLight except ImportError: print('No support for SVMLight available.') return @@ -51,4 +51,4 @@ def classifier_svmlight_batch_linadd_modular (fm_train_dna, fm_test_dna, if __name__=='__main__': print('SVMlight batch') - classifier_svmlight_batch_linadd_modular(*parameter_list[0]) + classifier_svmlight_batch_linadd(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_svmlight_linear_term_modular.py b/examples/undocumented/python/classifier_svmlight_linear_term.py similarity index 85% rename from examples/undocumented/python_modular/classifier_svmlight_linear_term_modular.py rename to examples/undocumented/python/classifier_svmlight_linear_term.py index 5d9fc08d213..fa319153f74 100644 --- a/examples/undocumented/python_modular/classifier_svmlight_linear_term_modular.py +++ b/examples/undocumented/python/classifier_svmlight_linear_term.py @@ -26,14 +26,14 @@ parameter_list = [[traindna,testdna,label_traindna,3,10,1e-5,1],[traindna,testdna,label_traindna,3,10,1e-5,1]] -def classifier_svmlight_linear_term_modular (fm_train_dna=traindna,fm_test_dna=testdna, \ +def classifier_svmlight_linear_term (fm_train_dna=traindna,fm_test_dna=testdna, \ label_train_dna=label_traindna,degree=3, \ C=10,epsilon=1e-5,num_threads=1): - from modshogun import StringCharFeatures, BinaryLabels, DNA - from modshogun import WeightedDegreeStringKernel + from shogun import StringCharFeatures, BinaryLabels, DNA + 
from shogun import WeightedDegreeStringKernel try: - from modshogun import SVMLight + from shogun import SVMLight except ImportError: print("SVMLight is not available") exit(0) @@ -60,4 +60,4 @@ def classifier_svmlight_linear_term_modular (fm_train_dna=traindna,fm_test_dna=t if __name__=='__main__': print('SVMLight') - classifier_svmlight_linear_term_modular(*parameter_list[0]) + classifier_svmlight_linear_term(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_svmlin_modular.py b/examples/undocumented/python/classifier_svmlin.py similarity index 71% rename from examples/undocumented/python_modular/classifier_svmlin_modular.py rename to examples/undocumented/python/classifier_svmlin.py index 038eb1c1bc4..c35990fe6bf 100644 --- a/examples/undocumented/python_modular/classifier_svmlin_modular.py +++ b/examples/undocumented/python/classifier_svmlin.py @@ -5,9 +5,9 @@ parameter_list = [[traindat,testdat,label_traindat,0.9,1e-5,1],[traindat,testdat,label_traindat,0.8,1e-5,1]] -def classifier_svmlin_modular (train_fname=traindat,test_fname=testdat,label_fname=label_traindat,C=0.9,epsilon=1e-5,num_threads=1): - from modshogun import RealFeatures, SparseRealFeatures, BinaryLabels - from modshogun import SVMLin, CSVFile +def classifier_svmlin (train_fname=traindat,test_fname=testdat,label_fname=label_traindat,C=0.9,epsilon=1e-5,num_threads=1): + from shogun import RealFeatures, SparseRealFeatures, BinaryLabels + from shogun import SVMLin, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -26,4 +26,4 @@ def classifier_svmlin_modular (train_fname=traindat,test_fname=testdat,label_fna if __name__=='__main__': print('SVMLin') - classifier_svmlin_modular(*parameter_list[0]) + classifier_svmlin(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_svmocas_modular.py b/examples/undocumented/python/classifier_svmocas.py similarity index 72% rename from 
examples/undocumented/python_modular/classifier_svmocas_modular.py rename to examples/undocumented/python/classifier_svmocas.py index 057057b7ed8..69d9328d8fd 100644 --- a/examples/undocumented/python_modular/classifier_svmocas_modular.py +++ b/examples/undocumented/python/classifier_svmocas.py @@ -5,11 +5,11 @@ parameter_list = [[traindat,testdat,label_traindat,0.9,1e-5,1],[traindat,testdat,label_traindat,0.8,1e-5,1]] -def classifier_svmocas_modular (train_fname=traindat,test_fname=testdat,label_fname=label_traindat,C=0.9,epsilon=1e-5,num_threads=1): - from modshogun import RealFeatures, BinaryLabels - from modshogun import CSVFile +def classifier_svmocas (train_fname=traindat,test_fname=testdat,label_fname=label_traindat,C=0.9,epsilon=1e-5,num_threads=1): + from shogun import RealFeatures, BinaryLabels + from shogun import CSVFile try: - from modshogun import SVMOcas + from shogun import SVMOcas except ImportError: print("SVMOcas not available") return @@ -31,4 +31,4 @@ def classifier_svmocas_modular (train_fname=traindat,test_fname=testdat,label_fn if __name__=='__main__': print('SVMOcas') - classifier_svmocas_modular(*parameter_list[0]) + classifier_svmocas(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/classifier_svmsgd_modular.py b/examples/undocumented/python/classifier_svmsgd.py similarity index 69% rename from examples/undocumented/python_modular/classifier_svmsgd_modular.py rename to examples/undocumented/python/classifier_svmsgd.py index 183fcc405a8..de289376aff 100644 --- a/examples/undocumented/python_modular/classifier_svmsgd_modular.py +++ b/examples/undocumented/python/classifier_svmsgd.py @@ -5,9 +5,9 @@ parameter_list = [[traindat,testdat,label_traindat,0.9,1,6],[traindat,testdat,label_traindat,0.8,1,5]] -def classifier_svmsgd_modular (train_fname=traindat,test_fname=testdat,label_fname=label_traindat,C=0.9,num_threads=1,num_iter=5): - from modshogun import RealFeatures, SparseRealFeatures, BinaryLabels - from modshogun 
import SVMSGD, CSVFile +def classifier_svmsgd (train_fname=traindat,test_fname=testdat,label_fname=label_traindat,C=0.9,num_threads=1,num_iter=5): + from shogun import RealFeatures, SparseRealFeatures, BinaryLabels + from shogun import SVMSGD, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -25,4 +25,4 @@ def classifier_svmsgd_modular (train_fname=traindat,test_fname=testdat,label_fna if __name__=='__main__': print('SVMSGD') - classifier_svmsgd_modular(*parameter_list[0]) + classifier_svmsgd(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/converter_diffusionmaps_modular.py b/examples/undocumented/python/converter_diffusionmaps.py similarity index 70% rename from examples/undocumented/python_modular/converter_diffusionmaps_modular.py rename to examples/undocumented/python/converter_diffusionmaps.py index 622433c8008..ce134aae07f 100644 --- a/examples/undocumented/python_modular/converter_diffusionmaps_modular.py +++ b/examples/undocumented/python/converter_diffusionmaps.py @@ -2,9 +2,9 @@ data = '../data/fm_train_real.dat' parameter_list = [[data,10],[data,20]] -def converter_diffusionmaps_modular (data_fname,t): +def converter_diffusionmaps (data_fname,t): try: - from modshogun import RealFeatures, DiffusionMaps, GaussianKernel, CSVFile + from shogun import RealFeatures, DiffusionMaps, GaussianKernel, CSVFile features = RealFeatures(CSVFile(data_fname)) @@ -20,5 +20,5 @@ def converter_diffusionmaps_modular (data_fname,t): if __name__=='__main__': print('DiffusionMaps') - converter_diffusionmaps_modular(*parameter_list[0]) + converter_diffusionmaps(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/converter_factoranalysis_modular.py b/examples/undocumented/python/converter_factoranalysis.py similarity index 71% rename from examples/undocumented/python_modular/converter_factoranalysis_modular.py rename to examples/undocumented/python/converter_factoranalysis.py index 
17bfee2bae4..775a8aa09a1 100644 --- a/examples/undocumented/python_modular/converter_factoranalysis_modular.py +++ b/examples/undocumented/python/converter_factoranalysis.py @@ -2,10 +2,10 @@ data = '../data/fm_train_real.dat' parameter_list = [[data]] -def converter_factoranalysis_modular(data_fname): +def converter_factoranalysis(data_fname): try: import numpy - from modshogun import RealFeatures, FactorAnalysis, EuclideanDistance, CSVFile + from shogun import RealFeatures, FactorAnalysis, EuclideanDistance, CSVFile features = RealFeatures(CSVFile(data_fname)) @@ -22,4 +22,4 @@ def converter_factoranalysis_modular(data_fname): if __name__=='__main__': print('Factor Analysis') - converter_factoranalysis_modular(*parameter_list[0]) + converter_factoranalysis(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/converter_hasheddoc_modular.py b/examples/undocumented/python/converter_hasheddoc.py similarity index 80% rename from examples/undocumented/python_modular/converter_hasheddoc_modular.py rename to examples/undocumented/python/converter_hasheddoc.py index 880763a6c18..99299a3a964 100644 --- a/examples/undocumented/python_modular/converter_hasheddoc_modular.py +++ b/examples/undocumented/python/converter_hasheddoc.py @@ -4,10 +4,10 @@ parameter_list=[[strings]] -def converter_hasheddoc_modular(strings): - from modshogun import SparseRealFeatures, RAWBYTE, StringCharFeatures, Features, HashedDocDotFeatures - from modshogun import NGramTokenizer - from modshogun import HashedDocConverter +def converter_hasheddoc(strings): + from shogun import SparseRealFeatures, RAWBYTE, StringCharFeatures, Features, HashedDocDotFeatures + from shogun import NGramTokenizer + from shogun import HashedDocConverter from numpy import array #create string features @@ -43,6 +43,6 @@ def converter_hasheddoc_modular(strings): if __name__=='__main__': print('HashedDocConverter') - converter_hasheddoc_modular(*parameter_list[0]) + converter_hasheddoc(*parameter_list[0]) 
diff --git a/examples/undocumented/python_modular/converter_hessianlocallylinearembedding_modular.py b/examples/undocumented/python/converter_hessianlocallylinearembedding.py similarity index 67% rename from examples/undocumented/python_modular/converter_hessianlocallylinearembedding_modular.py rename to examples/undocumented/python/converter_hessianlocallylinearembedding.py index f97323fb452..61c07f43926 100644 --- a/examples/undocumented/python_modular/converter_hessianlocallylinearembedding_modular.py +++ b/examples/undocumented/python/converter_hessianlocallylinearembedding.py @@ -2,11 +2,11 @@ data = '../data/fm_train_real.dat' parameter_list = [[data,20],[data,30]] -def converter_hessianlocallylinearembedding_modular (data_fname,k): +def converter_hessianlocallylinearembedding (data_fname,k): try: - from modshogun import RealFeatures, CSVFile + from shogun import RealFeatures, CSVFile try: - from modshogun import HessianLocallyLinearEmbedding + from shogun import HessianLocallyLinearEmbedding except ImportError: print("HessianLocallyLinearEmbedding not available") exit(0) @@ -24,5 +24,5 @@ def converter_hessianlocallylinearembedding_modular (data_fname,k): if __name__=='__main__': print('HessianLocallyLinearEmbedding') - converter_hessianlocallylinearembedding_modular(*parameter_list[0]) + converter_hessianlocallylinearembedding(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/converter_isomap_modular.py b/examples/undocumented/python/converter_isomap.py similarity index 63% rename from examples/undocumented/python_modular/converter_isomap_modular.py rename to examples/undocumented/python/converter_isomap.py index 0c633e8082f..fb41999abd9 100644 --- a/examples/undocumented/python_modular/converter_isomap_modular.py +++ b/examples/undocumented/python/converter_isomap.py @@ -2,9 +2,9 @@ data = '../data/fm_train_real.dat' parameter_list = [[data]] -def converter_isomap_modular (data_fname): - from modshogun import RealFeatures, CSVFile - 
from modshogun import Isomap +def converter_isomap (data_fname): + from shogun import RealFeatures, CSVFile + from shogun import Isomap features = RealFeatures(CSVFile(data)) @@ -17,5 +17,5 @@ def converter_isomap_modular (data_fname): if __name__=='__main__': print('Isomap') - #converter_isomap_modular(*parameter_list[0]) + #converter_isomap(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/converter_kernellocallylinearembedding_modular.py b/examples/undocumented/python/converter_kernellocallylinearembedding.py similarity index 68% rename from examples/undocumented/python_modular/converter_kernellocallylinearembedding_modular.py rename to examples/undocumented/python/converter_kernellocallylinearembedding.py index b157d00275e..b8cf370b7a8 100644 --- a/examples/undocumented/python_modular/converter_kernellocallylinearembedding_modular.py +++ b/examples/undocumented/python/converter_kernellocallylinearembedding.py @@ -2,11 +2,11 @@ data = '../data/fm_train_real.dat' parameter_list = [[data,20],[data,30]] -def converter_kernellocallylinearembedding_modular (data_fname,k): +def converter_kernellocallylinearembedding (data_fname,k): try: - from modshogun import RealFeatures, LinearKernel, CSVFile + from shogun import RealFeatures, LinearKernel, CSVFile try: - from modshogun import KernelLocallyLinearEmbedding + from shogun import KernelLocallyLinearEmbedding except ImportError: print("KernelLocallyLinearEmbedding not available") exit(0) @@ -26,5 +26,5 @@ def converter_kernellocallylinearembedding_modular (data_fname,k): if __name__=='__main__': print('KernelLocallyLinearEmbedding') - converter_kernellocallylinearembedding_modular(*parameter_list[0]) + converter_kernellocallylinearembedding(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/converter_laplacianeigenmaps_modular.py b/examples/undocumented/python/converter_laplacianeigenmaps.py similarity index 71% rename from 
examples/undocumented/python_modular/converter_laplacianeigenmaps_modular.py rename to examples/undocumented/python/converter_laplacianeigenmaps.py index b2126757fe0..810ded4cb55 100644 --- a/examples/undocumented/python_modular/converter_laplacianeigenmaps_modular.py +++ b/examples/undocumented/python/converter_laplacianeigenmaps.py @@ -2,11 +2,11 @@ data = '../data/fm_train_real.dat' parameter_list = [[data,20],[data,30]] -def converter_laplacianeigenmaps_modular (data_fname,k): +def converter_laplacianeigenmaps (data_fname,k): try: - from modshogun import RealFeatures, CSVFile + from shogun import RealFeatures, CSVFile try: - from modshogun import LaplacianEigenmaps + from shogun import LaplacianEigenmaps except ImportError: print("LaplacianEigenmaps not available") exit(0) @@ -25,5 +25,5 @@ def converter_laplacianeigenmaps_modular (data_fname,k): if __name__=='__main__': print('LaplacianEigenmaps') - converter_laplacianeigenmaps_modular(*parameter_list[0]) + converter_laplacianeigenmaps(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/converter_linearlocaltangentspacealignment_modular.py b/examples/undocumented/python/converter_linearlocaltangentspacealignment.py similarity index 67% rename from examples/undocumented/python_modular/converter_linearlocaltangentspacealignment_modular.py rename to examples/undocumented/python/converter_linearlocaltangentspacealignment.py index 669dcf5b4b1..65e7218c963 100644 --- a/examples/undocumented/python_modular/converter_linearlocaltangentspacealignment_modular.py +++ b/examples/undocumented/python/converter_linearlocaltangentspacealignment.py @@ -2,11 +2,11 @@ data = '../data/fm_train_real.dat' parameter_list = [[data,20],[data,30]] -def converter_linearlocaltangentspacealignment_modular (data_fname,k): +def converter_linearlocaltangentspacealignment (data_fname,k): try: - from modshogun import RealFeatures, CSVFile + from shogun import RealFeatures, CSVFile try: - from modshogun import 
LinearLocalTangentSpaceAlignment + from shogun import LinearLocalTangentSpaceAlignment except ImportError: print("LinearLocalTangentSpaceAlignment not available") exit(0) @@ -24,5 +24,5 @@ def converter_linearlocaltangentspacealignment_modular (data_fname,k): if __name__=='__main__': print('LinearLocalTangentSpaceAlignment') - converter_linearlocaltangentspacealignment_modular(*parameter_list[0]) + converter_linearlocaltangentspacealignment(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/converter_localitypreservingprojections_modular.py b/examples/undocumented/python/converter_localitypreservingprojections.py similarity index 61% rename from examples/undocumented/python_modular/converter_localitypreservingprojections_modular.py rename to examples/undocumented/python/converter_localitypreservingprojections.py index b240abb2283..2ff78bae35f 100644 --- a/examples/undocumented/python_modular/converter_localitypreservingprojections_modular.py +++ b/examples/undocumented/python/converter_localitypreservingprojections.py @@ -2,9 +2,9 @@ data = '../data/fm_train_real.dat' parameter_list = [[data,20],[data,30]] -def converter_localitypreservingprojections_modular (data_fname,k): - from modshogun import RealFeatures, CSVFile - from modshogun import LocalityPreservingProjections +def converter_localitypreservingprojections (data_fname,k): + from shogun import RealFeatures, CSVFile + from shogun import LocalityPreservingProjections features = RealFeatures(CSVFile(data_fname)) converter = LocalityPreservingProjections() @@ -17,5 +17,5 @@ def converter_localitypreservingprojections_modular (data_fname,k): if __name__=='__main__': print('LocalityPreservingProjections') - #converter_localitypreservingprojections_modular(*parameter_list[0]) + #converter_localitypreservingprojections(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/converter_locallylinearembedding_modular.py 
b/examples/undocumented/python/converter_locallylinearembedding.py similarity index 69% rename from examples/undocumented/python_modular/converter_locallylinearembedding_modular.py rename to examples/undocumented/python/converter_locallylinearembedding.py index cf6510f563d..1c85d3ffd11 100644 --- a/examples/undocumented/python_modular/converter_locallylinearembedding_modular.py +++ b/examples/undocumented/python/converter_locallylinearembedding.py @@ -2,11 +2,11 @@ data = '../data/fm_train_real.dat' parameter_list = [[data,20],[data,30]] -def converter_locallylinearembedding_modular (data_fname,k): +def converter_locallylinearembedding (data_fname,k): try: - from modshogun import RealFeatures, CSVFile + from shogun import RealFeatures, CSVFile try: - from modshogun import LocallyLinearEmbedding + from shogun import LocallyLinearEmbedding except ImportError: print("LocallyLinearEmbedding not available") exit(0) @@ -24,5 +24,5 @@ def converter_locallylinearembedding_modular (data_fname,k): if __name__=='__main__': print('LocallyLinearEmbedding') - converter_locallylinearembedding_modular(*parameter_list[0]) + converter_locallylinearembedding(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/converter_localtangentspacealignment_modular.py b/examples/undocumented/python/converter_localtangentspacealignment.py similarity index 68% rename from examples/undocumented/python_modular/converter_localtangentspacealignment_modular.py rename to examples/undocumented/python/converter_localtangentspacealignment.py index 42cde892c78..d47e706b5d7 100644 --- a/examples/undocumented/python_modular/converter_localtangentspacealignment_modular.py +++ b/examples/undocumented/python/converter_localtangentspacealignment.py @@ -2,11 +2,11 @@ data = '../data/fm_train_real.dat' parameter_list = [[data,20],[data,30]] -def converter_localtangentspacealignment_modular (data_fname,k): +def converter_localtangentspacealignment (data_fname,k): try: - from modshogun import 
RealFeatures, CSVFile + from shogun import RealFeatures, CSVFile try: - from modshogun import LocalTangentSpaceAlignment + from shogun import LocalTangentSpaceAlignment except ImportError: print("LocalTangentSpaceAlignment not available") exit(0) @@ -25,5 +25,5 @@ def converter_localtangentspacealignment_modular (data_fname,k): if __name__=='__main__': print('LocalTangentSpaceAlignment') - converter_localtangentspacealignment_modular(*parameter_list[0]) + converter_localtangentspacealignment(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/converter_multidimensionalscaling_modular.py b/examples/undocumented/python/converter_multidimensionalscaling.py similarity index 79% rename from examples/undocumented/python_modular/converter_multidimensionalscaling_modular.py rename to examples/undocumented/python/converter_multidimensionalscaling.py index 54b8d9d944e..8acaac5de4a 100644 --- a/examples/undocumented/python_modular/converter_multidimensionalscaling_modular.py +++ b/examples/undocumented/python/converter_multidimensionalscaling.py @@ -2,10 +2,10 @@ data = '../data/fm_train_real.dat' parameter_list = [[data]] -def converter_multidimensionalscaling_modular (data_fname): +def converter_multidimensionalscaling (data_fname): try: import numpy - from modshogun import RealFeatures, MultidimensionalScaling, EuclideanDistance, CSVFile + from shogun import RealFeatures, MultidimensionalScaling, EuclideanDistance, CSVFile features = RealFeatures(CSVFile(data_fname)) @@ -29,4 +29,4 @@ def converter_multidimensionalscaling_modular (data_fname): if __name__=='__main__': print('MultidimensionalScaling') - converter_multidimensionalscaling_modular(*parameter_list[0]) + converter_multidimensionalscaling(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/converter_stochasticproximityembedding_modular.py b/examples/undocumented/python/converter_stochasticproximityembedding.py similarity index 71% rename from 
examples/undocumented/python_modular/converter_stochasticproximityembedding_modular.py rename to examples/undocumented/python/converter_stochasticproximityembedding.py index c1bcced509b..16684987d13 100644 --- a/examples/undocumented/python_modular/converter_stochasticproximityembedding_modular.py +++ b/examples/undocumented/python/converter_stochasticproximityembedding.py @@ -2,9 +2,9 @@ data = '../data/fm_train_real.dat' parameter_list = [[data, 20]] -def converter_stochasticproximityembedding_modular (data_fname, k): +def converter_stochasticproximityembedding (data_fname, k): try: - from modshogun import RealFeatures,StochasticProximityEmbedding, SPE_GLOBAL, SPE_LOCAL, CSVFile + from shogun import RealFeatures,StochasticProximityEmbedding, SPE_GLOBAL, SPE_LOCAL, CSVFile features = RealFeatures(CSVFile(data_fname)) @@ -25,4 +25,4 @@ def converter_stochasticproximityembedding_modular (data_fname, k): if __name__=='__main__': print('StochasticProximityEmbedding') - converter_stochasticproximityembedding_modular(*parameter_list[0]) + converter_stochasticproximityembedding(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/converter_tdistributedstochasticneighborembedding_modular.py b/examples/undocumented/python/converter_tdistributedstochasticneighborembedding.py similarity index 61% rename from examples/undocumented/python_modular/converter_tdistributedstochasticneighborembedding_modular.py rename to examples/undocumented/python/converter_tdistributedstochasticneighborembedding.py index 16fdf12f726..9d93083ec32 100644 --- a/examples/undocumented/python_modular/converter_tdistributedstochasticneighborembedding_modular.py +++ b/examples/undocumented/python/converter_tdistributedstochasticneighborembedding.py @@ -2,10 +2,10 @@ data = '../data/fm_train_real.dat' parameter_list = [[data]] -def converter_tdistributedstochasticneighborembedding_modular(data_fname, seed=1): +def converter_tdistributedstochasticneighborembedding(data_fname, seed=1): 
try: - from modshogun import RealFeatures, TDistributedStochasticNeighborEmbedding - from modshogun import Math_init_random, CSVFile + from shogun import RealFeatures, TDistributedStochasticNeighborEmbedding + from shogun import Math_init_random, CSVFile # reproducible results Math_init_random(seed) @@ -22,4 +22,4 @@ def converter_tdistributedstochasticneighborembedding_modular(data_fname, seed=1 if __name__=='__main__': print('TDistributedStochasticNeighborEmbedding') - converter_tdistributedstochasticneighborembedding_modular(*parameter_list[0]) + converter_tdistributedstochasticneighborembedding(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/distance_canberra_modular.py b/examples/undocumented/python/distance_canberra.py similarity index 73% rename from examples/undocumented/python_modular/distance_canberra_modular.py rename to examples/undocumented/python/distance_canberra.py index 7c65bf1dfa5..6c006827a54 100644 --- a/examples/undocumented/python_modular/distance_canberra_modular.py +++ b/examples/undocumented/python/distance_canberra.py @@ -4,8 +4,8 @@ parameter_list = [[traindat,testdat],[traindat,testdat]] -def distance_canberra_modular (train_fname=traindat,test_fname=testdat): - from modshogun import RealFeatures, CanberraMetric, CSVFile +def distance_canberra (train_fname=traindat,test_fname=testdat): + from shogun import RealFeatures, CanberraMetric, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -19,4 +19,4 @@ def distance_canberra_modular (train_fname=traindat,test_fname=testdat): if __name__=='__main__': print('CanberaMetric') - distance_canberra_modular(*parameter_list[0]) + distance_canberra(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/distance_canberraword_modular.py b/examples/undocumented/python/distance_canberraword.py similarity index 78% rename from examples/undocumented/python_modular/distance_canberraword_modular.py rename to 
examples/undocumented/python/distance_canberraword.py index ba7bcd876a8..b01198783b6 100644 --- a/examples/undocumented/python_modular/distance_canberraword_modular.py +++ b/examples/undocumented/python/distance_canberraword.py @@ -8,10 +8,10 @@ parameter_list = [[traindna,testdna,3,0,False],[traindna,testdna,3,0,False]] -def distance_canberraword_modular (fm_train_dna=traindna,fm_test_dna=testdna,order=3,gap=0,reverse=False): - from modshogun import StringCharFeatures, StringWordFeatures, DNA - from modshogun import SortWordString - from modshogun import CanberraWordDistance +def distance_canberraword (fm_train_dna=traindna,fm_test_dna=testdna,order=3,gap=0,reverse=False): + from shogun import StringCharFeatures, StringWordFeatures, DNA + from shogun import SortWordString + from shogun import CanberraWordDistance charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_train_dna) @@ -38,4 +38,4 @@ def distance_canberraword_modular (fm_train_dna=traindna,fm_test_dna=testdna,ord if __name__=='__main__': print('CanberraWordDistance') - distance_canberraword_modular(*parameter_list[0]) + distance_canberraword(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/distance_chebyshew_modular.py b/examples/undocumented/python/distance_chebyshew.py similarity index 73% rename from examples/undocumented/python_modular/distance_chebyshew_modular.py rename to examples/undocumented/python/distance_chebyshew.py index fe33d770cb0..1a0a0bb6164 100644 --- a/examples/undocumented/python_modular/distance_chebyshew_modular.py +++ b/examples/undocumented/python/distance_chebyshew.py @@ -4,8 +4,8 @@ parameter_list = [[traindat,testdat],[traindat,testdat]] -def distance_chebyshew_modular (train_fname=traindat,test_fname=testdat): - from modshogun import RealFeatures, ChebyshewMetric, CSVFile +def distance_chebyshew (train_fname=traindat,test_fname=testdat): + from shogun import RealFeatures, ChebyshewMetric, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) 
feats_test=RealFeatures(CSVFile(test_fname)) @@ -19,4 +19,4 @@ def distance_chebyshew_modular (train_fname=traindat,test_fname=testdat): if __name__=='__main__': print('ChebyshewMetric') - distance_chebyshew_modular(*parameter_list[0]) + distance_chebyshew(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/distance_chisquare_modular.py b/examples/undocumented/python/distance_chisquare.py similarity index 73% rename from examples/undocumented/python_modular/distance_chisquare_modular.py rename to examples/undocumented/python/distance_chisquare.py index 0e30bc6d9b2..ce8588b4a34 100644 --- a/examples/undocumented/python_modular/distance_chisquare_modular.py +++ b/examples/undocumented/python/distance_chisquare.py @@ -4,8 +4,8 @@ parameter_list = [[traindat,testdat,],[traindat,testdat]] -def distance_chisquare_modular (train_fname=traindat,test_fname=testdat): - from modshogun import RealFeatures, ChiSquareDistance, CSVFile +def distance_chisquare (train_fname=traindat,test_fname=testdat): + from shogun import RealFeatures, ChiSquareDistance, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -18,4 +18,4 @@ def distance_chisquare_modular (train_fname=traindat,test_fname=testdat): if __name__=='__main__': print('ChiSquareDistance') - distance_chisquare_modular(*parameter_list[0]) + distance_chisquare(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/distance_director_euclidean_modular.py b/examples/undocumented/python/distance_director_euclidean.py similarity index 80% rename from examples/undocumented/python_modular/distance_director_euclidean_modular.py rename to examples/undocumented/python/distance_director_euclidean.py index 9945f8dcb37..deab1aa6aee 100644 --- a/examples/undocumented/python_modular/distance_director_euclidean_modular.py +++ b/examples/undocumented/python/distance_director_euclidean.py @@ -1,15 +1,15 @@ #!/usr/bin/env python import numpy -from modshogun import 
RealFeatures, MSG_DEBUG +from shogun import RealFeatures, MSG_DEBUG numpy.random.seed(17) traindat = numpy.random.random_sample((10,10)) testdat = numpy.random.random_sample((10,10)) parameter_list=[[traindat,testdat,1.2],[traindat,testdat,1.4]] -def distance_director_euclidean_modular (fm_train_real=traindat,fm_test_real=testdat,scale=1.2): +def distance_director_euclidean (fm_train_real=traindat,fm_test_real=testdat,scale=1.2): try: - from modshogun import DirectorDistance + from shogun import DirectorDistance except ImportError: print("recompile shogun with --enable-swig-directors") return @@ -22,8 +22,8 @@ def distance_function(self, idx_a, idx_b): seq2 = self.get_rhs().get_feature_vector(idx_b) return numpy.linalg.norm(seq1-seq2) - from modshogun import EuclideanDistance - from modshogun import Time + from shogun import EuclideanDistance + from shogun import Time feats_train=RealFeatures(fm_train_real) #feats_train.io.set_loglevel(MSG_DEBUG) @@ -53,4 +53,4 @@ def distance_function(self, idx_a, idx_b): if __name__=='__main__': print('DirectorEuclideanDistance') - distance_director_euclidean_modular(*parameter_list[0]) + distance_director_euclidean(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/distance_geodesic_modular.py b/examples/undocumented/python/distance_geodesic.py similarity index 73% rename from examples/undocumented/python_modular/distance_geodesic_modular.py rename to examples/undocumented/python/distance_geodesic.py index 1d33867bc4f..eab7ba89811 100644 --- a/examples/undocumented/python_modular/distance_geodesic_modular.py +++ b/examples/undocumented/python/distance_geodesic.py @@ -4,9 +4,9 @@ parameter_list = [[traindat,testdat],[traindat,testdat]] -def distance_geodesic_modular (train_fname=traindat,test_fname=testdat): +def distance_geodesic (train_fname=traindat,test_fname=testdat): - from modshogun import RealFeatures, GeodesicMetric, CSVFile + from shogun import RealFeatures, GeodesicMetric, CSVFile 
feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -22,4 +22,4 @@ def distance_geodesic_modular (train_fname=traindat,test_fname=testdat): if __name__=='__main__': print('GeodesicMetric') - distance_geodesic_modular(*parameter_list[0]) + distance_geodesic(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/distance_hammingword_modular.py b/examples/undocumented/python/distance_hammingword.py similarity index 82% rename from examples/undocumented/python_modular/distance_hammingword_modular.py rename to examples/undocumented/python/distance_hammingword.py index f853da7087f..e0534c0cbf9 100644 --- a/examples/undocumented/python_modular/distance_hammingword_modular.py +++ b/examples/undocumented/python/distance_hammingword.py @@ -9,12 +9,12 @@ parameter_list = [[traindna,testdna,testdat,4,0,False,False], [traindna,testdna,testdat,3,0,False,False]] -def distance_hammingword_modular (fm_train_dna=traindna,fm_test_dna=testdna, +def distance_hammingword (fm_train_dna=traindna,fm_test_dna=testdna, fm_test_real=testdat,order=3,gap=0,reverse=False,use_sign=False): - from modshogun import StringCharFeatures, StringWordFeatures, DNA - from modshogun import SortWordString - from modshogun import HammingWordDistance + from shogun import StringCharFeatures, StringWordFeatures, DNA + from shogun import SortWordString + from shogun import HammingWordDistance charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_train_dna) @@ -41,4 +41,4 @@ def distance_hammingword_modular (fm_train_dna=traindna,fm_test_dna=testdna, if __name__=='__main__': print('HammingWordDistance') - distance_hammingword_modular(*parameter_list[0]) + distance_hammingword(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/distance_jensen_modular.py b/examples/undocumented/python/distance_jensen.py similarity index 74% rename from examples/undocumented/python_modular/distance_jensen_modular.py rename to 
examples/undocumented/python/distance_jensen.py index 6b4205f0eb6..43cd0233a95 100644 --- a/examples/undocumented/python_modular/distance_jensen_modular.py +++ b/examples/undocumented/python/distance_jensen.py @@ -4,9 +4,9 @@ parameter_list = [[traindat,testdat],[traindat,testdat]] -def distance_jensen_modular (train_fname=traindat,test_fname=testdat): +def distance_jensen (train_fname=traindat,test_fname=testdat): - from modshogun import RealFeatures, JensenMetric, CSVFile + from shogun import RealFeatures, JensenMetric, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -20,4 +20,4 @@ def distance_jensen_modular (train_fname=traindat,test_fname=testdat): if __name__=='__main__': print('JensenMetric') - distance_jensen_modular(*parameter_list[0]) + distance_jensen(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/distance_manhattenword_modular.py b/examples/undocumented/python/distance_manhattenword.py similarity index 77% rename from examples/undocumented/python_modular/distance_manhattenword_modular.py rename to examples/undocumented/python/distance_manhattenword.py index c639dd72186..e377f757285 100644 --- a/examples/undocumented/python_modular/distance_manhattenword_modular.py +++ b/examples/undocumented/python/distance_manhattenword.py @@ -4,9 +4,9 @@ parameter_list = [[traindna,testdna,3,0,False],[traindna,testdna,4,0,False]] -def distance_manhattenword_modular (train_fname=traindna,test_fname=testdna,order=3,gap=0,reverse=False): - from modshogun import StringCharFeatures, StringWordFeatures, DNA - from modshogun import SortWordString, ManhattanWordDistance, CSVFile +def distance_manhattenword (train_fname=traindna,test_fname=testdna,order=3,gap=0,reverse=False): + from shogun import StringCharFeatures, StringWordFeatures, DNA + from shogun import SortWordString, ManhattanWordDistance, CSVFile charfeat=StringCharFeatures(CSVFile(train_fname), DNA) 
feats_train=StringWordFeatures(charfeat.get_alphabet()) @@ -31,4 +31,4 @@ def distance_manhattenword_modular (train_fname=traindna,test_fname=testdna,orde if __name__=='__main__': print('ManhattanWordDistance') - distance_manhattenword_modular(*parameter_list[0]) + distance_manhattenword(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/distance_minkowski_modular.py b/examples/undocumented/python/distance_minkowski.py similarity index 73% rename from examples/undocumented/python_modular/distance_minkowski_modular.py rename to examples/undocumented/python/distance_minkowski.py index 76250567fff..765e9818e9b 100644 --- a/examples/undocumented/python_modular/distance_minkowski_modular.py +++ b/examples/undocumented/python/distance_minkowski.py @@ -4,8 +4,8 @@ parameter_list = [[traindat,testdat,3],[traindat,testdat,4]] -def distance_minkowski_modular (train_fname=traindat,test_fname=testdat,k=3): - from modshogun import RealFeatures, MinkowskiMetric, CSVFile +def distance_minkowski (train_fname=traindat,test_fname=testdat,k=3): + from shogun import RealFeatures, MinkowskiMetric, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -20,5 +20,5 @@ def distance_minkowski_modular (train_fname=traindat,test_fname=testdat,k=3): if __name__=='__main__': print('MinkowskiMetric') - distance_minkowski_modular(*parameter_list[0]) + distance_minkowski(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/distance_normsquared_modular.py b/examples/undocumented/python/distance_normsquared.py similarity index 74% rename from examples/undocumented/python_modular/distance_normsquared_modular.py rename to examples/undocumented/python/distance_normsquared.py index ede13312457..0e1d435514b 100644 --- a/examples/undocumented/python_modular/distance_normsquared_modular.py +++ b/examples/undocumented/python/distance_normsquared.py @@ -4,8 +4,8 @@ parameter_list = [[traindat,testdat],[traindat,testdat]] 
-def distance_normsquared_modular (train_fname=traindat,test_fname=testdat): - from modshogun import RealFeatures, EuclideanDistance, CSVFile +def distance_normsquared (train_fname=traindat,test_fname=testdat): + from shogun import RealFeatures, EuclideanDistance, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -22,4 +22,4 @@ def distance_normsquared_modular (train_fname=traindat,test_fname=testdat): if __name__=='__main__': print('EuclideanDistance - NormSquared') - distance_normsquared_modular(*parameter_list[0]) + distance_normsquared(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/distance_sparseeuclidean_modular.py b/examples/undocumented/python/distance_sparseeuclidean.py similarity index 75% rename from examples/undocumented/python_modular/distance_sparseeuclidean_modular.py rename to examples/undocumented/python/distance_sparseeuclidean.py index d1221cd4f5e..9f2c48eac01 100644 --- a/examples/undocumented/python_modular/distance_sparseeuclidean_modular.py +++ b/examples/undocumented/python/distance_sparseeuclidean.py @@ -4,8 +4,8 @@ parameter_list = [[traindat,testdat],[traindat,testdat]] -def distance_sparseeuclidean_modular (train_fname=traindat,test_fname=testdat): - from modshogun import RealFeatures, SparseRealFeatures, SparseEuclideanDistance, CSVFile +def distance_sparseeuclidean (train_fname=traindat,test_fname=testdat): + from shogun import RealFeatures, SparseRealFeatures, SparseEuclideanDistance, CSVFile realfeat=RealFeatures(CSVFile(train_fname)) feats_train=SparseRealFeatures() @@ -24,4 +24,4 @@ def distance_sparseeuclidean_modular (train_fname=traindat,test_fname=testdat): if __name__=='__main__': print('SparseEuclideanDistance') - distance_sparseeuclidean_modular(*parameter_list[0]) + distance_sparseeuclidean(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/distance_tanimoto_modular.py b/examples/undocumented/python/distance_tanimoto.py similarity 
index 73% rename from examples/undocumented/python_modular/distance_tanimoto_modular.py rename to examples/undocumented/python/distance_tanimoto.py index c71fb49b2e0..471ec4bec2a 100644 --- a/examples/undocumented/python_modular/distance_tanimoto_modular.py +++ b/examples/undocumented/python/distance_tanimoto.py @@ -4,8 +4,8 @@ parameter_list = [[traindat,testdat],[traindat,testdat]] -def distance_tanimoto_modular (train_fname=traindat,test_fname=testdat): - from modshogun import RealFeatures, TanimotoDistance, CSVFile +def distance_tanimoto (train_fname=traindat,test_fname=testdat): + from shogun import RealFeatures, TanimotoDistance, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -20,4 +20,4 @@ def distance_tanimoto_modular (train_fname=traindat,test_fname=testdat): if __name__=='__main__': print('TanimotoDistance') - distance_tanimoto_modular(*parameter_list[0]) + distance_tanimoto(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/distribution_histogram_modular.py b/examples/undocumented/python/distribution_histogram.py similarity index 80% rename from examples/undocumented/python_modular/distribution_histogram_modular.py rename to examples/undocumented/python/distribution_histogram.py index bacfd045b78..52f1752c1f4 100644 --- a/examples/undocumented/python_modular/distribution_histogram_modular.py +++ b/examples/undocumented/python/distribution_histogram.py @@ -6,9 +6,9 @@ parameter_list = [[traindna,3,0,False],[traindna,4,0,False]] -def distribution_histogram_modular (fm_dna=traindna,order=3,gap=0,reverse=False): - from modshogun import StringWordFeatures, StringCharFeatures, DNA - from modshogun import Histogram +def distribution_histogram (fm_dna=traindna,order=3,gap=0,reverse=False): + from shogun import StringWordFeatures, StringCharFeatures, DNA + from shogun import Histogram charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_dna) @@ -35,5 +35,5 @@ def 
distribution_histogram_modular (fm_dna=traindna,order=3,gap=0,reverse=False) if __name__=='__main__': print('Histogram') - distribution_histogram_modular(*parameter_list[0]) + distribution_histogram(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/distribution_hmm_modular.py b/examples/undocumented/python/distribution_hmm.py similarity index 82% rename from examples/undocumented/python_modular/distribution_hmm_modular.py rename to examples/undocumented/python/distribution_hmm.py index c95e937b61e..a3019d33c8e 100644 --- a/examples/undocumented/python_modular/distribution_hmm_modular.py +++ b/examples/undocumented/python/distribution_hmm.py @@ -5,9 +5,9 @@ parameter_list=[[data, 1, 64, 1e-5, 2, 0, False, 5], [data, 3, 6, 1e-1, 1, 0, False, 2]] -def distribution_hmm_modular(fm_cube, N, M, pseudo, order, gap, reverse, num_examples): - from modshogun import StringWordFeatures, StringCharFeatures, CUBE - from modshogun import HMM, BW_NORMAL +def distribution_hmm(fm_cube, N, M, pseudo, order, gap, reverse, num_examples): + from shogun import StringWordFeatures, StringCharFeatures, CUBE + from shogun import HMM, BW_NORMAL charfeat=StringCharFeatures(CUBE) charfeat.set_features(fm_cube) @@ -42,4 +42,4 @@ def distribution_hmm_modular(fm_cube, N, M, pseudo, order, gap, reverse, num_exa if __name__=='__main__': print('HMM') - distribution_hmm_modular(*parameter_list[0]) + distribution_hmm(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/distribution_linearhmm_modular.py b/examples/undocumented/python/distribution_linearhmm.py similarity index 80% rename from examples/undocumented/python_modular/distribution_linearhmm_modular.py rename to examples/undocumented/python/distribution_linearhmm.py index 721f6e847af..defb8e0f366 100644 --- a/examples/undocumented/python_modular/distribution_linearhmm_modular.py +++ b/examples/undocumented/python/distribution_linearhmm.py @@ -6,10 +6,10 @@ parameter_list = 
[[traindna,3,0,False],[traindna,4,0,False]] -def distribution_linearhmm_modular (fm_dna=traindna,order=3,gap=0,reverse=False): +def distribution_linearhmm (fm_dna=traindna,order=3,gap=0,reverse=False): - from modshogun import StringWordFeatures, StringCharFeatures, DNA - from modshogun import LinearHMM + from shogun import StringWordFeatures, StringCharFeatures, DNA + from shogun import LinearHMM charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_dna) @@ -36,5 +36,5 @@ def distribution_linearhmm_modular (fm_dna=traindna,order=3,gap=0,reverse=False) ########################################################################### if __name__=='__main__': - distribution_linearhmm_modular(*parameter_list[0]) + distribution_linearhmm(*parameter_list[0]) print('LinearHMM') diff --git a/examples/undocumented/python_modular/distribution_ppwm_modular.py b/examples/undocumented/python/distribution_ppwm.py similarity index 86% rename from examples/undocumented/python_modular/distribution_ppwm_modular.py rename to examples/undocumented/python/distribution_ppwm.py index 01f6fcb6513..11097a4f631 100644 --- a/examples/undocumented/python_modular/distribution_ppwm_modular.py +++ b/examples/undocumented/python/distribution_ppwm.py @@ -6,9 +6,9 @@ parameter_list = [[traindna,3],[traindna,4]] -def distribution_ppwm_modular (fm_dna=traindna, order=3): - from modshogun import StringByteFeatures, StringCharFeatures, DNA - from modshogun import PositionalPWM +def distribution_ppwm (fm_dna=traindna, order=3): + from shogun import StringByteFeatures, StringCharFeatures, DNA + from shogun import PositionalPWM from numpy import array,e,log,exp @@ -62,4 +62,4 @@ def distribution_ppwm_modular (fm_dna=traindna, order=3): if __name__=='__main__': print('PositionalPWM') - distribution_ppwm_modular(*parameter_list[0]) + distribution_ppwm(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/evaluation_clustering.py b/examples/undocumented/python/evaluation_clustering.py 
similarity index 84% rename from examples/undocumented/python_modular/evaluation_clustering.py rename to examples/undocumented/python/evaluation_clustering.py index 22c5388eb3f..b0376772240 100644 --- a/examples/undocumented/python_modular/evaluation_clustering.py +++ b/examples/undocumented/python/evaluation_clustering.py @@ -26,9 +26,9 @@ def prepare_data(): def run_clustering(data, k): - from modshogun import KMeans - from modshogun import EuclideanDistance - from modshogun import RealFeatures + from shogun import KMeans + from shogun import EuclideanDistance + from shogun import RealFeatures fea = RealFeatures(data) distance = EuclideanDistance(fea, fea) @@ -40,9 +40,9 @@ def run_clustering(data, k): return kmeans.get_cluster_centers() def assign_labels(data, centroids, ncenters): - from modshogun import EuclideanDistance - from modshogun import RealFeatures, MulticlassLabels - from modshogun import KNN + from shogun import EuclideanDistance + from shogun import RealFeatures, MulticlassLabels + from shogun import KNN from numpy import arange labels = MulticlassLabels(arange(0.,ncenters)) @@ -54,9 +54,9 @@ def assign_labels(data, centroids, ncenters): return knn.apply(fea) def evaluation_clustering (features=fea, ground_truth=gnd_raw, ncenters=10): - from modshogun import ClusteringAccuracy, ClusteringMutualInformation - from modshogun import MulticlassLabels - from modshogun import Math + from shogun import ClusteringAccuracy, ClusteringMutualInformation + from shogun import MulticlassLabels + from shogun import Math # reproducable results Math.init_random(1) diff --git a/examples/undocumented/python_modular/evaluation_clustering_simple.py b/examples/undocumented/python/evaluation_clustering_simple.py similarity index 83% rename from examples/undocumented/python_modular/evaluation_clustering_simple.py rename to examples/undocumented/python/evaluation_clustering_simple.py index a98c9d2c138..fbce11084ad 100644 --- 
a/examples/undocumented/python_modular/evaluation_clustering_simple.py +++ b/examples/undocumented/python/evaluation_clustering_simple.py @@ -5,10 +5,10 @@ #from pylab import * def run_clustering(data, k): - from modshogun import KMeans - from modshogun import Math_init_random - from modshogun import EuclideanDistance - from modshogun import RealFeatures + from shogun import KMeans + from shogun import Math_init_random + from shogun import EuclideanDistance + from shogun import RealFeatures fea = RealFeatures(data) distance = EuclideanDistance(fea, fea) @@ -20,9 +20,9 @@ def run_clustering(data, k): return kmeans.get_cluster_centers() def assign_labels(data, centroids, ncenters): - from modshogun import EuclideanDistance - from modshogun import RealFeatures, MulticlassLabels - from modshogun import KNN + from shogun import EuclideanDistance + from shogun import RealFeatures, MulticlassLabels + from shogun import KNN from numpy import arange labels = MulticlassLabels(arange(0.,ncenters)) @@ -34,9 +34,9 @@ def assign_labels(data, centroids, ncenters): return knn.apply(fea) def evaluation_clustering_simple (n_data=100, sqrt_num_blobs=4, distance=5): - from modshogun import ClusteringAccuracy, ClusteringMutualInformation - from modshogun import MulticlassLabels, GaussianBlobsDataGenerator - from modshogun import Math + from shogun import ClusteringAccuracy, ClusteringMutualInformation + from shogun import MulticlassLabels, GaussianBlobsDataGenerator + from shogun import Math # reproducable results Math.init_random(1) diff --git a/examples/undocumented/python_modular/evaluation_contingencytableevaluation_modular.py b/examples/undocumented/python/evaluation_contingencytableevaluation.py similarity index 78% rename from examples/undocumented/python_modular/evaluation_contingencytableevaluation_modular.py rename to examples/undocumented/python/evaluation_contingencytableevaluation.py index 44c0ae06f89..8868e2ca310 100644 --- 
a/examples/undocumented/python_modular/evaluation_contingencytableevaluation_modular.py +++ b/examples/undocumented/python/evaluation_contingencytableevaluation.py @@ -9,12 +9,12 @@ parameter_list = [[ground_truth,predicted]] -def evaluation_contingencytableevaluation_modular (ground_truth, predicted): - from modshogun import BinaryLabels - from modshogun import ContingencyTableEvaluation - from modshogun import AccuracyMeasure,ErrorRateMeasure,BALMeasure - from modshogun import WRACCMeasure,F1Measure,CrossCorrelationMeasure - from modshogun import RecallMeasure,PrecisionMeasure,SpecificityMeasure +def evaluation_contingencytableevaluation (ground_truth, predicted): + from shogun import BinaryLabels + from shogun import ContingencyTableEvaluation + from shogun import AccuracyMeasure,ErrorRateMeasure,BALMeasure + from shogun import WRACCMeasure,F1Measure,CrossCorrelationMeasure + from shogun import RecallMeasure,PrecisionMeasure,SpecificityMeasure ground_truth_labels = BinaryLabels(ground_truth) predicted_labels = BinaryLabels(predicted) @@ -54,5 +54,5 @@ def evaluation_contingencytableevaluation_modular (ground_truth, predicted): if __name__=='__main__': print('EvaluationContingencyTableEvaluation') - evaluation_contingencytableevaluation_modular(*parameter_list[0]) + evaluation_contingencytableevaluation(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/evaluation_cross_validation_classification.py b/examples/undocumented/python/evaluation_cross_validation_classification.py similarity index 85% rename from examples/undocumented/python_modular/evaluation_cross_validation_classification.py rename to examples/undocumented/python/evaluation_cross_validation_classification.py index deafd88a9e6..5a7def4bace 100644 --- a/examples/undocumented/python_modular/evaluation_cross_validation_classification.py +++ b/examples/undocumented/python/evaluation_cross_validation_classification.py @@ -22,12 +22,12 @@ parameter_list = [[traindat,label_traindat]] def 
evaluation_cross_validation_classification (traindat=traindat, label_traindat=label_traindat): - from modshogun import CrossValidation, CrossValidationResult - from modshogun import ContingencyTableEvaluation, ACCURACY - from modshogun import StratifiedCrossValidationSplitting - from modshogun import BinaryLabels - from modshogun import RealFeatures - from modshogun import LibLinear, L2R_L2LOSS_SVC + from shogun import CrossValidation, CrossValidationResult + from shogun import ContingencyTableEvaluation, ACCURACY + from shogun import StratifiedCrossValidationSplitting + from shogun import BinaryLabels + from shogun import RealFeatures + from shogun import LibLinear, L2R_L2LOSS_SVC # training data features=RealFeatures(traindat) diff --git a/examples/undocumented/python_modular/evaluation_cross_validation_mkl_weight_storage.py b/examples/undocumented/python/evaluation_cross_validation_mkl_weight_storage.py similarity index 71% rename from examples/undocumented/python_modular/evaluation_cross_validation_mkl_weight_storage.py rename to examples/undocumented/python/evaluation_cross_validation_mkl_weight_storage.py index 9c8bd61d398..27a0378dbfb 100644 --- a/examples/undocumented/python_modular/evaluation_cross_validation_mkl_weight_storage.py +++ b/examples/undocumented/python/evaluation_cross_validation_mkl_weight_storage.py @@ -22,15 +22,14 @@ parameter_list = [[traindat,label_traindat]] def evaluation_cross_validation_mkl_weight_storage(traindat=traindat, label_traindat=label_traindat): - from modshogun import CrossValidation, CrossValidationResult - from modshogun import CrossValidationPrintOutput - from modshogun import CrossValidationMKLStorage - from modshogun import ContingencyTableEvaluation, ACCURACY - from modshogun import StratifiedCrossValidationSplitting - from modshogun import BinaryLabels - from modshogun import RealFeatures, CombinedFeatures - from modshogun import GaussianKernel, CombinedKernel - from modshogun import LibSVM, MKLClassification + from 
shogun import CrossValidation, CrossValidationResult + from shogun import ParameterObserverCV + from shogun import ContingencyTableEvaluation, ACCURACY + from shogun import StratifiedCrossValidationSplitting + from shogun import BinaryLabels + from shogun import RealFeatures, CombinedFeatures + from shogun import GaussianKernel, CombinedKernel + from shogun import LibSVM, MKLClassification # training data, combined features all on same data features=RealFeatures(traindat) @@ -65,18 +64,25 @@ def evaluation_cross_validation_mkl_weight_storage(traindat=traindat, label_trai cross_validation.set_autolock(False) # append cross vlaidation output classes - #cross_validation.add_cross_validation_output(CrossValidationPrintOutput()) - mkl_storage=CrossValidationMKLStorage() - cross_validation.add_cross_validation_output(mkl_storage) + mkl_storage=ParameterObserverCV() + cross_validation.subscribe_to_parameters(mkl_storage) cross_validation.set_num_runs(3) # perform cross-validation result=cross_validation.evaluate() # print mkl weights - weights=mkl_storage.get_mkl_weights() - #print "mkl weights during cross--validation" - #print weights + weights = [] + for obs_index in range(mkl_storage.get_num_observations()): + obs = mkl_storage.get_observation(obs_index) + for fold_index in range(obs.get_num_folds()): + fold = obs.get_fold(fold_index) + machine = MKLClassification.obtain_from_generic(fold.get_trained_machine()) + w = machine.get_kernel().get_subkernel_weights() + weights.append(w) + + print("mkl weights during cross--validation") + print(weights) if __name__=='__main__': print('Evaluation CrossValidationClassification') diff --git a/examples/undocumented/python_modular/evaluation_cross_validation_multiclass_storage.py b/examples/undocumented/python/evaluation_cross_validation_multiclass_storage.py similarity index 64% rename from examples/undocumented/python_modular/evaluation_cross_validation_multiclass_storage.py rename to 
examples/undocumented/python/evaluation_cross_validation_multiclass_storage.py index 03dad281383..8a622c731f5 100644 --- a/examples/undocumented/python_modular/evaluation_cross_validation_multiclass_storage.py +++ b/examples/undocumented/python/evaluation_cross_validation_multiclass_storage.py @@ -23,16 +23,16 @@ parameter_list = [[traindat,label_traindat]] def evaluation_cross_validation_multiclass_storage (traindat=traindat, label_traindat=label_traindat): - from modshogun import CrossValidation, CrossValidationResult - from modshogun import CrossValidationPrintOutput - from modshogun import CrossValidationMKLStorage, CrossValidationMulticlassStorage - from modshogun import MulticlassAccuracy, F1Measure - from modshogun import StratifiedCrossValidationSplitting - from modshogun import MulticlassLabels - from modshogun import RealFeatures, CombinedFeatures - from modshogun import GaussianKernel, CombinedKernel - from modshogun import MKLMulticlass - from modshogun import Statistics, MSG_DEBUG, Math + from shogun import CrossValidation, CrossValidationResult + from shogun import ParameterObserverCV + from shogun import MulticlassAccuracy, F1Measure + from shogun import StratifiedCrossValidationSplitting + from shogun import MulticlassLabels + from shogun import RealFeatures, CombinedFeatures + from shogun import GaussianKernel, CombinedKernel + from shogun import MKLMulticlass + from shogun import Statistics, MSG_DEBUG, Math + from shogun import ROCEvaluation Math.init_random(1) @@ -67,23 +67,28 @@ def evaluation_cross_validation_multiclass_storage (traindat=traindat, label_tra splitting_strategy, evaluation_criterium) cross_validation.set_autolock(False) - # append cross vlaidation output classes - #cross_validation.add_cross_validation_output(CrossValidationPrintOutput()) - #mkl_storage=CrossValidationMKLStorage() - #cross_validation.add_cross_validation_output(mkl_storage) - multiclass_storage=CrossValidationMulticlassStorage() - 
multiclass_storage.append_binary_evaluation(F1Measure()) - cross_validation.add_cross_validation_output(multiclass_storage) + # append cross validation parameter observer + multiclass_storage=ParameterObserverCV() + cross_validation.subscribe_to_parameters(multiclass_storage) cross_validation.set_num_runs(3) # perform cross-validation result=cross_validation.evaluate() - roc_0_0_0 = multiclass_storage.get_fold_ROC(0,0,0) - #print roc_0_0_0 - auc_0_0_0 = multiclass_storage.get_fold_evaluation_result(0,0,0,0) - #print auc_0_0_0 - return roc_0_0_0, auc_0_0_0 + # get first observation and first fold + obs = multiclass_storage.get_observations()[0] + fold = obs.get_folds_results()[0] + + # get fold ROC for first class + eval_ROC = ROCEvaluation() + pred_lab_binary = MulticlassLabels.obtain_from_generic(fold.get_test_result()).get_binary_for_class(0) + true_lab_binary = MulticlassLabels.obtain_from_generic(fold.get_test_true_result()).get_binary_for_class(0) + eval_ROC.evaluate(pred_lab_binary, true_lab_binary) + print eval_ROC.get_ROC() + + # get fold evaluation result + acc_measure = F1Measure() + print acc_measure.evaluate(pred_lab_binary, true_lab_binary) if __name__=='__main__': diff --git a/examples/undocumented/python_modular/evaluation_cross_validation_regression.py b/examples/undocumented/python/evaluation_cross_validation_regression.py similarity index 87% rename from examples/undocumented/python_modular/evaluation_cross_validation_regression.py rename to examples/undocumented/python/evaluation_cross_validation_regression.py index 45ce5bd5c4b..f124809b6ca 100644 --- a/examples/undocumented/python_modular/evaluation_cross_validation_regression.py +++ b/examples/undocumented/python/evaluation_cross_validation_regression.py @@ -14,10 +14,10 @@ parameter_list = [[traindat,label_traindat,0.8,1e-6],[traindat,label_traindat,0.9,1e-7]] def evaluation_cross_validation_regression (train_fname=traindat,label_fname=label_traindat,width=0.8,tau=1e-6): - from modshogun 
import CrossValidation, CrossValidationResult - from modshogun import MeanSquaredError, CrossValidationSplitting - from modshogun import RegressionLabels, RealFeatures - from modshogun import GaussianKernel, KernelRidgeRegression, CSVFile + from shogun import CrossValidation, CrossValidationResult + from shogun import MeanSquaredError, CrossValidationSplitting + from shogun import RegressionLabels, RealFeatures + from shogun import GaussianKernel, KernelRidgeRegression, CSVFile # training data features=RealFeatures(CSVFile(train_fname)) diff --git a/examples/undocumented/python_modular/evaluation_director_contingencytableevaluation_modular.py b/examples/undocumented/python/evaluation_director_contingencytableevaluation.py similarity index 77% rename from examples/undocumented/python_modular/evaluation_director_contingencytableevaluation_modular.py rename to examples/undocumented/python/evaluation_director_contingencytableevaluation.py index 09447426d6d..2f839bd3b54 100644 --- a/examples/undocumented/python_modular/evaluation_director_contingencytableevaluation_modular.py +++ b/examples/undocumented/python/evaluation_director_contingencytableevaluation.py @@ -9,9 +9,9 @@ parameter_list = [[ground_truth,predicted]] -def evaluation_director_contingencytableevaluation_modular (ground_truth, predicted): +def evaluation_director_contingencytableevaluation (ground_truth, predicted): try: - from modshogun import DirectorContingencyTableEvaluation, ED_MAXIMIZE + from shogun import DirectorContingencyTableEvaluation, ED_MAXIMIZE except ImportError: print("recompile shogun with --enable-swig-directors") return @@ -24,7 +24,7 @@ def get_custom_direction(self): def get_custom_score(self): return self.get_WRACC()+self.get_BAL() - from modshogun import BinaryLabels + from shogun import BinaryLabels evaluator = SimpleWeightedBinaryEvaluator() r = evaluator.evaluate(BinaryLabels(ground_truth), BinaryLabels(predicted)) @@ -35,5 +35,5 @@ def get_custom_score(self): if 
__name__=='__main__': print('EvaluationDirectorContingencyTableEvaluation') - evaluation_director_contingencytableevaluation_modular(*parameter_list[0]) + evaluation_director_contingencytableevaluation(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/evaluation_meansquarederror_modular.py b/examples/undocumented/python/evaluation_meansquarederror.py similarity index 70% rename from examples/undocumented/python_modular/evaluation_meansquarederror_modular.py rename to examples/undocumented/python/evaluation_meansquarederror.py index be46a716477..141b281f845 100644 --- a/examples/undocumented/python_modular/evaluation_meansquarederror_modular.py +++ b/examples/undocumented/python/evaluation_meansquarederror.py @@ -11,9 +11,9 @@ parameter_list = [[ground_truth,predicted]] -def evaluation_meansquarederror_modular (ground_truth, predicted): - from modshogun import RegressionLabels - from modshogun import MeanSquaredError +def evaluation_meansquarederror (ground_truth, predicted): + from shogun import RegressionLabels + from shogun import MeanSquaredError ground_truth_labels = RegressionLabels(ground_truth) predicted_labels = RegressionLabels(predicted) @@ -26,5 +26,5 @@ def evaluation_meansquarederror_modular (ground_truth, predicted): if __name__=='__main__': print('MeanSquaredError') - evaluation_meansquarederror_modular(*parameter_list[0]) + evaluation_meansquarederror(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/evaluation_meansquaredlogerror_modular.py b/examples/undocumented/python/evaluation_meansquaredlogerror.py similarity index 71% rename from examples/undocumented/python_modular/evaluation_meansquaredlogerror_modular.py rename to examples/undocumented/python/evaluation_meansquaredlogerror.py index fde8efa5c77..b15c4a00b2d 100644 --- a/examples/undocumented/python_modular/evaluation_meansquaredlogerror_modular.py +++ b/examples/undocumented/python/evaluation_meansquaredlogerror.py @@ -11,9 +11,9 @@ parameter_list = 
[[ground_truth,predicted]] -def evaluation_meansquaredlogerror_modular (ground_truth, predicted): - from modshogun import RegressionLabels - from modshogun import MeanSquaredLogError +def evaluation_meansquaredlogerror (ground_truth, predicted): + from shogun import RegressionLabels + from shogun import MeanSquaredLogError ground_truth_labels = RegressionLabels(ground_truth) predicted_labels = RegressionLabels(predicted) @@ -26,5 +26,5 @@ def evaluation_meansquaredlogerror_modular (ground_truth, predicted): if __name__=='__main__': print('EvaluationMeanSquaredLogError') - evaluation_meansquaredlogerror_modular(*parameter_list[0]) + evaluation_meansquaredlogerror(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/evaluation_multiclassaccuracy_modular.py b/examples/undocumented/python/evaluation_multiclassaccuracy.py similarity index 73% rename from examples/undocumented/python_modular/evaluation_multiclassaccuracy_modular.py rename to examples/undocumented/python/evaluation_multiclassaccuracy.py index aab08d4540a..3c38486fed9 100644 --- a/examples/undocumented/python_modular/evaluation_multiclassaccuracy_modular.py +++ b/examples/undocumented/python/evaluation_multiclassaccuracy.py @@ -9,9 +9,9 @@ parameter_list = [[ground_truth,predicted]] -def evaluation_multiclassaccuracy_modular (ground_truth, predicted): - from modshogun import MulticlassLabels - from modshogun import MulticlassAccuracy +def evaluation_multiclassaccuracy (ground_truth, predicted): + from shogun import MulticlassLabels + from shogun import MulticlassAccuracy ground_truth_labels = MulticlassLabels(ground_truth) predicted_labels = MulticlassLabels(predicted) @@ -24,5 +24,5 @@ def evaluation_multiclassaccuracy_modular (ground_truth, predicted): if __name__=='__main__': print('MulticlassAccuracy') - evaluation_multiclassaccuracy_modular(*parameter_list[0]) + evaluation_multiclassaccuracy(*parameter_list[0]) diff --git 
a/examples/undocumented/python_modular/evaluation_multiclassovrevaluation_modular.py b/examples/undocumented/python/evaluation_multiclassovrevaluation.py similarity index 71% rename from examples/undocumented/python_modular/evaluation_multiclassovrevaluation_modular.py rename to examples/undocumented/python/evaluation_multiclassovrevaluation.py index 4452611e04d..0e2f8d6a959 100644 --- a/examples/undocumented/python_modular/evaluation_multiclassovrevaluation_modular.py +++ b/examples/undocumented/python/evaluation_multiclassovrevaluation.py @@ -4,10 +4,10 @@ parameter_list = [[traindat, label_traindat]] -def evaluation_multiclassovrevaluation_modular(train_fname=traindat, label_fname=label_traindat): - from modshogun import MulticlassOVREvaluation,ROCEvaluation - from modshogun import MulticlassLibLinear,RealFeatures,ContingencyTableEvaluation,ACCURACY - from modshogun import MulticlassLabels, Math, CSVFile +def evaluation_multiclassovrevaluation(train_fname=traindat, label_fname=label_traindat): + from shogun import MulticlassOVREvaluation,ROCEvaluation + from shogun import MulticlassLibLinear,RealFeatures,ContingencyTableEvaluation,ACCURACY + from shogun import MulticlassLabels, Math, CSVFile Math.init_random(1) ground_truth_labels = MulticlassLabels(CSVFile(label_fname)) @@ -29,5 +29,5 @@ def evaluation_multiclassovrevaluation_modular(train_fname=traindat, label_fname if __name__=='__main__': print('MulticlassOVREvaluation') - evaluation_multiclassovrevaluation_modular(*parameter_list[0]) + evaluation_multiclassovrevaluation(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/evaluation_prcevaluation_modular.py b/examples/undocumented/python/evaluation_prcevaluation.py similarity index 74% rename from examples/undocumented/python_modular/evaluation_prcevaluation_modular.py rename to examples/undocumented/python/evaluation_prcevaluation.py index 9472157f1d7..6c5325ac66d 100644 --- 
a/examples/undocumented/python_modular/evaluation_prcevaluation_modular.py +++ b/examples/undocumented/python/evaluation_prcevaluation.py @@ -9,9 +9,9 @@ parameter_list = [[ground_truth,predicted]] -def evaluation_prcevaluation_modular (ground_truth, predicted): - from modshogun import BinaryLabels - from modshogun import PRCEvaluation +def evaluation_prcevaluation (ground_truth, predicted): + from shogun import BinaryLabels + from shogun import PRCEvaluation ground_truth_labels = BinaryLabels(ground_truth) predicted_labels = BinaryLabels(predicted) @@ -24,5 +24,5 @@ def evaluation_prcevaluation_modular (ground_truth, predicted): if __name__=='__main__': print('PRCEvaluation') - evaluation_prcevaluation_modular(*parameter_list[0]) + evaluation_prcevaluation(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/evaluation_rocevaluation_modular.py b/examples/undocumented/python/evaluation_rocevaluation.py similarity index 74% rename from examples/undocumented/python_modular/evaluation_rocevaluation_modular.py rename to examples/undocumented/python/evaluation_rocevaluation.py index b56c4987b01..932c2e009f5 100644 --- a/examples/undocumented/python_modular/evaluation_rocevaluation_modular.py +++ b/examples/undocumented/python/evaluation_rocevaluation.py @@ -9,9 +9,9 @@ parameter_list = [[ground_truth,predicted]] -def evaluation_rocevaluation_modular (ground_truth, predicted): - from modshogun import BinaryLabels - from modshogun import ROCEvaluation +def evaluation_rocevaluation (ground_truth, predicted): + from shogun import BinaryLabels + from shogun import ROCEvaluation ground_truth_labels = BinaryLabels(ground_truth) predicted_labels = BinaryLabels(predicted) @@ -24,5 +24,5 @@ def evaluation_rocevaluation_modular (ground_truth, predicted): if __name__=='__main__': print('ROCEvaluation') - evaluation_rocevaluation_modular(*parameter_list[0]) + evaluation_rocevaluation(*parameter_list[0]) diff --git 
a/examples/undocumented/python_modular/evaluation_thresholds_modular.py b/examples/undocumented/python/evaluation_thresholds.py similarity index 83% rename from examples/undocumented/python_modular/evaluation_thresholds_modular.py rename to examples/undocumented/python/evaluation_thresholds.py index 2089a7ed10b..cd6f1e516b6 100644 --- a/examples/undocumented/python_modular/evaluation_thresholds_modular.py +++ b/examples/undocumented/python/evaluation_thresholds.py @@ -1,8 +1,8 @@ #!/usr/bin/env python parameter_list = [[1000]] -def evaluation_thresholds_modular (index): - from modshogun import BinaryLabels, ROCEvaluation +def evaluation_thresholds (index): + from shogun import BinaryLabels, ROCEvaluation import numpy numpy.random.seed(17) output=numpy.arange(-1,1,0.001) @@ -29,4 +29,4 @@ def evaluation_thresholds_modular (index): if __name__=='__main__': print('Evaluation with Thresholds') - evaluation_thresholds_modular(*parameter_list[0]) + evaluation_thresholds(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_binned_dot_modular.py b/examples/undocumented/python/features_binned_dot.py similarity index 81% rename from examples/undocumented/python_modular/features_binned_dot_modular.py rename to examples/undocumented/python/features_binned_dot.py index 4605bf6e91b..a4ef9b18141 100644 --- a/examples/undocumented/python_modular/features_binned_dot_modular.py +++ b/examples/undocumented/python/features_binned_dot.py @@ -7,8 +7,8 @@ parameter_list = [(matrix,bins)] -def features_binned_dot_modular (matrix, bins): - from modshogun import RealFeatures, BinnedDotFeatures +def features_binned_dot (matrix, bins): + from shogun import RealFeatures, BinnedDotFeatures rf=RealFeatures(matrix) #print(rf.get_feature_matrix()) @@ -29,4 +29,4 @@ def features_binned_dot_modular (matrix, bins): if __name__=='__main__': print('BinnedDotFeatures') - features_binned_dot_modular(*parameter_list[0]) + features_binned_dot(*parameter_list[0]) diff --git 
a/examples/undocumented/python_modular/features_dense_modular.py b/examples/undocumented/python/features_dense.py similarity index 85% rename from examples/undocumented/python_modular/features_dense_modular.py rename to examples/undocumented/python/features_dense.py index ae671ae57b4..136bc216d4a 100644 --- a/examples/undocumented/python_modular/features_dense_modular.py +++ b/examples/undocumented/python/features_dense.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -from modshogun import RealFeatures, LongIntFeatures, ByteFeatures +from shogun import RealFeatures, LongIntFeatures, ByteFeatures from numpy import array, float64, int64, uint8, all # create dense matrices A,B,C @@ -11,7 +11,7 @@ # ... of type Real, LongInt and Byte parameter_list = [[matrixA,matrixB,matrixC]] -def features_dense_modular (A=matrixA,B=matrixB,C=matrixC): +def features_dense (A=matrixA,B=matrixB,C=matrixC): a=RealFeatures(A) b=LongIntFeatures(B) @@ -42,4 +42,4 @@ def features_dense_modular (A=matrixA,B=matrixB,C=matrixC): if __name__=='__main__': print('dense') - features_dense_modular(*parameter_list[0]) + features_dense(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_dense_byte_modular.py b/examples/undocumented/python/features_dense_byte.py similarity index 84% rename from examples/undocumented/python_modular/features_dense_byte_modular.py rename to examples/undocumented/python/features_dense_byte.py index abe348f9b2f..f85645529c9 100644 --- a/examples/undocumented/python_modular/features_dense_byte_modular.py +++ b/examples/undocumented/python/features_dense_byte.py @@ -6,8 +6,8 @@ parameter_list=[[A]] -def features_dense_byte_modular (A): - from modshogun import ByteFeatures +def features_dense_byte (A): + from shogun import ByteFeatures # create dense features a # ... 
of type Byte @@ -31,4 +31,4 @@ def features_dense_byte_modular (A): if __name__=='__main__': print('ByteFeatures') - features_dense_byte_modular(*parameter_list[0]) + features_dense_byte(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_dense_io_modular.py b/examples/undocumented/python/features_dense_io.py similarity index 63% rename from examples/undocumented/python_modular/features_dense_io_modular.py rename to examples/undocumented/python/features_dense_io.py index 0acf352ddbf..630468c0d3c 100644 --- a/examples/undocumented/python_modular/features_dense_io_modular.py +++ b/examples/undocumented/python/features_dense_io.py @@ -1,8 +1,8 @@ #!/usr/bin/env python parameter_list=[[]] -def features_dense_io_modular(): - from modshogun import RealFeatures, CSVFile +def features_dense_io(): + from shogun import RealFeatures, CSVFile feats=RealFeatures() f=CSVFile("../data/fm_train_real.dat","r") f.set_delimiter(" ") @@ -11,4 +11,4 @@ def features_dense_io_modular(): if __name__=='__main__': print('Dense Real Features IO') - features_dense_io_modular(*parameter_list[0]) + features_dense_io(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_dense_longint_modular.py b/examples/undocumented/python/features_dense_longint.py similarity index 77% rename from examples/undocumented/python_modular/features_dense_longint_modular.py rename to examples/undocumented/python/features_dense_longint.py index 330dfcd5114..5a21f5b1f42 100644 --- a/examples/undocumented/python_modular/features_dense_longint_modular.py +++ b/examples/undocumented/python/features_dense_longint.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -from modshogun import LongIntFeatures +from shogun import LongIntFeatures from numpy import array, int64, all # create dense matrix A @@ -8,7 +8,7 @@ parameter_list = [[matrix]] # ... 
of type LongInt -def features_dense_longint_modular (A=matrix): +def features_dense_longint (A=matrix): a=LongIntFeatures(A) # get first feature vector and set it @@ -22,4 +22,4 @@ def features_dense_longint_modular (A=matrix): if __name__=='__main__': print('dense_longint') - features_dense_longint_modular(*parameter_list[0]) + features_dense_longint(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_dense_protocols_modular.py b/examples/undocumented/python/features_dense_protocols.py similarity index 88% rename from examples/undocumented/python_modular/features_dense_protocols_modular.py rename to examples/undocumented/python/features_dense_protocols.py index 9b200a23458..a11a8639fc3 100644 --- a/examples/undocumented/python_modular/features_dense_protocols_modular.py +++ b/examples/undocumented/python/features_dense_protocols.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import numpy -from modshogun import RealFeatures -from modshogun import LongIntFeatures +from shogun import RealFeatures +from shogun import LongIntFeatures from numpy import array, float64, int64 @@ -10,7 +10,7 @@ parameter_list = [[data]] -def features_dense_protocols_modular (in_data=data): +def features_dense_protocols (in_data=data): m_real=array(in_data, dtype=float64, order='F') f_real=RealFeatures(m_real) @@ -81,4 +81,4 @@ def features_dense_protocols_modular (in_data=data): if __name__=='__main__': print('dense_protocols') - features_dense_protocols_modular(*parameter_list[0]) + features_dense_protocols(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_dense_real_modular.py b/examples/undocumented/python/features_dense_real.py similarity index 83% rename from examples/undocumented/python_modular/features_dense_real_modular.py rename to examples/undocumented/python/features_dense_real.py index 2d25c25eac8..169f8011abd 100644 --- a/examples/undocumented/python_modular/features_dense_real_modular.py +++ 
b/examples/undocumented/python/features_dense_real.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -from modshogun import RealFeatures +from shogun import RealFeatures from numpy import array, float64, all # create dense matrices A,B,C @@ -8,7 +8,7 @@ parameter_list = [[matrix]] # ... of type LongInt -def features_dense_real_modular (A=matrix): +def features_dense_real (A=matrix): # ... of type Real, LongInt and Byte a=RealFeatures(A) @@ -30,4 +30,4 @@ def features_dense_real_modular (A=matrix): if __name__=='__main__': print('dense_real') - features_dense_real_modular(*parameter_list[0]) + features_dense_real(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_dense_zero_copy_modular.py b/examples/undocumented/python/features_dense_zero_copy.py similarity index 82% rename from examples/undocumented/python_modular/features_dense_zero_copy_modular.py rename to examples/undocumented/python/features_dense_zero_copy.py index 0e28d2f48c6..9e7e0c5a40c 100644 --- a/examples/undocumented/python_modular/features_dense_zero_copy_modular.py +++ b/examples/undocumented/python/features_dense_zero_copy.py @@ -1,6 +1,6 @@ #!/usr/bin/env python import numpy -from modshogun import RealFeatures +from shogun import RealFeatures from numpy import array, float64, int64 # create dense matrice @@ -8,7 +8,7 @@ parameter_list = [[data]] -def features_dense_zero_copy_modular (in_data=data): +def features_dense_zero_copy (in_data=data): feats = None if numpy.__version__ >= '1.5': feats=numpy.array(in_data, dtype=float64, order='F') @@ -38,4 +38,4 @@ def features_dense_zero_copy_modular (in_data=data): if __name__=='__main__': print('dense_zero_copy') - features_dense_zero_copy_modular(*parameter_list[0]) + features_dense_zero_copy(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_director_dot_modular.py b/examples/undocumented/python/features_director_dot.py similarity index 88% rename from 
examples/undocumented/python_modular/features_director_dot_modular.py rename to examples/undocumented/python/features_director_dot.py index d09952ab2fc..047e2c2fda3 100644 --- a/examples/undocumented/python_modular/features_director_dot_modular.py +++ b/examples/undocumented/python/features_director_dot.py @@ -9,11 +9,11 @@ parameter_list = [[traindat,testdat,label_traindat,0.9,1e-3],[traindat,testdat,label_traindat,0.8,1e-2]] -def features_director_dot_modular (fm_train_real, fm_test_real, +def features_director_dot (fm_train_real, fm_test_real, label_train_twoclass, C, epsilon): try: - from modshogun import DirectorDotFeatures - from modshogun import RealVector + from shogun import DirectorDotFeatures + from shogun import RealVector except ImportError: print("recompile shogun with --enable-swig-directors") return @@ -61,9 +61,9 @@ def get_dim_feature_space(self): # return NumpyFeatures(self.data-other.data) - #from modshogun import RealFeatures, SparseRealFeatures, BinaryLabels - #from modshogun import LibLinear, L2R_L2LOSS_SVC_DUAL - #from modshogun import Math_init_random + #from shogun import RealFeatures, SparseRealFeatures, BinaryLabels + #from shogun import LibLinear, L2R_L2LOSS_SVC_DUAL + #from shogun import Math_init_random #Math_init_random(17) #feats_train=RealFeatures(fm_train_real) @@ -105,4 +105,4 @@ def get_dim_feature_space(self): if __name__=='__main__': print('DirectorLinear') - features_director_dot_modular(*parameter_list[0]) + features_director_dot(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_hasheddocdot_modular.py b/examples/undocumented/python/features_hasheddocdot.py similarity index 76% rename from examples/undocumented/python_modular/features_hasheddocdot_modular.py rename to examples/undocumented/python/features_hasheddocdot.py index bc20aebb69e..06b4bc78695 100644 --- a/examples/undocumented/python_modular/features_hasheddocdot_modular.py +++ b/examples/undocumented/python/features_hasheddocdot.py @@ 
-3,10 +3,10 @@ parameter_list=[[strings]] -def features_hasheddocdot_modular(strings): - from modshogun import StringCharFeatures, RAWBYTE - from modshogun import HashedDocDotFeatures - from modshogun import NGramTokenizer +def features_hasheddocdot(strings): + from shogun import StringCharFeatures, RAWBYTE + from shogun import HashedDocDotFeatures + from shogun import NGramTokenizer from numpy import array #create string features @@ -34,4 +34,4 @@ def features_hasheddocdot_modular(strings): if __name__=='__main__': print('HashedDocDotFeatures') - features_hasheddocdot_modular(*parameter_list[0]) + features_hasheddocdot(*parameter_list[0]) diff --git a/examples/undocumented/python/features_io.py b/examples/undocumented/python/features_io.py new file mode 100644 index 00000000000..ba41c42d3d0 --- /dev/null +++ b/examples/undocumented/python/features_io.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python +from tools.load import LoadMatrix +lm=LoadMatrix() +data=lm.load_numbers('../data/fm_train_real.dat') +label=lm.load_numbers('../data/label_train_twoclass.dat') + +parameter_list=[[data,label]] + +def features_io (fm_train_real, label_train_twoclass): + import numpy + from shogun import SparseRealFeatures, RealFeatures, MulticlassLabels + from shogun import GaussianKernel + from shogun import LibSVMFile, CSVFile, BinaryFile, HDF5File + from tempfile import NamedTemporaryFile + + feats=SparseRealFeatures(fm_train_real) + feats2=SparseRealFeatures() + + tmp_fm_train_sparsereal_bin = NamedTemporaryFile(suffix='sparsereal.bin') + f=BinaryFile(tmp_fm_train_sparsereal_bin.name, "w") + feats.save(f) + + tmp_fm_train_sparsereal_ascii = NamedTemporaryFile(suffix='sparsereal.ascii') + f=LibSVMFile(tmp_fm_train_sparsereal_ascii.name, "w") + feats.save(f) + + f=BinaryFile(tmp_fm_train_sparsereal_bin.name) + feats2.load(f) + + f=LibSVMFile(tmp_fm_train_sparsereal_ascii.name) + feats2.load(f) + + feats=RealFeatures(fm_train_real) + feats2=RealFeatures() + + tmp_fm_train_real_bin = 
NamedTemporaryFile(suffix='real.bin') + f=BinaryFile(tmp_fm_train_real_bin.name, "w") + feats.save(f) + + tmp_fm_train_real_h5 = NamedTemporaryFile(suffix='real.h5') + f=HDF5File(tmp_fm_train_real_h5.name, "w", "/data/doubles") + feats.save(f) + + tmp_fm_train_real_ascii = NamedTemporaryFile(suffix='real.ascii') + f=CSVFile(tmp_fm_train_real_ascii.name, "w") + feats.save(f) + + f=BinaryFile(tmp_fm_train_real_bin.name) + feats2.load(f) + #print("diff binary", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten()))) + + f=CSVFile(tmp_fm_train_real_ascii.name) + feats2.load(f) + #print("diff ascii", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten()))) + + lab=MulticlassLabels(numpy.array([0.0,1.0,2.0,3.0])) + lab2=MulticlassLabels() + tmp_label_train_twoclass_ascii = NamedTemporaryFile(suffix='twoclass.ascii') + f=CSVFile(tmp_label_train_twoclass_ascii.name, "w") + lab.save(f) + + tmp_label_train_twoclass_bin = NamedTemporaryFile(suffix='twoclass.bin') + f=BinaryFile(tmp_label_train_twoclass_bin.name, "w") + lab.save(f) + + tmp_label_train_real_h5 = NamedTemporaryFile(suffix='real.h5') + f=HDF5File(tmp_label_train_real_h5.name, "w", "/data/labels") + lab.save(f) + + f=CSVFile(tmp_label_train_twoclass_ascii.name) + lab2.load(f) + + f=BinaryFile(tmp_label_train_twoclass_bin.name) + lab2.load(f) + + f=HDF5File(tmp_fm_train_real_h5.name, "r", "/data/doubles") + feats2.load(f) + #print(feats2.get_feature_matrix()) + f=HDF5File(tmp_label_train_real_h5.name, "r", "/data/labels") + lab2.load(f) + #print(lab2.get_labels()) + + return feats, feats2, lab, lab2 + +if __name__=='__main__': + print('Features IO') + features_io(*parameter_list[0]) diff --git a/examples/undocumented/python/features_read_svmlight_format.py b/examples/undocumented/python/features_read_svmlight_format.py new file mode 100644 index 00000000000..741fd59571b --- /dev/null +++ b/examples/undocumented/python/features_read_svmlight_format.py @@ -0,0 
+1,16 @@ +#!/usr/bin/env python +parameter_list=[['../data/train_sparsereal.light']] + +def features_read_svmlight_format (fname): + from tempfile import NamedTemporaryFile + from shogun import SparseRealFeatures + from shogun import LibSVMFile + + f=SparseRealFeatures() + lab=f.load_with_labels(LibSVMFile(fname)) + tmp_file = NamedTemporaryFile(suffix='svmlight') + f.save_with_labels(LibSVMFile(tmp_file.name, 'w'), lab) + +if __name__=='__main__': + print('Reading SVMLIGHT format') + features_read_svmlight_format(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_snp_modular.py b/examples/undocumented/python/features_snp.py similarity index 71% rename from examples/undocumented/python_modular/features_snp_modular.py rename to examples/undocumented/python/features_snp.py index aae5d7fcec5..afaa31aecf3 100644 --- a/examples/undocumented/python_modular/features_snp_modular.py +++ b/examples/undocumented/python/features_snp.py @@ -1,8 +1,8 @@ #!/usr/bin/env python parameter_list=[['../data/snps.dat']] -def features_snp_modular (fname): - from modshogun import StringByteFeatures, SNPFeatures, SNP +def features_snp (fname): + from shogun import StringByteFeatures, SNPFeatures, SNP sf=StringByteFeatures(SNP) sf.load_ascii_file(fname, False, SNP, SNP) @@ -14,4 +14,4 @@ def features_snp_modular (fname): if __name__=='__main__': print('SNP Features') - features_snp_modular(*parameter_list[0]) + features_snp(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_sparse_modular.py b/examples/undocumented/python/features_sparse.py similarity index 90% rename from examples/undocumented/python_modular/features_sparse_modular.py rename to examples/undocumented/python/features_sparse.py index 46cd441c2fb..8c80f69f461 100644 --- a/examples/undocumented/python_modular/features_sparse_modular.py +++ b/examples/undocumented/python/features_sparse.py @@ -4,9 +4,9 @@ A=numpy.array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], 
dtype=numpy.float64) parameter_list=[[A]] -def features_sparse_modular (A): +def features_sparse (A): from scipy.sparse import csc_matrix - from modshogun import SparseRealFeatures + from shogun import SparseRealFeatures from numpy import array, float64, all # sparse representation X of dense matrix A @@ -42,4 +42,4 @@ def features_sparse_modular (A): if __name__=='__main__': print('Sparse Features') - features_sparse_modular(*parameter_list[0]) + features_sparse(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_string_char_modular.py b/examples/undocumented/python/features_string_char.py similarity index 82% rename from examples/undocumented/python_modular/features_string_char_modular.py rename to examples/undocumented/python/features_string_char.py index 58d87b53464..cab77c02af4 100644 --- a/examples/undocumented/python_modular/features_string_char_modular.py +++ b/examples/undocumented/python/features_string_char.py @@ -3,8 +3,8 @@ parameter_list=[[strings]] -def features_string_char_modular (strings): - from modshogun import StringCharFeatures, RAWBYTE +def features_string_char (strings): + from shogun import StringCharFeatures, RAWBYTE from numpy import array #create string features @@ -25,4 +25,4 @@ def features_string_char_modular (strings): if __name__=='__main__': print('StringCharFeatures') - features_string_char_modular(*parameter_list[0]) + features_string_char(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_string_char_compressed_modular.py b/examples/undocumented/python/features_string_char_compressed.py similarity index 89% rename from examples/undocumented/python_modular/features_string_char_compressed_modular.py rename to examples/undocumented/python/features_string_char_compressed.py index c7ef1e5d0a6..462809c66bc 100644 --- a/examples/undocumented/python_modular/features_string_char_compressed_modular.py +++ b/examples/undocumented/python/features_string_char_compressed.py @@ -1,10 +1,10 
@@ #!/usr/bin/env python -parameter_list = [['features_string_char_compressed_modular.py']] +parameter_list = [['features_string_char_compressed.py']] -def features_string_char_compressed_modular (fname): - from modshogun import StringCharFeatures, StringFileCharFeatures, RAWBYTE - from modshogun import UNCOMPRESSED,SNAPPY,LZO,GZIP,BZIP2,LZMA, MSG_DEBUG - from modshogun import DecompressCharString +def features_string_char_compressed (fname): + from shogun import StringCharFeatures, StringFileCharFeatures, RAWBYTE + from shogun import UNCOMPRESSED,SNAPPY,LZO,GZIP,BZIP2,LZMA, MSG_DEBUG + from shogun import DecompressCharString f=StringFileCharFeatures(fname, RAWBYTE) @@ -93,4 +93,4 @@ def features_string_char_compressed_modular (fname): if __name__=='__main__': print('Compressing StringCharFileFeatures') - features_string_char_compressed_modular(*parameter_list[0]) + features_string_char_compressed(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_string_file_modular.py b/examples/undocumented/python/features_string_file.py similarity index 75% rename from examples/undocumented/python_modular/features_string_file_modular.py rename to examples/undocumented/python/features_string_file.py index 5917e55c12e..d0e633d1226 100644 --- a/examples/undocumented/python_modular/features_string_file_modular.py +++ b/examples/undocumented/python/features_string_file.py @@ -1,9 +1,9 @@ #!/usr/bin/env python -parameter_list=[[".", "features_string_char_modular.py"]] +parameter_list=[[".", "features_string_char.py"]] -def features_string_file_modular (directory, fname): - from modshogun import StringCharFeatures, RAWBYTE - from modshogun import CSVFile +def features_string_file (directory, fname): + from shogun import StringCharFeatures, RAWBYTE + from shogun import CSVFile # load features from directory f=StringCharFeatures(RAWBYTE) @@ -29,4 +29,4 @@ def features_string_file_modular (directory, fname): if __name__=='__main__': print('StringWordFeatures') 
- features_string_file_modular(*parameter_list[0]) + features_string_file(*parameter_list[0]) diff --git a/examples/undocumented/python/features_string_file_char.py b/examples/undocumented/python/features_string_file_char.py new file mode 100644 index 00000000000..3335b668535 --- /dev/null +++ b/examples/undocumented/python/features_string_file_char.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python +parameter_list = [['features_string_file_char.py']] + +def features_string_file_char (fname): + from shogun import StringFileCharFeatures, RAWBYTE + f = StringFileCharFeatures(fname, RAWBYTE) + #print("strings", f.get_features()) + return f + +if __name__=='__main__': + print('Compressing StringCharFileFeatures') + features_string_file_char(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_string_hashed_wd_modular.py b/examples/undocumented/python/features_string_hashed_wd.py similarity index 69% rename from examples/undocumented/python_modular/features_string_hashed_wd_modular.py rename to examples/undocumented/python/features_string_hashed_wd.py index d430c9d8049..7ee0679fcee 100644 --- a/examples/undocumented/python_modular/features_string_hashed_wd_modular.py +++ b/examples/undocumented/python/features_string_hashed_wd.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -from modshogun import LongIntFeatures +from shogun import LongIntFeatures from numpy import array, int64, all # create dense matrix A @@ -8,12 +8,12 @@ parameter_list = [[matrix,3,1,2],[matrix,3,1,2]] # ... 
of type LongInt -def features_string_hashed_wd_modular (A=matrix,order=3,start_order=1,hash_bits=2): +def features_string_hashed_wd (A=matrix,order=3,start_order=1,hash_bits=2): a=LongIntFeatures(A) from numpy import array, uint8 - from modshogun import HashedWDFeatures, StringByteFeatures, RAWDNA - from modshogun import MSG_DEBUG + from shogun import HashedWDFeatures, StringByteFeatures, RAWDNA + from shogun import MSG_DEBUG x=[array([0,1,2,3,0,1,2,3,3,2,2,1,1],dtype=uint8)] from_order=order @@ -28,4 +28,4 @@ def features_string_hashed_wd_modular (A=matrix,order=3,start_order=1,hash_bits= if __name__=='__main__': print('string_hashed_wd') - features_string_hashed_wd_modular(*parameter_list[0]) + features_string_hashed_wd(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_string_sliding_window_modular.py b/examples/undocumented/python/features_string_sliding_window.py similarity index 85% rename from examples/undocumented/python_modular/features_string_sliding_window_modular.py rename to examples/undocumented/python/features_string_sliding_window.py index 8d6ff6831a0..05ac8de69c7 100644 --- a/examples/undocumented/python_modular/features_string_sliding_window_modular.py +++ b/examples/undocumented/python/features_string_sliding_window.py @@ -4,9 +4,9 @@ parameter_list=[[s]] -def features_string_sliding_window_modular (strings): - from modshogun import StringCharFeatures, DNA - from modshogun import DynamicIntArray +def features_string_sliding_window (strings): + from shogun import StringCharFeatures, DNA + from shogun import DynamicIntArray f=StringCharFeatures([strings], DNA) @@ -45,4 +45,4 @@ def features_string_sliding_window_modular (strings): if __name__=='__main__': print('Sliding Window') - features_string_sliding_window_modular(*parameter_list[0]) + features_string_sliding_window(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_string_ulong_modular.py 
b/examples/undocumented/python/features_string_ulong.py similarity index 71% rename from examples/undocumented/python_modular/features_string_ulong_modular.py rename to examples/undocumented/python/features_string_ulong.py index 7ea478e34b9..ca0808299fa 100644 --- a/examples/undocumented/python_modular/features_string_ulong_modular.py +++ b/examples/undocumented/python/features_string_ulong.py @@ -2,9 +2,9 @@ parameter_list = [[0,2,0,False],[0,3,0,False]] -def features_string_ulong_modular (start=0,order=2,gap=0,rev=False): +def features_string_ulong (start=0,order=2,gap=0,rev=False): - from modshogun import StringCharFeatures, StringUlongFeatures, RAWBYTE + from shogun import StringCharFeatures, StringUlongFeatures, RAWBYTE from numpy import array, uint64 #create string features @@ -21,4 +21,4 @@ def features_string_ulong_modular (start=0,order=2,gap=0,rev=False): if __name__=='__main__': print('simple_longint') - features_string_ulong_modular(*parameter_list[0]) + features_string_ulong(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_string_word_modular.py b/examples/undocumented/python/features_string_word.py similarity index 80% rename from examples/undocumented/python_modular/features_string_word_modular.py rename to examples/undocumented/python/features_string_word.py index dfd58099d39..0bba571d6a8 100644 --- a/examples/undocumented/python_modular/features_string_word_modular.py +++ b/examples/undocumented/python/features_string_word.py @@ -3,8 +3,8 @@ parameter_list=[[strings,0,2,0,False]] -def features_string_word_modular (strings, start, order, gap, rev): - from modshogun import StringCharFeatures, StringWordFeatures, RAWBYTE +def features_string_word (strings, start, order, gap, rev): + from shogun import StringCharFeatures, StringWordFeatures, RAWBYTE from numpy import array, uint16 #create string features @@ -28,4 +28,4 @@ def features_string_word_modular (strings, start, order, gap, rev): if __name__=='__main__': 
print('StringWordFeatures') - features_string_word_modular(*parameter_list[0]) + features_string_word(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/graphical/classifier_gaussian_process_binary_classification.py b/examples/undocumented/python/graphical/classifier_gaussian_process_binary_classification.py similarity index 97% rename from examples/undocumented/python_modular/graphical/classifier_gaussian_process_binary_classification.py rename to examples/undocumented/python/graphical/classifier_gaussian_process_binary_classification.py index ce8e06feb56..2d88b9693c6 100644 --- a/examples/undocumented/python_modular/graphical/classifier_gaussian_process_binary_classification.py +++ b/examples/undocumented/python/graphical/classifier_gaussian_process_binary_classification.py @@ -35,7 +35,7 @@ def gaussian_process_binary_classification_laplace(X_train, y_train, n_test=50): # import all necessary modules from Shogun (some of them require Eigen3) try: - from modshogun import RealFeatures, BinaryLabels, GaussianKernel, \ + from shogun import RealFeatures, BinaryLabels, GaussianKernel, \ LogitLikelihood, ProbitLikelihood, ZeroMean, SingleLaplacianInferenceMethod, \ EPInferenceMethod, GaussianProcessClassification except ImportError: diff --git a/examples/undocumented/python_modular/graphical/classifier_perceptron_graphical.py b/examples/undocumented/python/graphical/classifier_perceptron_graphical.py similarity index 94% rename from examples/undocumented/python_modular/graphical/classifier_perceptron_graphical.py rename to examples/undocumented/python/graphical/classifier_perceptron_graphical.py index d7efc844432..2064921b835 100644 --- a/examples/undocumented/python_modular/graphical/classifier_perceptron_graphical.py +++ b/examples/undocumented/python/graphical/classifier_perceptron_graphical.py @@ -7,9 +7,9 @@ parameter_list = [[20, 5, 1., 1000, 1, None, 5], [100, 5, 1., 1000, 1, None, 10]] def classifier_perceptron_graphical(n=100, distance=5, 
learn_rate=1., max_iter=1000, num_threads=1, seed=None, nperceptrons=5): - from modshogun import RealFeatures, BinaryLabels - from modshogun import Perceptron - from modshogun import MSG_INFO + from shogun import RealFeatures, BinaryLabels + from shogun import Perceptron + from shogun import MSG_INFO # 2D data _DIM = 2 diff --git a/examples/undocumented/python_modular/graphical/cluster_kmeans.py b/examples/undocumented/python/graphical/cluster_kmeans.py similarity index 97% rename from examples/undocumented/python_modular/graphical/cluster_kmeans.py rename to examples/undocumented/python/graphical/cluster_kmeans.py index e549ab3bbbf..63a82911954 100644 --- a/examples/undocumented/python_modular/graphical/cluster_kmeans.py +++ b/examples/undocumented/python/graphical/cluster_kmeans.py @@ -2,7 +2,7 @@ from numpy import ones,zeros,cos,sin,concatenate from numpy.random import randn -from modshogun import * +from shogun import * k=4 num=1000 diff --git a/examples/undocumented/python_modular/graphical/cluster_kpp.py b/examples/undocumented/python/graphical/cluster_kpp.py similarity index 98% rename from examples/undocumented/python_modular/graphical/cluster_kpp.py rename to examples/undocumented/python/graphical/cluster_kpp.py index c0c0f6f9b9a..70b76cdeeeb 100644 --- a/examples/undocumented/python_modular/graphical/cluster_kpp.py +++ b/examples/undocumented/python/graphical/cluster_kpp.py @@ -11,7 +11,7 @@ from numpy import array,ones,zeros,cos,sin,concatenate from numpy.random import randn -from modshogun import * +from shogun import * k=2 num=500 diff --git a/examples/undocumented/python_modular/graphical/converter_fastica_bss.py b/examples/undocumented/python/graphical/converter_fastica_bss.py similarity index 100% rename from examples/undocumented/python_modular/graphical/converter_fastica_bss.py rename to examples/undocumented/python/graphical/converter_fastica_bss.py diff --git a/examples/undocumented/python_modular/graphical/converter_ffsep_bss.py 
b/examples/undocumented/python/graphical/converter_ffsep_bss.py similarity index 94% rename from examples/undocumented/python_modular/graphical/converter_ffsep_bss.py rename to examples/undocumented/python/graphical/converter_ffsep_bss.py index 64e01a94a3a..cfb4527e568 100644 --- a/examples/undocumented/python_modular/graphical/converter_ffsep_bss.py +++ b/examples/undocumented/python/graphical/converter_ffsep_bss.py @@ -11,8 +11,8 @@ import numpy as np import pylab as pl -from modshogun import RealFeatures -from modshogun import FFSep +from shogun import RealFeatures +from shogun import FFSep # Generate sample data np.random.seed(0) diff --git a/examples/undocumented/python_modular/graphical/converter_jade_bss.py b/examples/undocumented/python/graphical/converter_jade_bss.py similarity index 94% rename from examples/undocumented/python_modular/graphical/converter_jade_bss.py rename to examples/undocumented/python/graphical/converter_jade_bss.py index 973508a27c0..f06b2efe033 100644 --- a/examples/undocumented/python_modular/graphical/converter_jade_bss.py +++ b/examples/undocumented/python/graphical/converter_jade_bss.py @@ -11,8 +11,8 @@ import numpy as np import pylab as pl -from modshogun import RealFeatures -from modshogun import Jade +from shogun import RealFeatures +from shogun import Jade # Generate sample data np.random.seed(0) diff --git a/examples/undocumented/python_modular/graphical/converter_jedi_bss.py b/examples/undocumented/python/graphical/converter_jedi_bss.py similarity index 94% rename from examples/undocumented/python_modular/graphical/converter_jedi_bss.py rename to examples/undocumented/python/graphical/converter_jedi_bss.py index 184f86f5a33..b110ccc493b 100644 --- a/examples/undocumented/python_modular/graphical/converter_jedi_bss.py +++ b/examples/undocumented/python/graphical/converter_jedi_bss.py @@ -11,8 +11,8 @@ import numpy as np import pylab as pl -from modshogun import RealFeatures -from modshogun import JediSep +from shogun import 
RealFeatures +from shogun import JediSep # Generate sample data np.random.seed(0) diff --git a/examples/undocumented/python_modular/graphical/converter_sobi_bss.py b/examples/undocumented/python/graphical/converter_sobi_bss.py similarity index 94% rename from examples/undocumented/python_modular/graphical/converter_sobi_bss.py rename to examples/undocumented/python/graphical/converter_sobi_bss.py index 808f0880ad8..67b11eaab4e 100644 --- a/examples/undocumented/python_modular/graphical/converter_sobi_bss.py +++ b/examples/undocumented/python/graphical/converter_sobi_bss.py @@ -11,8 +11,8 @@ import numpy as np import pylab as pl -from modshogun import RealFeatures -from modshogun import SOBI +from shogun import RealFeatures +from shogun import SOBI # Generate sample data np.random.seed(0) diff --git a/examples/undocumented/python_modular/graphical/converter_spe_helix.py b/examples/undocumented/python/graphical/converter_spe_helix.py similarity index 94% rename from examples/undocumented/python_modular/graphical/converter_spe_helix.py rename to examples/undocumented/python/graphical/converter_spe_helix.py index c3c79aaf6d3..08164b1d2d0 100644 --- a/examples/undocumented/python_modular/graphical/converter_spe_helix.py +++ b/examples/undocumented/python/graphical/converter_spe_helix.py @@ -19,9 +19,9 @@ import pylab import util -from modshogun import RealFeatures -from modshogun import StochasticProximityEmbedding, SPE_GLOBAL -from modshogun import SPE_LOCAL, Isomap +from shogun import RealFeatures +from shogun import StochasticProximityEmbedding, SPE_GLOBAL +from shogun import SPE_LOCAL, Isomap # Number of data points N = 500 diff --git a/examples/undocumented/python_modular/graphical/converter_uwedge_bss.py b/examples/undocumented/python/graphical/converter_uwedge_bss.py similarity index 93% rename from examples/undocumented/python_modular/graphical/converter_uwedge_bss.py rename to examples/undocumented/python/graphical/converter_uwedge_bss.py index 
0450d7fee1f..bbab4c6ccaa 100644 --- a/examples/undocumented/python_modular/graphical/converter_uwedge_bss.py +++ b/examples/undocumented/python/graphical/converter_uwedge_bss.py @@ -8,8 +8,8 @@ import numpy as np import pylab as pl -from modshogun import RealFeatures -from modshogun import UWedgeSep +from shogun import RealFeatures +from shogun import UWedgeSep # Generate sample data np.random.seed(0) diff --git a/examples/undocumented/python_modular/graphical/eigenfaces.py b/examples/undocumented/python/graphical/eigenfaces.py similarity index 98% rename from examples/undocumented/python_modular/graphical/eigenfaces.py rename to examples/undocumented/python/graphical/eigenfaces.py index d4a888f4627..60253957680 100644 --- a/examples/undocumented/python_modular/graphical/eigenfaces.py +++ b/examples/undocumented/python/graphical/eigenfaces.py @@ -41,9 +41,9 @@ import numpy as np from numpy import random -from modshogun import RealFeatures -from modshogun import PCA -from modshogun import EuclideanDistance +from shogun import RealFeatures +from shogun import PCA +from shogun import EuclideanDistance import math import os import pylab as pl diff --git a/examples/undocumented/python_modular/graphical/em_1d_gmm.py b/examples/undocumented/python/graphical/em_1d_gmm.py similarity index 96% rename from examples/undocumented/python_modular/graphical/em_1d_gmm.py rename to examples/undocumented/python/graphical/em_1d_gmm.py index cb00495acb2..1e826e3a514 100644 --- a/examples/undocumented/python_modular/graphical/em_1d_gmm.py +++ b/examples/undocumented/python/graphical/em_1d_gmm.py @@ -1,7 +1,7 @@ from pylab import figure,show,connect,hist,plot,legend from numpy import array, append, arange, empty, exp -from modshogun import Gaussian, GMM -from modshogun import RealFeatures +from shogun import Gaussian, GMM +from shogun import RealFeatures import util util.set_title('EM for 1d GMM example') diff --git a/examples/undocumented/python_modular/graphical/em_2d_gmm.py 
b/examples/undocumented/python/graphical/em_2d_gmm.py similarity index 97% rename from examples/undocumented/python_modular/graphical/em_2d_gmm.py rename to examples/undocumented/python/graphical/em_2d_gmm.py index 92cb298de37..90551c7b440 100644 --- a/examples/undocumented/python_modular/graphical/em_2d_gmm.py +++ b/examples/undocumented/python/graphical/em_2d_gmm.py @@ -1,7 +1,7 @@ from pylab import figure,scatter,contour,show,legend,connect from numpy import array, append, arange, reshape, empty, exp -from modshogun import Gaussian, GMM -from modshogun import RealFeatures +from shogun import Gaussian, GMM +from shogun import RealFeatures import util util.set_title('EM for 2d GMM example') diff --git a/examples/undocumented/python_modular/graphical/group_lasso.py b/examples/undocumented/python/graphical/group_lasso.py similarity index 98% rename from examples/undocumented/python_modular/graphical/group_lasso.py rename to examples/undocumented/python/graphical/group_lasso.py index fb27e2361c9..1de54d418b6 100644 --- a/examples/undocumented/python_modular/graphical/group_lasso.py +++ b/examples/undocumented/python/graphical/group_lasso.py @@ -4,7 +4,7 @@ import matplotlib.pyplot as plt from numpy.random import rand, randn, permutation, multivariate_normal -from modshogun import BinaryLabels, RealFeatures, IndexBlock, IndexBlockGroup, FeatureBlockLogisticRegression +from shogun import BinaryLabels, RealFeatures, IndexBlock, IndexBlockGroup, FeatureBlockLogisticRegression def generate_synthetic_logistic_data(n, p, L, blk_nnz, gcov, nstd): diff --git a/examples/undocumented/python_modular/graphical/interactive_clustering_demo.py b/examples/undocumented/python/graphical/interactive_clustering_demo.py similarity index 99% rename from examples/undocumented/python_modular/graphical/interactive_clustering_demo.py rename to examples/undocumented/python/graphical/interactive_clustering_demo.py index ad434521619..38c14597cae 100644 --- 
a/examples/undocumented/python_modular/graphical/interactive_clustering_demo.py +++ b/examples/undocumented/python/graphical/interactive_clustering_demo.py @@ -16,9 +16,9 @@ from matplotlib.backends.backend_qt4agg import NavigationToolbar2QT as NavigationToolbar from matplotlib.figure import Figure -from modshogun import * -from modshogun import * -from modshogun import * +from shogun import * +from shogun import * +from shogun import * import util class Form(QMainWindow): diff --git a/examples/undocumented/python_modular/graphical/interactive_gp_demo.py b/examples/undocumented/python/graphical/interactive_gp_demo.py similarity index 99% rename from examples/undocumented/python_modular/graphical/interactive_gp_demo.py rename to examples/undocumented/python/graphical/interactive_gp_demo.py index ebbff94d0e2..9e1846eb52c 100644 --- a/examples/undocumented/python_modular/graphical/interactive_gp_demo.py +++ b/examples/undocumented/python/graphical/interactive_gp_demo.py @@ -29,9 +29,9 @@ from matplotlib.backends.backend_qt4agg import NavigationToolbar2QT as NavigationToolbar from matplotlib.figure import Figure -from modshogun import * -from modshogun import * -from modshogun import * +from shogun import * +from shogun import * +from shogun import * import util class Form(QMainWindow): diff --git a/examples/undocumented/python_modular/graphical/interactive_kmm_demo.py b/examples/undocumented/python/graphical/interactive_kmm_demo.py similarity index 99% rename from examples/undocumented/python_modular/graphical/interactive_kmm_demo.py rename to examples/undocumented/python/graphical/interactive_kmm_demo.py index 80f957201b7..9eae0833371 100644 --- a/examples/undocumented/python_modular/graphical/interactive_kmm_demo.py +++ b/examples/undocumented/python/graphical/interactive_kmm_demo.py @@ -25,9 +25,9 @@ from matplotlib.backends.backend_qt4agg import NavigationToolbar2QT as NavigationToolbar from matplotlib.figure import Figure -from modshogun import * -from modshogun 
import KernelMeanMatching -from modshogun import Math +from shogun import * +from shogun import KernelMeanMatching +from shogun import Math import util class Form(QMainWindow): diff --git a/examples/undocumented/python_modular/graphical/interactive_svm_demo.py b/examples/undocumented/python/graphical/interactive_svm_demo.py similarity index 99% rename from examples/undocumented/python_modular/graphical/interactive_svm_demo.py rename to examples/undocumented/python/graphical/interactive_svm_demo.py index 14b00292d29..1732f89394d 100644 --- a/examples/undocumented/python_modular/graphical/interactive_svm_demo.py +++ b/examples/undocumented/python/graphical/interactive_svm_demo.py @@ -16,7 +16,7 @@ from matplotlib.backends.backend_qt4agg import NavigationToolbar2QT as NavigationToolbar from matplotlib.figure import Figure -from modshogun import * +from shogun import * import util class Form(QMainWindow): diff --git a/examples/undocumented/python_modular/graphical/interactive_svr_demo.py b/examples/undocumented/python/graphical/interactive_svr_demo.py similarity index 99% rename from examples/undocumented/python_modular/graphical/interactive_svr_demo.py rename to examples/undocumented/python/graphical/interactive_svr_demo.py index b0276ae3765..302e9e86c82 100644 --- a/examples/undocumented/python_modular/graphical/interactive_svr_demo.py +++ b/examples/undocumented/python/graphical/interactive_svr_demo.py @@ -16,7 +16,7 @@ from matplotlib.backends.backend_qt4agg import NavigationToolbar2QT as NavigationToolbar from matplotlib.figure import Figure -from modshogun import * +from shogun import * class Form(QMainWindow): def __init__(self, parent=None): diff --git a/examples/undocumented/python_modular/graphical/inverse_covariance_estimation_demo.py b/examples/undocumented/python/graphical/inverse_covariance_estimation_demo.py similarity index 96% rename from examples/undocumented/python_modular/graphical/inverse_covariance_estimation_demo.py rename to 
examples/undocumented/python/graphical/inverse_covariance_estimation_demo.py index 09faf32ac6f..91848758b03 100755 --- a/examples/undocumented/python_modular/graphical/inverse_covariance_estimation_demo.py +++ b/examples/undocumented/python/graphical/inverse_covariance_estimation_demo.py @@ -5,7 +5,7 @@ from pylab import show, imshow def simulate_data (n,p): - from modshogun import SparseInverseCovariance + from shogun import SparseInverseCovariance import numpy as np #create a random pxp covariance matrix @@ -18,7 +18,7 @@ def simulate_data (n,p): return data def inverse_covariance (data,lc): - from modshogun import SparseInverseCovariance + from shogun import SparseInverseCovariance from numpy import dot sic = SparseInverseCovariance() diff --git a/examples/undocumented/python_modular/graphical/kernel_ridge_regression.py b/examples/undocumented/python/graphical/kernel_ridge_regression.py similarity index 97% rename from examples/undocumented/python_modular/graphical/kernel_ridge_regression.py rename to examples/undocumented/python/graphical/kernel_ridge_regression.py index 7d9a9227b32..be72f275076 100644 --- a/examples/undocumented/python_modular/graphical/kernel_ridge_regression.py +++ b/examples/undocumented/python/graphical/kernel_ridge_regression.py @@ -1,7 +1,7 @@ from pylab import figure,pcolor,scatter,contour,colorbar,show,subplot,plot,connect from numpy import array,meshgrid,reshape,linspace,min,max from numpy import concatenate,transpose,ravel -from modshogun import * +from shogun import * import util util.set_title('KernelRidgeRegression') diff --git a/examples/undocumented/python_modular/graphical/kernel_ridge_regression_sinc.py b/examples/undocumented/python/graphical/kernel_ridge_regression_sinc.py similarity index 96% rename from examples/undocumented/python_modular/graphical/kernel_ridge_regression_sinc.py rename to examples/undocumented/python/graphical/kernel_ridge_regression_sinc.py index 2ea9a0aa311..7f9d68d8284 100644 --- 
a/examples/undocumented/python_modular/graphical/kernel_ridge_regression_sinc.py +++ b/examples/undocumented/python/graphical/kernel_ridge_regression_sinc.py @@ -1,5 +1,5 @@ from pylab import figure,pcolor,scatter,contour,colorbar,show,subplot,plot,legend,connect -from modshogun import * +from shogun import * import util util.set_title('KernelRidgeRegression on Sine') diff --git a/examples/undocumented/python_modular/graphical/latex_plot_inits.py b/examples/undocumented/python/graphical/latex_plot_inits.py similarity index 100% rename from examples/undocumented/python_modular/graphical/latex_plot_inits.py rename to examples/undocumented/python/graphical/latex_plot_inits.py diff --git a/examples/undocumented/python_modular/graphical/lda.py b/examples/undocumented/python/graphical/lda.py similarity index 96% rename from examples/undocumented/python_modular/graphical/lda.py rename to examples/undocumented/python/graphical/lda.py index 14b4ccf9c47..9eef3796f2f 100644 --- a/examples/undocumented/python_modular/graphical/lda.py +++ b/examples/undocumented/python/graphical/lda.py @@ -1,5 +1,5 @@ from pylab import figure,pcolor,scatter,contour,colorbar,show,subplot,plot,connect -from modshogun import * +from shogun import * import util util.set_title('LDA') diff --git a/examples/undocumented/python_modular/graphical/mclda.py b/examples/undocumented/python/graphical/mclda.py similarity index 93% rename from examples/undocumented/python_modular/graphical/mclda.py rename to examples/undocumented/python/graphical/mclda.py index 961b93dfe7d..eae10a4ec65 100644 --- a/examples/undocumented/python_modular/graphical/mclda.py +++ b/examples/undocumented/python/graphical/mclda.py @@ -1,6 +1,6 @@ -from modshogun import RealFeatures -from modshogun import MulticlassLabels -from modshogun import MCLDA +from shogun import RealFeatures +from shogun import MulticlassLabels +from shogun import MCLDA from pylab import pcolor, contour, colorbar, connect, show, plot, axis import numpy as np 
diff --git a/examples/undocumented/python_modular/graphical/metric_lmnn_objective.py b/examples/undocumented/python/graphical/metric_lmnn_objective.py similarity index 92% rename from examples/undocumented/python_modular/graphical/metric_lmnn_objective.py rename to examples/undocumented/python/graphical/metric_lmnn_objective.py index 1fc83932eb0..bae796fd351 100644 --- a/examples/undocumented/python_modular/graphical/metric_lmnn_objective.py +++ b/examples/undocumented/python/graphical/metric_lmnn_objective.py @@ -36,10 +36,10 @@ def load_compressed_features(fname_features): def metric_lmnn_statistics(k=3, fname_features='../../data/fm_train_multiclass_digits.dat.gz', fname_labels='../../data/label_train_multiclass_digits.dat'): try: - from modshogun import LMNN, CSVFile, RealFeatures, MulticlassLabels, MSG_DEBUG + from shogun import LMNN, CSVFile, RealFeatures, MulticlassLabels, MSG_DEBUG import matplotlib.pyplot as pyplot except ImportError: - print 'Error importing modshogun or other required modules. Please, verify their installation.' + print 'Error importing shogun or other required modules. Please, verify their installation.' 
return features = RealFeatures(load_compressed_features(fname_features).T) diff --git a/examples/undocumented/python_modular/graphical/multiclass_qda.py b/examples/undocumented/python/graphical/multiclass_qda.py similarity index 97% rename from examples/undocumented/python_modular/graphical/multiclass_qda.py rename to examples/undocumented/python/graphical/multiclass_qda.py index 91afb419078..53419f45e57 100644 --- a/examples/undocumented/python_modular/graphical/multiclass_qda.py +++ b/examples/undocumented/python/graphical/multiclass_qda.py @@ -10,8 +10,8 @@ import util from scipy import linalg -from modshogun import QDA -from modshogun import RealFeatures, MulticlassLabels +from shogun import QDA +from shogun import RealFeatures, MulticlassLabels # colormap cmap = mpl.colors.LinearSegmentedColormap('color_classes', diff --git a/examples/undocumented/python_modular/graphical/multiple_smvs.py b/examples/undocumented/python/graphical/multiple_smvs.py similarity index 98% rename from examples/undocumented/python_modular/graphical/multiple_smvs.py rename to examples/undocumented/python/graphical/multiple_smvs.py index 21ad567584e..a728ba5b03a 100644 --- a/examples/undocumented/python_modular/graphical/multiple_smvs.py +++ b/examples/undocumented/python/graphical/multiple_smvs.py @@ -4,7 +4,7 @@ from pylab import figure,pcolor,scatter,contour,colorbar,show,subplot,connect,axis from numpy import concatenate from numpy.random import randn -from modshogun import * +from shogun import * import util util.set_title('Multiple SVMS') diff --git a/examples/undocumented/python_modular/graphical/prc.py b/examples/undocumented/python/graphical/prc.py similarity index 92% rename from examples/undocumented/python_modular/graphical/prc.py rename to examples/undocumented/python/graphical/prc.py index d13eda7ada1..79ef6c0c942 100644 --- a/examples/undocumented/python_modular/graphical/prc.py +++ b/examples/undocumented/python/graphical/prc.py @@ -1,7 +1,7 @@ from pylab import 
plot,grid,title,subplot,xlabel,ylabel,text,subplots_adjust,fill_between,mean,connect,show -from modshogun import GaussianKernel -from modshogun import LibSVM, LDA -from modshogun import PRCEvaluation +from shogun import GaussianKernel +from shogun import LibSVM, LDA +from shogun import PRCEvaluation import util util.set_title('PRC example') diff --git a/examples/undocumented/python_modular/graphical/preprocessor_kpca_graphical.py b/examples/undocumented/python/graphical/preprocessor_kpca_graphical.py similarity index 94% rename from examples/undocumented/python_modular/graphical/preprocessor_kpca_graphical.py rename to examples/undocumented/python/graphical/preprocessor_kpca_graphical.py index a2344ef8629..c6ef1437235 100644 --- a/examples/undocumented/python_modular/graphical/preprocessor_kpca_graphical.py +++ b/examples/undocumented/python/graphical/preprocessor_kpca_graphical.py @@ -25,9 +25,9 @@ parameter_list = [[data,0.01,1.0], [data,0.05,2.0]] def preprocessor_kernelpca_modular (data, threshold, width): - from modshogun import RealFeatures - from modshogun import KernelPCA - from modshogun import GaussianKernel + from shogun import RealFeatures + from shogun import KernelPCA + from shogun import GaussianKernel features = RealFeatures(data) kernel=GaussianKernel(features,features,width) preprocessor=KernelPCA(kernel) diff --git a/examples/undocumented/python_modular/graphical/qda.py b/examples/undocumented/python/graphical/qda.py similarity index 93% rename from examples/undocumented/python_modular/graphical/qda.py rename to examples/undocumented/python/graphical/qda.py index 3361cf12378..e8486410e5d 100644 --- a/examples/undocumented/python_modular/graphical/qda.py +++ b/examples/undocumented/python/graphical/qda.py @@ -1,6 +1,6 @@ -from modshogun import RealFeatures -from modshogun import MulticlassLabels -from modshogun import QDA +from shogun import RealFeatures +from shogun import MulticlassLabels +from shogun import QDA from pylab import pcolor, 
contour, colorbar, connect, show, plot, axis import numpy as np diff --git a/examples/undocumented/python_modular/graphical/regression_gaussian_process_demo.py b/examples/undocumented/python/graphical/regression_gaussian_process_demo.py similarity index 98% rename from examples/undocumented/python_modular/graphical/regression_gaussian_process_demo.py rename to examples/undocumented/python/graphical/regression_gaussian_process_demo.py index eb24bf4f1b9..132d6a71ebc 100644 --- a/examples/undocumented/python_modular/graphical/regression_gaussian_process_demo.py +++ b/examples/undocumented/python/graphical/regression_gaussian_process_demo.py @@ -6,14 +6,14 @@ ########################################################################### from numpy import * from numpy.random import randn -from modshogun import * +from shogun import * import pylab as PL import matplotlib import logging as LG import scipy as SP -from modshogun import GradientModelSelection -from modshogun import ModelSelectionParameters, R_EXP, R_LINEAR -from modshogun import ParameterCombination +from shogun import GradientModelSelection +from shogun import ModelSelectionParameters, R_EXP, R_LINEAR +from shogun import ParameterCombination def plot_training_data(x, y, shift=None, diff --git a/examples/undocumented/python_modular/graphical/regression_gaussian_process_modelselection.py b/examples/undocumented/python/graphical/regression_gaussian_process_modelselection.py similarity index 91% rename from examples/undocumented/python_modular/graphical/regression_gaussian_process_modelselection.py rename to examples/undocumented/python/graphical/regression_gaussian_process_modelselection.py index d9621f259cc..f916e7a4ea8 100644 --- a/examples/undocumented/python_modular/graphical/regression_gaussian_process_modelselection.py +++ b/examples/undocumented/python/graphical/regression_gaussian_process_modelselection.py @@ -6,10 +6,10 @@ def regression_gaussian_process_modelselection (n=100, n_test=100, \ x_range=5, 
x_range_test=10, noise_var=0.4): - from modshogun import RealFeatures, RegressionLabels - from modshogun import GaussianKernel - from modshogun import GradientModelSelection, ModelSelectionParameters - from modshogun import GaussianLikelihood, ZeroMean, \ + from shogun import RealFeatures, RegressionLabels + from shogun import GaussianKernel + from shogun import GradientModelSelection, ModelSelectionParameters + from shogun import GaussianLikelihood, ZeroMean, \ ExactInferenceMethod, GaussianProcessRegression, GradientCriterion, \ GradientEvaluation diff --git a/examples/undocumented/python_modular/graphical/regression_lars.py b/examples/undocumented/python/graphical/regression_lars.py similarity index 94% rename from examples/undocumented/python_modular/graphical/regression_lars.py rename to examples/undocumented/python/graphical/regression_lars.py index d78aef010c0..bab8189130a 100644 --- a/examples/undocumented/python_modular/graphical/regression_lars.py +++ b/examples/undocumented/python/graphical/regression_lars.py @@ -3,9 +3,9 @@ import numpy as np import matplotlib.pyplot as plt -from modshogun import RegressionLabels, RealFeatures -from modshogun import LeastAngleRegression, LinearRidgeRegression, LeastSquaresRegression -from modshogun import MeanSquaredError +from shogun import RegressionLabels, RealFeatures +from shogun import LeastAngleRegression, LinearRidgeRegression, LeastSquaresRegression +from shogun import MeanSquaredError # we compare LASSO with ordinary least-squares (OLE) # in the ideal case, the MSE of OLE should coincide diff --git a/examples/undocumented/python_modular/graphical/roc.py b/examples/undocumented/python/graphical/roc.py similarity index 92% rename from examples/undocumented/python_modular/graphical/roc.py rename to examples/undocumented/python/graphical/roc.py index aeb16c4275e..0c0b6c60438 100644 --- a/examples/undocumented/python_modular/graphical/roc.py +++ b/examples/undocumented/python/graphical/roc.py @@ -1,7 +1,7 @@ from 
pylab import plot,grid,title,subplot,xlabel,ylabel,text,subplots_adjust,fill_between,mean,connect,show -from modshogun import GaussianKernel -from modshogun import LibSVM, LDA -from modshogun import ROCEvaluation +from shogun import GaussianKernel +from shogun import LibSVM, LDA +from shogun import ROCEvaluation import util util.set_title('ROC example') diff --git a/examples/undocumented/python_modular/graphical/smem_1d_gmm.py b/examples/undocumented/python/graphical/smem_1d_gmm.py similarity index 96% rename from examples/undocumented/python_modular/graphical/smem_1d_gmm.py rename to examples/undocumented/python/graphical/smem_1d_gmm.py index 46daf184344..256404c93f7 100644 --- a/examples/undocumented/python_modular/graphical/smem_1d_gmm.py +++ b/examples/undocumented/python/graphical/smem_1d_gmm.py @@ -1,7 +1,7 @@ from pylab import figure,show,connect,hist,plot,legend from numpy import array, append, arange, empty, exp -from modshogun import Gaussian, GMM -from modshogun import RealFeatures +from shogun import Gaussian, GMM +from shogun import RealFeatures import util util.set_title('SMEM for 1d GMM example') diff --git a/examples/undocumented/python_modular/graphical/smem_2d_gmm.py b/examples/undocumented/python/graphical/smem_2d_gmm.py similarity index 98% rename from examples/undocumented/python_modular/graphical/smem_2d_gmm.py rename to examples/undocumented/python/graphical/smem_2d_gmm.py index 1b113a96989..6655aceee30 100644 --- a/examples/undocumented/python_modular/graphical/smem_2d_gmm.py +++ b/examples/undocumented/python/graphical/smem_2d_gmm.py @@ -1,7 +1,7 @@ from pylab import figure,scatter,contour,show,legend,connect from numpy import array, append, arange, reshape, empty, exp -from modshogun import Gaussian, GMM -from modshogun import RealFeatures +from shogun import Gaussian, GMM +from shogun import RealFeatures import util util.set_title('SMEM for 2d GMM example') diff --git a/examples/undocumented/python_modular/graphical/so_multiclass_BMRM.py 
b/examples/undocumented/python/graphical/so_multiclass_BMRM.py similarity index 92% rename from examples/undocumented/python_modular/graphical/so_multiclass_BMRM.py rename to examples/undocumented/python/graphical/so_multiclass_BMRM.py index 64295040669..8c59826e060 100644 --- a/examples/undocumented/python_modular/graphical/so_multiclass_BMRM.py +++ b/examples/undocumented/python/graphical/so_multiclass_BMRM.py @@ -3,10 +3,10 @@ import numpy as np import matplotlib.pyplot as plt -from modshogun import RealFeatures -from modshogun import MulticlassModel, MulticlassSOLabels, RealNumber, DualLibQPBMSOSVM -from modshogun import BMRM, PPBMRM, P3BMRM -from modshogun import StructuredAccuracy +from shogun import RealFeatures +from shogun import MulticlassModel, MulticlassSOLabels, RealNumber, DualLibQPBMSOSVM +from shogun import BMRM, PPBMRM, P3BMRM +from shogun import StructuredAccuracy def fill_data(cnt, minv, maxv): x1 = np.linspace(minv, maxv, cnt) diff --git a/examples/undocumented/python_modular/graphical/so_multiclass_director_BMRM.py b/examples/undocumented/python/graphical/so_multiclass_director_BMRM.py similarity index 94% rename from examples/undocumented/python_modular/graphical/so_multiclass_director_BMRM.py rename to examples/undocumented/python/graphical/so_multiclass_director_BMRM.py index e49e28987eb..0edbe80b833 100644 --- a/examples/undocumented/python_modular/graphical/so_multiclass_director_BMRM.py +++ b/examples/undocumented/python/graphical/so_multiclass_director_BMRM.py @@ -3,10 +3,10 @@ import numpy as np import matplotlib.pyplot as plt -from modshogun import RealFeatures -from modshogun import MulticlassModel, MulticlassSOLabels, RealNumber, DualLibQPBMSOSVM, DirectorStructuredModel -from modshogun import BMRM, PPBMRM, P3BMRM, ResultSet, RealVector -from modshogun import StructuredAccuracy +from shogun import RealFeatures +from shogun import MulticlassModel, MulticlassSOLabels, RealNumber, DualLibQPBMSOSVM, DirectorStructuredModel +from shogun 
import BMRM, PPBMRM, P3BMRM, ResultSet, RealVector +from shogun import StructuredAccuracy class MulticlassStructuredModel(DirectorStructuredModel): def __init__(self,features,labels): diff --git a/examples/undocumented/python_modular/graphical/statistics_hsic.py b/examples/undocumented/python/graphical/statistics_hsic.py similarity index 95% rename from examples/undocumented/python_modular/graphical/statistics_hsic.py rename to examples/undocumented/python/graphical/statistics_hsic.py index c5b76658179..a0e44f24d3b 100644 --- a/examples/undocumented/python_modular/graphical/statistics_hsic.py +++ b/examples/undocumented/python/graphical/statistics_hsic.py @@ -10,13 +10,13 @@ from pylab import * from scipy import * -from modshogun import RealFeatures -from modshogun import DataGenerator -from modshogun import GaussianKernel -from modshogun import HSIC -from modshogun import PERMUTATION, HSIC_GAMMA -from modshogun import EuclideanDistance -from modshogun import Statistics, Math +from shogun import RealFeatures +from shogun import DataGenerator +from shogun import GaussianKernel +from shogun import HSIC +from shogun import PERMUTATION, HSIC_GAMMA +from shogun import EuclideanDistance +from shogun import Statistics, Math # for nice plotting that fits into our shogun tutorial import latex_plot_inits diff --git a/examples/undocumented/python_modular/graphical/statistics_linear_time_mmd.py b/examples/undocumented/python/graphical/statistics_linear_time_mmd.py similarity index 94% rename from examples/undocumented/python_modular/graphical/statistics_linear_time_mmd.py rename to examples/undocumented/python/graphical/statistics_linear_time_mmd.py index f57ef905866..c2e001dc399 100644 --- a/examples/undocumented/python_modular/graphical/statistics_linear_time_mmd.py +++ b/examples/undocumented/python/graphical/statistics_linear_time_mmd.py @@ -10,13 +10,13 @@ from pylab import * from scipy import * -from modshogun import RealFeatures -from modshogun import 
MeanShiftDataGenerator -from modshogun import GaussianKernel, CombinedKernel -from modshogun import LinearTimeMMD, MMDKernelSelectionOpt -from modshogun import PERMUTATION, MMD1_GAUSSIAN -from modshogun import EuclideanDistance -from modshogun import Statistics, Math +from shogun import RealFeatures +from shogun import MeanShiftDataGenerator +from shogun import GaussianKernel, CombinedKernel +from shogun import LinearTimeMMD, MMDKernelSelectionOpt +from shogun import PERMUTATION, MMD1_GAUSSIAN +from shogun import EuclideanDistance +from shogun import Statistics, Math # for nice plotting that fits into our shogun tutorial import latex_plot_inits diff --git a/examples/undocumented/python_modular/graphical/statistics_quadratic_time_mmd.py b/examples/undocumented/python/graphical/statistics_quadratic_time_mmd.py similarity index 95% rename from examples/undocumented/python_modular/graphical/statistics_quadratic_time_mmd.py rename to examples/undocumented/python/graphical/statistics_quadratic_time_mmd.py index f70459583fe..1fc33ff41f6 100644 --- a/examples/undocumented/python_modular/graphical/statistics_quadratic_time_mmd.py +++ b/examples/undocumented/python/graphical/statistics_quadratic_time_mmd.py @@ -10,13 +10,13 @@ from pylab import * from scipy import * -from modshogun import RealFeatures -from modshogun import MeanShiftDataGenerator -from modshogun import GaussianKernel, CombinedKernel -from modshogun import QuadraticTimeMMD, MMDKernelSelectionMax -from modshogun import PERMUTATION, MMD2_SPECTRUM, MMD2_GAMMA, BIASED, UNBIASED -from modshogun import EuclideanDistance -from modshogun import Statistics, Math +from shogun import RealFeatures +from shogun import MeanShiftDataGenerator +from shogun import GaussianKernel, CombinedKernel +from shogun import QuadraticTimeMMD, MMDKernelSelectionMax +from shogun import PERMUTATION, MMD2_SPECTRUM, MMD2_GAMMA, BIASED, UNBIASED +from shogun import EuclideanDistance +from shogun import Statistics, Math # for nice plotting 
that fits into our shogun tutorial import latex_plot_inits diff --git a/examples/undocumented/python_modular/graphical/svm.py b/examples/undocumented/python/graphical/svm.py similarity index 96% rename from examples/undocumented/python_modular/graphical/svm.py rename to examples/undocumented/python/graphical/svm.py index 688aeea0637..9f6146fa259 100644 --- a/examples/undocumented/python_modular/graphical/svm.py +++ b/examples/undocumented/python/graphical/svm.py @@ -1,6 +1,6 @@ from pylab import figure,pcolor,scatter,contour,colorbar,show,subplot,plot,connect,axis from numpy.random import randn -from modshogun import * +from shogun import * import util util.set_title('SVM') diff --git a/examples/undocumented/python_modular/graphical/svmlin.py b/examples/undocumented/python/graphical/svmlin.py similarity index 98% rename from examples/undocumented/python_modular/graphical/svmlin.py rename to examples/undocumented/python/graphical/svmlin.py index 402edcdf85d..b380b7cf4ae 100644 --- a/examples/undocumented/python_modular/graphical/svmlin.py +++ b/examples/undocumented/python/graphical/svmlin.py @@ -1,5 +1,5 @@ from pylab import figure,pcolor,scatter,contour,colorbar,show,subplot,plot,axis, connect -from modshogun import * +from shogun import * import util util.set_title('SVM Linear 1') diff --git a/examples/undocumented/python_modular/graphical/svr_sinc.py b/examples/undocumented/python/graphical/svr_sinc.py similarity index 96% rename from examples/undocumented/python_modular/graphical/svr_sinc.py rename to examples/undocumented/python/graphical/svr_sinc.py index d1245f71aee..54ff5b96b3b 100644 --- a/examples/undocumented/python_modular/graphical/svr_sinc.py +++ b/examples/undocumented/python/graphical/svr_sinc.py @@ -1,5 +1,5 @@ from pylab import figure,pcolor,scatter,contour,colorbar,show,subplot,plot,legend, connect -from modshogun import * +from shogun import * import util util.set_title('SVR on Sinus') diff --git 
a/examples/undocumented/python_modular/graphical/util.py b/examples/undocumented/python/graphical/util.py similarity index 96% rename from examples/undocumented/python_modular/graphical/util.py rename to examples/undocumented/python/graphical/util.py index e4b749e014c..00bf51f917d 100644 --- a/examples/undocumented/python_modular/graphical/util.py +++ b/examples/undocumented/python/graphical/util.py @@ -4,7 +4,7 @@ from numpy import ones, array, double, meshgrid, reshape, linspace, \ concatenate, ravel, pi, sinc from numpy.random import randn, rand -from modshogun import BinaryLabels, RegressionLabels, RealFeatures, SparseRealFeatures +from shogun import BinaryLabels, RegressionLabels, RealFeatures, SparseRealFeatures QUITKEY='q' NUM_EXAMPLES=100 diff --git a/examples/undocumented/python_modular/kernel_anova_modular.py b/examples/undocumented/python/kernel_anova.py similarity index 72% rename from examples/undocumented/python_modular/kernel_anova_modular.py rename to examples/undocumented/python/kernel_anova.py index 2da2547cbb9..e1ab024add1 100644 --- a/examples/undocumented/python_modular/kernel_anova_modular.py +++ b/examples/undocumented/python/kernel_anova.py @@ -3,8 +3,8 @@ testdat = '../data/fm_test_real.dat' parameter_list = [[traindat,testdat,2,10], [traindat,testdat,5,10]] -def kernel_anova_modular (train_fname=traindat,test_fname=testdat,cardinality=2, size_cache=10): - from modshogun import ANOVAKernel,RealFeatures,CSVFile +def kernel_anova (train_fname=traindat,test_fname=testdat,cardinality=2, size_cache=10): + from shogun import ANOVAKernel,RealFeatures,CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -18,4 +18,4 @@ def kernel_anova_modular (train_fname=traindat,test_fname=testdat,cardinality=2, if __name__=='__main__': print('ANOVA') - kernel_anova_modular(*parameter_list[0]) + kernel_anova(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_auc_modular.py 
b/examples/undocumented/python/kernel_auc.py similarity index 68% rename from examples/undocumented/python_modular/kernel_auc_modular.py rename to examples/undocumented/python/kernel_auc.py index c5acf276a6e..eb62666c13b 100644 --- a/examples/undocumented/python_modular/kernel_auc_modular.py +++ b/examples/undocumented/python/kernel_auc.py @@ -3,9 +3,9 @@ label_traindat = '../data/label_train_twoclass.dat' parameter_list = [[traindat,label_traindat,1.7], [traindat,label_traindat,1.6]] -def kernel_auc_modular (train_fname=traindat,label_fname=label_traindat,width=1.7): - from modshogun import GaussianKernel, AUCKernel, RealFeatures - from modshogun import BinaryLabels, CSVFile +def kernel_auc (train_fname=traindat,label_fname=label_traindat,width=1.7): + from shogun import GaussianKernel, AUCKernel, RealFeatures + from shogun import BinaryLabels, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) subkernel=GaussianKernel(feats_train, feats_train, width) @@ -17,4 +17,4 @@ def kernel_auc_modular (train_fname=traindat,label_fname=label_traindat,width=1. 
if __name__=='__main__': print('AUC') - kernel_auc_modular(*parameter_list[0]) + kernel_auc(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_cauchy_modular.py b/examples/undocumented/python/kernel_cauchy.py similarity index 73% rename from examples/undocumented/python_modular/kernel_cauchy_modular.py rename to examples/undocumented/python/kernel_cauchy.py index b0da43cc6ec..10814ebf1eb 100644 --- a/examples/undocumented/python_modular/kernel_cauchy_modular.py +++ b/examples/undocumented/python/kernel_cauchy.py @@ -4,8 +4,8 @@ parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 10.0]] -def kernel_cauchy_modular (train_fname=traindat,test_fname=testdat, sigma=1.0): - from modshogun import RealFeatures, CauchyKernel, CSVFile, EuclideanDistance +def kernel_cauchy (train_fname=traindat,test_fname=testdat, sigma=1.0): + from shogun import RealFeatures, CauchyKernel, CSVFile, EuclideanDistance feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -20,4 +20,4 @@ def kernel_cauchy_modular (train_fname=traindat,test_fname=testdat, sigma=1.0): if __name__=='__main__': print('Cauchy') - kernel_cauchy_modular(*parameter_list[0]) + kernel_cauchy(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_chi2_modular.py b/examples/undocumented/python/kernel_chi2.py similarity index 72% rename from examples/undocumented/python_modular/kernel_chi2_modular.py rename to examples/undocumented/python/kernel_chi2.py index c157f23be49..95c4b4a62b8 100644 --- a/examples/undocumented/python_modular/kernel_chi2_modular.py +++ b/examples/undocumented/python/kernel_chi2.py @@ -4,8 +4,8 @@ parameter_list = [[traindat,testdat,1.4,10], [traindat,testdat,1.5,10]] -def kernel_chi2_modular (train_fname=traindat,test_fname=testdat,width=1.4, size_cache=10): - from modshogun import RealFeatures, Chi2Kernel, CSVFile, NormOne +def kernel_chi2 (train_fname=traindat,test_fname=testdat,width=1.4, size_cache=10): 
+ from shogun import RealFeatures, Chi2Kernel, CSVFile, NormOne feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -19,4 +19,4 @@ def kernel_chi2_modular (train_fname=traindat,test_fname=testdat,width=1.4, size if __name__=='__main__': print('Chi2') - kernel_chi2_modular(*parameter_list[0]) + kernel_chi2(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_circular_modular.py b/examples/undocumented/python/kernel_circular.py similarity index 73% rename from examples/undocumented/python_modular/kernel_circular_modular.py rename to examples/undocumented/python/kernel_circular.py index 17d1c41d36f..872a523c365 100644 --- a/examples/undocumented/python_modular/kernel_circular_modular.py +++ b/examples/undocumented/python/kernel_circular.py @@ -4,8 +4,8 @@ parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 5.0]] -def kernel_circular_modular(train_fname=traindat,test_fname=testdat, sigma=1.0): - from modshogun import RealFeatures, CircularKernel, EuclideanDistance, CSVFile +def kernel_circular(train_fname=traindat,test_fname=testdat, sigma=1.0): + from shogun import RealFeatures, CircularKernel, EuclideanDistance, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -21,4 +21,4 @@ def kernel_circular_modular(train_fname=traindat,test_fname=testdat, sigma=1.0): if __name__=='__main__': print('Circular') - kernel_circular_modular(*parameter_list[0]) + kernel_circular(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_combined_modular.py b/examples/undocumented/python/kernel_combined.py similarity index 81% rename from examples/undocumented/python_modular/kernel_combined_modular.py rename to examples/undocumented/python/kernel_combined.py index bf35563caa7..50227811a12 100644 --- a/examples/undocumented/python_modular/kernel_combined_modular.py +++ b/examples/undocumented/python/kernel_combined.py @@ -9,9 +9,9 @@ testdna = 
lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindat,testdat,traindna,testdna],[traindat,testdat,traindna,testdna]] -def kernel_combined_modular (fm_train_real=traindat,fm_test_real=testdat,fm_train_dna=traindna,fm_test_dna=testdna ): - from modshogun import CombinedKernel, GaussianKernel, FixedDegreeStringKernel, LocalAlignmentStringKernel - from modshogun import RealFeatures, StringCharFeatures, CombinedFeatures, DNA +def kernel_combined (fm_train_real=traindat,fm_test_real=testdat,fm_train_dna=traindna,fm_test_dna=testdna ): + from shogun import CombinedKernel, GaussianKernel, FixedDegreeStringKernel, LocalAlignmentStringKernel + from shogun import RealFeatures, StringCharFeatures, CombinedFeatures, DNA kernel=CombinedKernel() feats_train=CombinedFeatures() @@ -47,6 +47,6 @@ def kernel_combined_modular (fm_train_real=traindat,fm_test_real=testdat,fm_trai if __name__=='__main__': print('Combined') - kernel_combined_modular(*parameter_list[0]) + kernel_combined(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_combined_custom_poly_modular.py b/examples/undocumented/python/kernel_combined_custom_poly.py similarity index 80% rename from examples/undocumented/python_modular/kernel_combined_custom_poly_modular.py rename to examples/undocumented/python/kernel_combined_custom_poly.py index ebb57d1884b..490f7fecac9 100644 --- a/examples/undocumented/python_modular/kernel_combined_custom_poly_modular.py +++ b/examples/undocumented/python/kernel_combined_custom_poly.py @@ -7,10 +7,10 @@ parameter_list= [[traindat,testdat,label_traindat],[traindat,testdat,label_traindat]] -def kernel_combined_custom_poly_modular (train_fname = traindat,test_fname = testdat,train_label_fname=label_traindat): - from modshogun import CombinedFeatures, RealFeatures, BinaryLabels - from modshogun import CombinedKernel, PolyKernel, CustomKernel - from modshogun import LibSVM, CSVFile +def kernel_combined_custom_poly (train_fname = traindat,test_fname = 
testdat,train_label_fname=label_traindat): + from shogun import CombinedFeatures, RealFeatures, BinaryLabels + from shogun import CombinedKernel, PolyKernel, CustomKernel + from shogun import LibSVM, CSVFile kernel = CombinedKernel() feats_train = CombinedFeatures() @@ -53,4 +53,4 @@ def kernel_combined_custom_poly_modular (train_fname = traindat,test_fname = tes return km_train,kernel if __name__=='__main__': - kernel_combined_custom_poly_modular(*parameter_list[0]) + kernel_combined_custom_poly(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_comm_ulong_string_modular.py b/examples/undocumented/python/kernel_comm_ulong_string.py similarity index 77% rename from examples/undocumented/python_modular/kernel_comm_ulong_string_modular.py rename to examples/undocumented/python/kernel_comm_ulong_string.py index 5c7c33c8c69..f6f04e45521 100644 --- a/examples/undocumented/python_modular/kernel_comm_ulong_string_modular.py +++ b/examples/undocumented/python/kernel_comm_ulong_string.py @@ -6,11 +6,11 @@ testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindat,testdat,3,0,False ],[traindat,testdat,4,0,False]] -def kernel_comm_ulong_string_modular (fm_train_dna=traindat,fm_test_dna=testdat, order=3, gap=0, reverse = False): +def kernel_comm_ulong_string (fm_train_dna=traindat,fm_test_dna=testdat, order=3, gap=0, reverse = False): - from modshogun import CommUlongStringKernel - from modshogun import StringUlongFeatures, StringCharFeatures, DNA - from modshogun import SortUlongString + from shogun import CommUlongStringKernel + from shogun import StringUlongFeatures, StringCharFeatures, DNA + from shogun import SortUlongString charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_train_dna) @@ -40,4 +40,4 @@ def kernel_comm_ulong_string_modular (fm_train_dna=traindat,fm_test_dna=testdat, if __name__=='__main__': print('CommUlongString') - kernel_comm_ulong_string_modular(*parameter_list[0]) + 
kernel_comm_ulong_string(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_comm_word_string_modular.py b/examples/undocumented/python/kernel_comm_word_string.py similarity index 76% rename from examples/undocumented/python_modular/kernel_comm_word_string_modular.py rename to examples/undocumented/python/kernel_comm_word_string.py index e9179dd23be..8054abb9081 100644 --- a/examples/undocumented/python_modular/kernel_comm_word_string_modular.py +++ b/examples/undocumented/python/kernel_comm_word_string.py @@ -6,11 +6,11 @@ testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindat,testdat,4,0,False, False],[traindat,testdat,4,0,False,False]] -def kernel_comm_word_string_modular (fm_train_dna=traindat, fm_test_dna=testdat, order=3, gap=0, reverse = False, use_sign = False): +def kernel_comm_word_string (fm_train_dna=traindat, fm_test_dna=testdat, order=3, gap=0, reverse = False, use_sign = False): - from modshogun import CommWordStringKernel - from modshogun import StringWordFeatures, StringCharFeatures, DNA - from modshogun import SortWordString + from shogun import CommWordStringKernel + from shogun import StringWordFeatures, StringCharFeatures, DNA + from shogun import SortWordString charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_train_dna) @@ -37,4 +37,4 @@ def kernel_comm_word_string_modular (fm_train_dna=traindat, fm_test_dna=testdat, if __name__=='__main__': print('CommWordString') - kernel_comm_word_string_modular(*parameter_list[0]) + kernel_comm_word_string(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_const_modular.py b/examples/undocumented/python/kernel_const.py similarity index 70% rename from examples/undocumented/python_modular/kernel_const_modular.py rename to examples/undocumented/python/kernel_const.py index a9fc552dbd9..5c3f0658df2 100644 --- a/examples/undocumented/python_modular/kernel_const_modular.py +++ b/examples/undocumented/python/kernel_const.py @@ -1,9 
+1,9 @@ #!/usr/bin/env python parameter_list =[[23],[24]] -def kernel_const_modular (c=23): - from modshogun import DummyFeatures - from modshogun import ConstKernel +def kernel_const (c=23): + from shogun import DummyFeatures + from shogun import ConstKernel feats_train=DummyFeatures(10) feats_test=DummyFeatures(17) @@ -17,4 +17,4 @@ def kernel_const_modular (c=23): if __name__=='__main__': print('Const') - kernel_const_modular(*parameter_list[0]) + kernel_const(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_custom_modular.py b/examples/undocumented/python/kernel_custom.py similarity index 89% rename from examples/undocumented/python_modular/kernel_custom_modular.py rename to examples/undocumented/python/kernel_custom.py index 0264719871e..c77d1017166 100644 --- a/examples/undocumented/python_modular/kernel_custom_modular.py +++ b/examples/undocumented/python/kernel_custom.py @@ -4,12 +4,12 @@ parameter_list=[[7],[8]] -def kernel_custom_modular (dim=7): +def kernel_custom (dim=7): from numpy.random import rand, seed from numpy import array, float32, int32 - from modshogun import RealFeatures - from modshogun import CustomKernel - from modshogun import IndexFeatures + from shogun import RealFeatures + from shogun import CustomKernel + from shogun import IndexFeatures seed(17) data=rand(dim, dim) @@ -56,5 +56,5 @@ def kernel_custom_modular (dim=7): if __name__=='__main__': print('Custom') - kernel_custom_modular(*parameter_list[0]) + kernel_custom(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_diag_modular.py b/examples/undocumented/python/kernel_diag.py similarity index 70% rename from examples/undocumented/python_modular/kernel_diag_modular.py rename to examples/undocumented/python/kernel_diag.py index 88894facc1a..6c24a4e9614 100644 --- a/examples/undocumented/python_modular/kernel_diag_modular.py +++ b/examples/undocumented/python/kernel_diag.py @@ -1,8 +1,8 @@ #!/usr/bin/env python parameter_list 
=[[23],[24]] -def kernel_diag_modular (diag=23): - from modshogun import DummyFeatures - from modshogun import DiagKernel +def kernel_diag (diag=23): + from shogun import DummyFeatures + from shogun import DiagKernel feats_train=DummyFeatures(10) feats_test=DummyFeatures(17) @@ -16,5 +16,5 @@ def kernel_diag_modular (diag=23): if __name__=='__main__': print('Diag') - kernel_diag_modular(*parameter_list[0]) + kernel_diag(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_director_linear_modular.py b/examples/undocumented/python/kernel_director_linear.py similarity index 80% rename from examples/undocumented/python_modular/kernel_director_linear_modular.py rename to examples/undocumented/python/kernel_director_linear.py index 42d560ed7d8..6892d7e9d24 100644 --- a/examples/undocumented/python_modular/kernel_director_linear_modular.py +++ b/examples/undocumented/python/kernel_director_linear.py @@ -1,13 +1,13 @@ #!/usr/bin/env python import numpy -from modshogun import RealFeatures, MSG_DEBUG +from shogun import RealFeatures, MSG_DEBUG traindat = numpy.random.random_sample((10,10)) testdat = numpy.random.random_sample((10,10)) parameter_list=[[traindat,testdat,1.2],[traindat,testdat,1.4]] -def kernel_director_linear_modular (fm_train_real=traindat,fm_test_real=testdat,scale=1.2): +def kernel_director_linear (fm_train_real=traindat,fm_test_real=testdat,scale=1.2): try: - from modshogun import DirectorKernel + from shogun import DirectorKernel except ImportError: print("recompile shogun with --enable-swig-directors") return @@ -21,8 +21,8 @@ def kernel_function(self, idx_a, idx_b): return numpy.dot(seq1, seq2) - from modshogun import LinearKernel, AvgDiagKernelNormalizer - from modshogun import Time + from shogun import LinearKernel, AvgDiagKernelNormalizer + from shogun import Time feats_train=RealFeatures(fm_train_real) #feats_train.io.set_loglevel(MSG_DEBUG) @@ -54,4 +54,4 @@ def kernel_function(self, idx_a, idx_b): if __name__=='__main__': 
print('DirectorLinear') - kernel_director_linear_modular(*parameter_list[0]) + kernel_director_linear(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_distance_modular.py b/examples/undocumented/python/kernel_distance.py similarity index 72% rename from examples/undocumented/python_modular/kernel_distance_modular.py rename to examples/undocumented/python/kernel_distance.py index b1081077557..29ce767eefa 100644 --- a/examples/undocumented/python_modular/kernel_distance_modular.py +++ b/examples/undocumented/python/kernel_distance.py @@ -4,8 +4,8 @@ parameter_list=[[traindat,testdat,1.7],[traindat,testdat,1.8]] -def kernel_distance_modular (train_fname=traindat,test_fname=testdat,width=1.7): - from modshogun import RealFeatures, DistanceKernel, EuclideanDistance, CSVFile +def kernel_distance (train_fname=traindat,test_fname=testdat,width=1.7): + from shogun import RealFeatures, DistanceKernel, EuclideanDistance, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -20,4 +20,4 @@ def kernel_distance_modular (train_fname=traindat,test_fname=testdat,width=1.7): if __name__=='__main__': print('Distance') - kernel_distance_modular(*parameter_list[0]) + kernel_distance(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_distantsegments_modular.py b/examples/undocumented/python/kernel_distantsegments.py similarity index 71% rename from examples/undocumented/python_modular/kernel_distantsegments_modular.py rename to examples/undocumented/python/kernel_distantsegments.py index cbbf14bf411..082ec2f7fde 100644 --- a/examples/undocumented/python_modular/kernel_distantsegments_modular.py +++ b/examples/undocumented/python/kernel_distantsegments.py @@ -6,9 +6,9 @@ parameter_list = [[traindat,testdat,5,5],[traindat,testdat,6,6]] -def kernel_distantsegments_modular (fm_train_dna=traindat,fm_test_dna=testdat,delta=5, theta=5): - from modshogun import StringCharFeatures, DNA - from 
modshogun import DistantSegmentsKernel +def kernel_distantsegments (fm_train_dna=traindat,fm_test_dna=testdat,delta=5, theta=5): + from shogun import StringCharFeatures, DNA + from shogun import DistantSegmentsKernel feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) @@ -24,4 +24,4 @@ def kernel_distantsegments_modular (fm_train_dna=traindat,fm_test_dna=testdat,de if __name__=='__main__': print('DistantSegments') - kernel_distantsegments_modular(*parameter_list[0]) + kernel_distantsegments(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_exponential_modular.py b/examples/undocumented/python/kernel_exponential.py similarity index 72% rename from examples/undocumented/python_modular/kernel_exponential_modular.py rename to examples/undocumented/python/kernel_exponential.py index 41aa3e8f741..4bf809a8642 100644 --- a/examples/undocumented/python_modular/kernel_exponential_modular.py +++ b/examples/undocumented/python/kernel_exponential.py @@ -4,8 +4,8 @@ parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 5.0]] -def kernel_exponential_modular (train_fname=traindat,test_fname=testdat, tau_coef=1.0): - from modshogun import RealFeatures, ExponentialKernel, EuclideanDistance, CSVFile +def kernel_exponential (train_fname=traindat,test_fname=testdat, tau_coef=1.0): + from shogun import RealFeatures, ExponentialKernel, EuclideanDistance, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -21,4 +21,4 @@ def kernel_exponential_modular (train_fname=traindat,test_fname=testdat, tau_coe if __name__=='__main__': print('Exponential') - kernel_exponential_modular(*parameter_list[0]) + kernel_exponential(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_fisher_modular.py b/examples/undocumented/python/kernel_fisher.py similarity index 89% rename from examples/undocumented/python_modular/kernel_fisher_modular.py rename to 
examples/undocumented/python/kernel_fisher.py index 0397542ce01..99a452e07ea 100644 --- a/examples/undocumented/python_modular/kernel_fisher_modular.py +++ b/examples/undocumented/python/kernel_fisher.py @@ -11,14 +11,14 @@ fm_hmm_pos=[ traindat[i] for i in where([label_traindat==1])[1] ] fm_hmm_neg=[ traindat[i] for i in where([label_traindat==-1])[1] ] -def kernel_fisher_modular (fm_train_dna=traindat, fm_test_dna=testdat, +def kernel_fisher (fm_train_dna=traindat, fm_test_dna=testdat, label_train_dna=label_traindat, N=1,M=4,pseudo=1e-1,order=1,gap=0,reverse=False, kargs=[1,False,True]): - from modshogun import StringCharFeatures, StringWordFeatures, FKFeatures, DNA - from modshogun import PolyKernel - from modshogun import HMM, BW_NORMAL#, MSG_DEBUG + from shogun import StringCharFeatures, StringWordFeatures, FKFeatures, DNA + from shogun import PolyKernel + from shogun import HMM, BW_NORMAL#, MSG_DEBUG # train HMM for positive class charfeat=StringCharFeatures(fm_hmm_pos, DNA) @@ -66,4 +66,4 @@ def kernel_fisher_modular (fm_train_dna=traindat, fm_test_dna=testdat, if __name__=='__main__': print("Fisher Kernel") - kernel_fisher_modular(*parameter_list[0]) + kernel_fisher(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_fixed_degree_string_modular.py b/examples/undocumented/python/kernel_fixed_degree_string.py similarity index 71% rename from examples/undocumented/python_modular/kernel_fixed_degree_string_modular.py rename to examples/undocumented/python/kernel_fixed_degree_string.py index ff46c982c2a..51c8a5d42f0 100644 --- a/examples/undocumented/python_modular/kernel_fixed_degree_string_modular.py +++ b/examples/undocumented/python/kernel_fixed_degree_string.py @@ -6,9 +6,9 @@ testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list=[[traindat, testdat,3],[traindat,testdat,4]] -def kernel_fixed_degree_string_modular (fm_train_dna=traindat, fm_test_dna=testdat,degree=3): - from modshogun import StringCharFeatures, DNA - from 
modshogun import FixedDegreeStringKernel +def kernel_fixed_degree_string (fm_train_dna=traindat, fm_test_dna=testdat,degree=3): + from shogun import StringCharFeatures, DNA + from shogun import FixedDegreeStringKernel feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) @@ -22,4 +22,4 @@ def kernel_fixed_degree_string_modular (fm_train_dna=traindat, fm_test_dna=testd return km_train,km_test,kernel if __name__=='__main__': print('FixedDegreeString') - kernel_fixed_degree_string_modular(*parameter_list[0]) + kernel_fixed_degree_string(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_gaussian_modular.py b/examples/undocumented/python/kernel_gaussian.py similarity index 72% rename from examples/undocumented/python_modular/kernel_gaussian_modular.py rename to examples/undocumented/python/kernel_gaussian.py index 12aa031204a..181465ffe2d 100644 --- a/examples/undocumented/python_modular/kernel_gaussian_modular.py +++ b/examples/undocumented/python/kernel_gaussian.py @@ -4,8 +4,8 @@ parameter_list=[[traindat,testdat, 1.3],[traindat,testdat, 1.4]] -def kernel_gaussian_modular (train_fname=traindat,test_fname=testdat, width=1.3): - from modshogun import RealFeatures, GaussianKernel, CSVFile +def kernel_gaussian (train_fname=traindat,test_fname=testdat, width=1.3): + from shogun import RealFeatures, GaussianKernel, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -20,4 +20,4 @@ def kernel_gaussian_modular (train_fname=traindat,test_fname=testdat, width=1.3) if __name__=='__main__': print('Gaussian') - kernel_gaussian_modular(*parameter_list[0]) + kernel_gaussian(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_gaussian_shift_modular.py b/examples/undocumented/python/kernel_gaussian_shift.py similarity index 72% rename from examples/undocumented/python_modular/kernel_gaussian_shift_modular.py rename to 
examples/undocumented/python/kernel_gaussian_shift.py index a025b317b61..460dd43bedb 100644 --- a/examples/undocumented/python_modular/kernel_gaussian_shift_modular.py +++ b/examples/undocumented/python/kernel_gaussian_shift.py @@ -7,8 +7,8 @@ parameter_list=[[traindat,testdat,1.8,2,1],[traindat,testdat,1.9,2,1]] -def kernel_gaussian_shift_modular (train_fname=traindat,test_fname=testdat,width=1.8,max_shift=2,shift_step=1): - from modshogun import RealFeatures, GaussianShiftKernel, CSVFile +def kernel_gaussian_shift (train_fname=traindat,test_fname=testdat,width=1.8,max_shift=2,shift_step=1): + from shogun import RealFeatures, GaussianShiftKernel, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -23,4 +23,4 @@ def kernel_gaussian_shift_modular (train_fname=traindat,test_fname=testdat,width if __name__=='__main__': print('GaussianShift') - kernel_gaussian_shift_modular(*parameter_list[0]) + kernel_gaussian_shift(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_histogram_word_string_modular.py b/examples/undocumented/python/kernel_histogram_word_string.py similarity index 74% rename from examples/undocumented/python_modular/kernel_histogram_word_string_modular.py rename to examples/undocumented/python/kernel_histogram_word_string.py index 140bbf3c52d..92b955b63e2 100644 --- a/examples/undocumented/python_modular/kernel_histogram_word_string_modular.py +++ b/examples/undocumented/python/kernel_histogram_word_string.py @@ -7,11 +7,11 @@ label_traindat = lm.load_labels('../data/label_train_dna.dat') parameter_list=[[traindat,testdat,label_traindat,1,1e1, 1e0],[traindat,testdat,label_traindat,1,1e4,1e4]] -def kernel_histogram_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,order=3,ppseudo_count=1,npseudo_count=1): +def kernel_histogram_word_string 
(fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,order=3,ppseudo_count=1,npseudo_count=1): - from modshogun import StringCharFeatures, StringWordFeatures, DNA, BinaryLabels - from modshogun import HistogramWordStringKernel, AvgDiagKernelNormalizer - from modshogun import PluginEstimate#, MSG_DEBUG + from shogun import StringCharFeatures, StringWordFeatures, DNA, BinaryLabels + from shogun import HistogramWordStringKernel, AvgDiagKernelNormalizer + from shogun import PluginEstimate#, MSG_DEBUG charfeat=StringCharFeatures(DNA) #charfeat.io.set_loglevel(MSG_DEBUG) @@ -40,4 +40,4 @@ def kernel_histogram_word_string_modular (fm_train_dna=traindat,fm_test_dna=test if __name__=='__main__': print('PluginEstimate w/ HistogramWord') - kernel_histogram_word_string_modular(*parameter_list[0]) + kernel_histogram_word_string(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_inversemultiquadric_modular.py b/examples/undocumented/python/kernel_inversemultiquadric.py similarity index 70% rename from examples/undocumented/python_modular/kernel_inversemultiquadric_modular.py rename to examples/undocumented/python/kernel_inversemultiquadric.py index 4b9490232e7..4efefd7f4e1 100644 --- a/examples/undocumented/python_modular/kernel_inversemultiquadric_modular.py +++ b/examples/undocumented/python/kernel_inversemultiquadric.py @@ -4,8 +4,8 @@ parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 5.0]] -def kernel_inversemultiquadric_modular (train_fname=traindat,test_fname=testdat, shift_coef=1.0): - from modshogun import RealFeatures, InverseMultiQuadricKernel, EuclideanDistance, CSVFile +def kernel_inversemultiquadric (train_fname=traindat,test_fname=testdat, shift_coef=1.0): + from shogun import RealFeatures, InverseMultiQuadricKernel, EuclideanDistance, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -21,4 +21,4 @@ def kernel_inversemultiquadric_modular 
(train_fname=traindat,test_fname=testdat, if __name__=='__main__': print('InverseMultiquadric') - kernel_inversemultiquadric_modular(*parameter_list[0]) + kernel_inversemultiquadric(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_io_modular.py b/examples/undocumented/python/kernel_io.py similarity index 59% rename from examples/undocumented/python_modular/kernel_io_modular.py rename to examples/undocumented/python/kernel_io.py index 20597fa839f..720cb738763 100644 --- a/examples/undocumented/python_modular/kernel_io_modular.py +++ b/examples/undocumented/python/kernel_io.py @@ -4,31 +4,29 @@ parameter_list=[[traindat,testdat,1.9],[traindat,testdat,1.7]] -def kernel_io_modular (train_fname=traindat,test_fname=testdat,width=1.9): - from modshogun import RealFeatures, GaussianKernel, CSVFile +def kernel_io (train_fname=traindat,test_fname=testdat,width=1.9): + from shogun import RealFeatures, GaussianKernel, CSVFile + from tempfile import NamedTemporaryFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) kernel=GaussianKernel(feats_train, feats_train, width) km_train=kernel.get_kernel_matrix() - f=CSVFile("tmp/gaussian_train.csv","w") + tmp_train_csv = NamedTemporaryFile(suffix='train.csv') + f=CSVFile(tmp_train_csv.name, "w") kernel.save(f) del f kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() - f=CSVFile("tmp/gaussian_test.csv","w") + tmp_test_csv = NamedTemporaryFile(suffix='test.csv') + f=CSVFile(tmp_test_csv.name,"w") kernel.save(f) del f - #clean up - import os - os.unlink("tmp/gaussian_test.csv") - os.unlink("tmp/gaussian_train.csv") - return km_train, km_test, kernel if __name__=='__main__': print('Gaussian') - kernel_io_modular(*parameter_list[0]) + kernel_io(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_linear_modular.py b/examples/undocumented/python/kernel_linear.py similarity index 73% rename from 
examples/undocumented/python_modular/kernel_linear_modular.py rename to examples/undocumented/python/kernel_linear.py index e4c88b16078..2f668b453b4 100644 --- a/examples/undocumented/python_modular/kernel_linear_modular.py +++ b/examples/undocumented/python/kernel_linear.py @@ -4,9 +4,9 @@ parameter_list=[[traindat,testdat,1.2],[traindat,testdat,1.4]] -def kernel_linear_modular (train_fname=traindat,test_fname=testdat,scale=1.2): +def kernel_linear (train_fname=traindat,test_fname=testdat,scale=1.2): - from modshogun import RealFeatures, LinearKernel, AvgDiagKernelNormalizer, CSVFile + from shogun import RealFeatures, LinearKernel, AvgDiagKernelNormalizer, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -22,4 +22,4 @@ def kernel_linear_modular (train_fname=traindat,test_fname=testdat,scale=1.2): if __name__=='__main__': print('Linear') - kernel_linear_modular(*parameter_list[0]) + kernel_linear(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_linear_byte_modular.py b/examples/undocumented/python/kernel_linear_byte.py similarity index 72% rename from examples/undocumented/python_modular/kernel_linear_byte_modular.py rename to examples/undocumented/python/kernel_linear_byte.py index e0f8068cad9..41ebbee28a9 100644 --- a/examples/undocumented/python_modular/kernel_linear_byte_modular.py +++ b/examples/undocumented/python/kernel_linear_byte.py @@ -4,8 +4,8 @@ parameter_list=[[traindat,testdat],[traindat,testdat]] -def kernel_linear_byte_modular (train_fname=traindat,test_fname=testdat): - from modshogun import LinearKernel, ByteFeatures, CSVFile +def kernel_linear_byte (train_fname=traindat,test_fname=testdat): + from shogun import LinearKernel, ByteFeatures, CSVFile feats_train=ByteFeatures(CSVFile(train_fname)) feats_test=ByteFeatures(CSVFile(test_fname)) @@ -19,4 +19,4 @@ def kernel_linear_byte_modular (train_fname=traindat,test_fname=testdat): if __name__=='__main__': 
print('LinearByte') - kernel_linear_byte_modular(*parameter_list[0]) + kernel_linear_byte(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_linear_string_modular.py b/examples/undocumented/python/kernel_linear_string.py similarity index 73% rename from examples/undocumented/python_modular/kernel_linear_string_modular.py rename to examples/undocumented/python/kernel_linear_string.py index 3c4b4f1d168..22eeb1d63ef 100644 --- a/examples/undocumented/python_modular/kernel_linear_string_modular.py +++ b/examples/undocumented/python/kernel_linear_string.py @@ -6,9 +6,9 @@ parameter_list=[[traindat,testdat],[traindat,testdat]] -def kernel_linear_string_modular (fm_train_dna=traindat,fm_test_dna=testdat): - from modshogun import StringCharFeatures, DNA - from modshogun import LinearStringKernel +def kernel_linear_string (fm_train_dna=traindat,fm_test_dna=testdat): + from shogun import StringCharFeatures, DNA + from shogun import LinearStringKernel feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) @@ -23,4 +23,4 @@ def kernel_linear_string_modular (fm_train_dna=traindat,fm_test_dna=testdat): if __name__=='__main__': from tools.load import LoadMatrix print('LinearString') - kernel_linear_string_modular(*parameter_list[0]) + kernel_linear_string(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_linear_word_modular.py b/examples/undocumented/python/kernel_linear_word.py similarity index 74% rename from examples/undocumented/python_modular/kernel_linear_word_modular.py rename to examples/undocumented/python/kernel_linear_word.py index e484a84d275..1ac78017dd0 100644 --- a/examples/undocumented/python_modular/kernel_linear_word_modular.py +++ b/examples/undocumented/python/kernel_linear_word.py @@ -8,10 +8,10 @@ parameter_list=[[traindat,testdat,1.2],[traindat,testdat,1.2]] -def kernel_linear_word_modular (fm_train_word=traindat,fm_test_word=testdat,scale=1.2): +def 
kernel_linear_word (fm_train_word=traindat,fm_test_word=testdat,scale=1.2): - from modshogun import LinearKernel, AvgDiagKernelNormalizer - from modshogun import WordFeatures + from shogun import LinearKernel, AvgDiagKernelNormalizer + from shogun import WordFeatures feats_train=WordFeatures(fm_train_word) feats_test=WordFeatures(fm_test_word) @@ -27,4 +27,4 @@ def kernel_linear_word_modular (fm_train_word=traindat,fm_test_word=testdat,scal if __name__=='__main__': print('LinearWord') - kernel_linear_word_modular(*parameter_list[0]) + kernel_linear_word(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_local_alignment_string_modular.py b/examples/undocumented/python/kernel_local_alignment_string.py similarity index 70% rename from examples/undocumented/python_modular/kernel_local_alignment_string_modular.py rename to examples/undocumented/python/kernel_local_alignment_string.py index 0a4990932b3..e2ab9aeb245 100644 --- a/examples/undocumented/python_modular/kernel_local_alignment_string_modular.py +++ b/examples/undocumented/python/kernel_local_alignment_string.py @@ -6,10 +6,10 @@ testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list=[[traindat,testdat],[traindat,testdat]] -def kernel_local_alignment_string_modular (fm_train_dna=traindat,fm_test_dna=testdat): +def kernel_local_alignment_string (fm_train_dna=traindat,fm_test_dna=testdat): - from modshogun import StringCharFeatures, DNA - from modshogun import LocalAlignmentStringKernel + from shogun import StringCharFeatures, DNA + from shogun import LocalAlignmentStringKernel feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) @@ -23,4 +23,4 @@ def kernel_local_alignment_string_modular (fm_train_dna=traindat,fm_test_dna=tes if __name__=='__main__': print('LocalAlignmentString') - kernel_local_alignment_string_modular(*parameter_list[0]) + kernel_local_alignment_string(*parameter_list[0]) diff --git 
a/examples/undocumented/python_modular/kernel_locality_improved_string_modular.py b/examples/undocumented/python/kernel_locality_improved_string.py similarity index 69% rename from examples/undocumented/python_modular/kernel_locality_improved_string_modular.py rename to examples/undocumented/python/kernel_locality_improved_string.py index 581c3b64179..95d01f499b0 100644 --- a/examples/undocumented/python_modular/kernel_locality_improved_string_modular.py +++ b/examples/undocumented/python/kernel_locality_improved_string.py @@ -6,10 +6,10 @@ testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list=[[traindat,testdat,5,5,7],[traindat,testdat,5,5,7]] -def kernel_locality_improved_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,length=5,inner_degree=5,outer_degree=7): +def kernel_locality_improved_string (fm_train_dna=traindat,fm_test_dna=testdat,length=5,inner_degree=5,outer_degree=7): - from modshogun import StringCharFeatures, DNA - from modshogun import LocalityImprovedStringKernel + from shogun import StringCharFeatures, DNA + from shogun import LocalityImprovedStringKernel feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) @@ -24,4 +24,4 @@ def kernel_locality_improved_string_modular (fm_train_dna=traindat,fm_test_dna=t if __name__=='__main__': print('LocalityImprovedString') - kernel_locality_improved_string_modular(*parameter_list[0]) + kernel_locality_improved_string(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_log_modular.py b/examples/undocumented/python/kernel_log.py similarity index 74% rename from examples/undocumented/python_modular/kernel_log_modular.py rename to examples/undocumented/python/kernel_log.py index 95d93e06d8a..3c6cc15f84b 100644 --- a/examples/undocumented/python_modular/kernel_log_modular.py +++ b/examples/undocumented/python/kernel_log.py @@ -4,8 +4,8 @@ parameter_list=[[traindat,testdat, 2.0],[traindat,testdat, 3.0]] -def kernel_log_modular 
(train_fname=traindat,test_fname=testdat, degree=2.0): - from modshogun import RealFeatures, LogKernel, EuclideanDistance, CSVFile +def kernel_log (train_fname=traindat,test_fname=testdat, degree=2.0): + from shogun import RealFeatures, LogKernel, EuclideanDistance, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -22,4 +22,4 @@ def kernel_log_modular (train_fname=traindat,test_fname=testdat, degree=2.0): if __name__=='__main__': print('Log') - kernel_log_modular(*parameter_list[0]) + kernel_log(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_match_word_string_modular.py b/examples/undocumented/python/kernel_match_word_string.py similarity index 78% rename from examples/undocumented/python_modular/kernel_match_word_string_modular.py rename to examples/undocumented/python/kernel_match_word_string.py index 71e86adc933..f7a81e02162 100644 --- a/examples/undocumented/python_modular/kernel_match_word_string_modular.py +++ b/examples/undocumented/python/kernel_match_word_string.py @@ -7,10 +7,10 @@ parameter_list = [[traindat,testdat, 3,1.4,10,3,0,False],[ traindat,testdat, 3,1.4,10,3,0,False]] -def kernel_match_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat, +def kernel_match_word_string (fm_train_dna=traindat,fm_test_dna=testdat, degree=3,scale=1.4,size_cache=10,order=3,gap=0,reverse=False): - from modshogun import MatchWordStringKernel, AvgDiagKernelNormalizer - from modshogun import StringWordFeatures, StringCharFeatures, DNA + from shogun import MatchWordStringKernel, AvgDiagKernelNormalizer + from shogun import StringWordFeatures, StringCharFeatures, DNA charfeat=StringCharFeatures(fm_train_dna, DNA) feats_train=StringWordFeatures(DNA) @@ -31,5 +31,5 @@ def kernel_match_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat, if __name__=='__main__': print('MatchWordString') - kernel_match_word_string_modular(*parameter_list[0]) + 
kernel_match_word_string(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_multiquadric_modular.py b/examples/undocumented/python/kernel_multiquadric.py similarity index 72% rename from examples/undocumented/python_modular/kernel_multiquadric_modular.py rename to examples/undocumented/python/kernel_multiquadric.py index c09c8b18038..f377e35ebc7 100644 --- a/examples/undocumented/python_modular/kernel_multiquadric_modular.py +++ b/examples/undocumented/python/kernel_multiquadric.py @@ -5,8 +5,8 @@ parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 5.0]] -def kernel_multiquadric_modular (train_fname=traindat,test_fname=testdat, shift_coef=1.0): - from modshogun import RealFeatures, MultiquadricKernel, EuclideanDistance, CSVFile +def kernel_multiquadric (train_fname=traindat,test_fname=testdat, shift_coef=1.0): + from shogun import RealFeatures, MultiquadricKernel, EuclideanDistance, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -23,4 +23,4 @@ def kernel_multiquadric_modular (train_fname=traindat,test_fname=testdat, shift_ if __name__=='__main__': print('Multiquadric') - kernel_multiquadric_modular(*parameter_list[0]) + kernel_multiquadric(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_oligo_string_modular.py b/examples/undocumented/python/kernel_oligo_string.py similarity index 72% rename from examples/undocumented/python_modular/kernel_oligo_string_modular.py rename to examples/undocumented/python/kernel_oligo_string.py index bb7d6b0b301..466a739ef0d 100644 --- a/examples/undocumented/python_modular/kernel_oligo_string_modular.py +++ b/examples/undocumented/python/kernel_oligo_string.py @@ -6,9 +6,9 @@ parameter_list = [[traindat,testdat,3,1.2,10],[traindat,testdat,4,1.3,10]] -def kernel_oligo_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,k=3,width=1.2,size_cache=10): - from modshogun import StringCharFeatures, DNA - from modshogun 
import OligoStringKernel +def kernel_oligo_string (fm_train_dna=traindat,fm_test_dna=testdat,k=3,width=1.2,size_cache=10): + from shogun import StringCharFeatures, DNA + from shogun import OligoStringKernel feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) @@ -23,4 +23,4 @@ def kernel_oligo_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,k=3,w if __name__=='__main__': print('OligoString') - kernel_oligo_string_modular(*parameter_list[0]) + kernel_oligo_string(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_poly_modular.py b/examples/undocumented/python/kernel_poly.py similarity index 75% rename from examples/undocumented/python_modular/kernel_poly_modular.py rename to examples/undocumented/python/kernel_poly.py index b8adae1d85d..56b481a59be 100644 --- a/examples/undocumented/python_modular/kernel_poly_modular.py +++ b/examples/undocumented/python/kernel_poly.py @@ -4,9 +4,9 @@ parameter_list = [[traindat,testdat,4,False,True],[traindat,testdat,5,False,True]] -def kernel_poly_modular (train_fname=traindat,test_fname=testdat,degree=4,inhomogene=False, +def kernel_poly (train_fname=traindat,test_fname=testdat,degree=4,inhomogene=False, use_normalization=True): - from modshogun import RealFeatures, PolyKernel, CSVFile + from shogun import RealFeatures, PolyKernel, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -20,4 +20,4 @@ def kernel_poly_modular (train_fname=traindat,test_fname=testdat,degree=4,inhomo return km_train,km_test,kernel if __name__=='__main__': print('Poly') - kernel_poly_modular (*parameter_list[0]) + kernel_poly (*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_poly_match_string_modular.py b/examples/undocumented/python/kernel_poly_match_string.py similarity index 70% rename from examples/undocumented/python_modular/kernel_poly_match_string_modular.py rename to 
examples/undocumented/python/kernel_poly_match_string.py index 2fb76de93d4..2fc6bb0ce5a 100644 --- a/examples/undocumented/python_modular/kernel_poly_match_string_modular.py +++ b/examples/undocumented/python/kernel_poly_match_string.py @@ -5,9 +5,9 @@ testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindat,testdat,3,False],[traindat,testdat,4,False]] -def kernel_poly_match_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,degree=3,inhomogene=False): - from modshogun import PolyMatchStringKernel - from modshogun import StringCharFeatures, DNA +def kernel_poly_match_string (fm_train_dna=traindat,fm_test_dna=testdat,degree=3,inhomogene=False): + from shogun import PolyMatchStringKernel + from shogun import StringCharFeatures, DNA feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_train_dna, DNA) @@ -21,4 +21,4 @@ def kernel_poly_match_string_modular (fm_train_dna=traindat,fm_test_dna=testdat, if __name__=='__main__': print('PolyMatchString') - kernel_poly_match_string_modular(*parameter_list[0]) + kernel_poly_match_string(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_poly_match_word_string_modular.py b/examples/undocumented/python/kernel_poly_match_word_string.py similarity index 77% rename from examples/undocumented/python_modular/kernel_poly_match_word_string_modular.py rename to examples/undocumented/python/kernel_poly_match_word_string.py index 51ed2ce57f2..c2ac3017603 100644 --- a/examples/undocumented/python_modular/kernel_poly_match_word_string_modular.py +++ b/examples/undocumented/python/kernel_poly_match_word_string.py @@ -6,10 +6,10 @@ parameter_list = [[traindat,testdat,2,True,3,0,False],[traindat,testdat,2,True,3,0,False]] -def kernel_poly_match_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat, +def kernel_poly_match_word_string (fm_train_dna=traindat,fm_test_dna=testdat, degree=2,inhomogene=True,order=3,gap=0,reverse=False): - from modshogun import 
PolyMatchWordStringKernel - from modshogun import StringWordFeatures, StringCharFeatures, DNA + from shogun import PolyMatchWordStringKernel + from shogun import StringWordFeatures, StringCharFeatures, DNA @@ -30,4 +30,4 @@ def kernel_poly_match_word_string_modular (fm_train_dna=traindat,fm_test_dna=tes if __name__=='__main__': print('PolyMatchWordString') - kernel_poly_match_word_string_modular(*parameter_list[0]) + kernel_poly_match_word_string(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_power_modular.py b/examples/undocumented/python/kernel_power.py similarity index 74% rename from examples/undocumented/python_modular/kernel_power_modular.py rename to examples/undocumented/python/kernel_power.py index 072b788eb8b..52c7c471808 100644 --- a/examples/undocumented/python_modular/kernel_power_modular.py +++ b/examples/undocumented/python/kernel_power.py @@ -4,8 +4,8 @@ testdat = '../data/fm_test_real.dat' parameter_list=[[traindat,testdat, 2.0],[traindat,testdat, 3.0]] -def kernel_power_modular (train_fname=traindat,test_fname=testdat, degree=2.0): - from modshogun import RealFeatures, PowerKernel, EuclideanDistance, CSVFile +def kernel_power (train_fname=traindat,test_fname=testdat, degree=2.0): + from shogun import RealFeatures, PowerKernel, EuclideanDistance, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -22,4 +22,4 @@ def kernel_power_modular (train_fname=traindat,test_fname=testdat, degree=2.0): if __name__=='__main__': print('Power') - kernel_power_modular(*parameter_list[0]) + kernel_power(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_rationalquadratic_modular.py b/examples/undocumented/python/kernel_rationalquadratic.py similarity index 71% rename from examples/undocumented/python_modular/kernel_rationalquadratic_modular.py rename to examples/undocumented/python/kernel_rationalquadratic.py index 87119a13696..f2e4a991c7a 100644 --- 
a/examples/undocumented/python_modular/kernel_rationalquadratic_modular.py +++ b/examples/undocumented/python/kernel_rationalquadratic.py @@ -4,8 +4,8 @@ testdat = '../data/fm_test_real.dat' parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 5.0]] -def kernel_rationalquadratic_modular (train_fname=traindat,test_fname=testdat, shift_coef=1.0): - from modshogun import RealFeatures, RationalQuadraticKernel, EuclideanDistance, CSVFile +def kernel_rationalquadratic (train_fname=traindat,test_fname=testdat, shift_coef=1.0): + from shogun import RealFeatures, RationalQuadraticKernel, EuclideanDistance, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -22,4 +22,4 @@ def kernel_rationalquadratic_modular (train_fname=traindat,test_fname=testdat, s if __name__=='__main__': print('RationalQuadratic') - kernel_rationalquadratic_modular(*parameter_list[0]) + kernel_rationalquadratic(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_salzberg_word_string_modular.py b/examples/undocumented/python/kernel_salzberg_word_string.py similarity index 77% rename from examples/undocumented/python_modular/kernel_salzberg_word_string_modular.py rename to examples/undocumented/python/kernel_salzberg_word_string.py index 4063f1dc5f2..88a4773896e 100644 --- a/examples/undocumented/python_modular/kernel_salzberg_word_string_modular.py +++ b/examples/undocumented/python/kernel_salzberg_word_string.py @@ -6,11 +6,11 @@ label_traindat = lm.load_labels('../data/label_train_dna.dat') parameter_list = [[traindat,testdat,label_traindat,3,0,False],[traindat,testdat,label_traindat,3,0,False]] -def kernel_salzberg_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat, +def kernel_salzberg_word_string (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat, order=3,gap=0,reverse=False): - from modshogun import StringCharFeatures, StringWordFeatures, DNA, 
BinaryLabels - from modshogun import SalzbergWordStringKernel - from modshogun import PluginEstimate + from shogun import StringCharFeatures, StringWordFeatures, DNA, BinaryLabels + from shogun import SalzbergWordStringKernel + from shogun import PluginEstimate charfeat=StringCharFeatures(fm_train_dna, DNA) feats_train=StringWordFeatures(charfeat.get_alphabet()) @@ -37,5 +37,5 @@ def kernel_salzberg_word_string_modular (fm_train_dna=traindat,fm_test_dna=testd if __name__=='__main__': print('PluginEstimate w/ SalzbergWord') - kernel_salzberg_word_string_modular(*parameter_list[0]) + kernel_salzberg_word_string(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_sigmoid_modular.py b/examples/undocumented/python/kernel_sigmoid.py similarity index 72% rename from examples/undocumented/python_modular/kernel_sigmoid_modular.py rename to examples/undocumented/python/kernel_sigmoid.py index f85d810c2a4..64ad32595ba 100644 --- a/examples/undocumented/python_modular/kernel_sigmoid_modular.py +++ b/examples/undocumented/python/kernel_sigmoid.py @@ -4,8 +4,8 @@ parameter_list = [[traindat,testdat,10,1.2,1.3],[traindat,testdat,10,1.2,1.3]] -def kernel_sigmoid_modular (train_fname=traindat,test_fname=testdat,size_cache=10,gamma=1.2,coef0=1.3): - from modshogun import RealFeatures, SigmoidKernel, CSVFile +def kernel_sigmoid (train_fname=traindat,test_fname=testdat,size_cache=10,gamma=1.2,coef0=1.3): + from shogun import RealFeatures, SigmoidKernel, CSVFile feats_train=RealFeatures(CSVFile(train_fname)) feats_test=RealFeatures(CSVFile(test_fname)) @@ -19,4 +19,4 @@ def kernel_sigmoid_modular (train_fname=traindat,test_fname=testdat,size_cache=1 if __name__=='__main__': print('Sigmoid') - kernel_sigmoid_modular(*parameter_list[0]) + kernel_sigmoid(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_simple_locality_improved_string_modular.py b/examples/undocumented/python/kernel_simple_locality_improved_string.py similarity index 72% 
rename from examples/undocumented/python_modular/kernel_simple_locality_improved_string_modular.py rename to examples/undocumented/python/kernel_simple_locality_improved_string.py index 672405e181c..cd9e33053d8 100644 --- a/examples/undocumented/python_modular/kernel_simple_locality_improved_string_modular.py +++ b/examples/undocumented/python/kernel_simple_locality_improved_string.py @@ -6,11 +6,11 @@ parameter_list = [[traindat,testdat,5,5,1],[traindat,testdat,5,3,2]] -def kernel_simple_locality_improved_string_modular (fm_train_dna=traindat,fm_test_dna=testdat, +def kernel_simple_locality_improved_string (fm_train_dna=traindat,fm_test_dna=testdat, length=5,inner_degree=5,outer_degree=1 ): - from modshogun import StringCharFeatures, DNA - from modshogun import SimpleLocalityImprovedStringKernel, MSG_DEBUG + from shogun import StringCharFeatures, DNA + from shogun import SimpleLocalityImprovedStringKernel, MSG_DEBUG feats_train=StringCharFeatures(fm_train_dna, DNA) #feats_train.io.set_loglevel(MSG_DEBUG) @@ -27,4 +27,4 @@ def kernel_simple_locality_improved_string_modular (fm_train_dna=traindat,fm_tes if __name__=='__main__': print('SimpleLocalityImprovedString') - kernel_simple_locality_improved_string_modular(*parameter_list[0]) + kernel_simple_locality_improved_string(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_sparse_gaussian_modular.py b/examples/undocumented/python/kernel_sparse_gaussian.py similarity index 72% rename from examples/undocumented/python_modular/kernel_sparse_gaussian_modular.py rename to examples/undocumented/python/kernel_sparse_gaussian.py index 62628b24893..1ad4623d420 100644 --- a/examples/undocumented/python_modular/kernel_sparse_gaussian_modular.py +++ b/examples/undocumented/python/kernel_sparse_gaussian.py @@ -6,9 +6,9 @@ parameter_list = [[traindat,testdat,1.1],[traindat,testdat,1.2]] -def kernel_sparse_gaussian_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.1 ): - from modshogun import 
SparseRealFeatures - from modshogun import GaussianKernel +def kernel_sparse_gaussian (fm_train_real=traindat,fm_test_real=testdat,width=1.1 ): + from shogun import SparseRealFeatures + from shogun import GaussianKernel feats_train=SparseRealFeatures(fm_train_real) feats_test=SparseRealFeatures(fm_test_real) @@ -23,4 +23,4 @@ def kernel_sparse_gaussian_modular (fm_train_real=traindat,fm_test_real=testdat, if __name__=='__main__': print('SparseGaussian') - kernel_sparse_gaussian_modular (*parameter_list[0]) + kernel_sparse_gaussian (*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_sparse_linear_modular.py b/examples/undocumented/python/kernel_sparse_linear.py similarity index 72% rename from examples/undocumented/python_modular/kernel_sparse_linear_modular.py rename to examples/undocumented/python/kernel_sparse_linear.py index 794092c7c61..8ca21aa1831 100644 --- a/examples/undocumented/python_modular/kernel_sparse_linear_modular.py +++ b/examples/undocumented/python/kernel_sparse_linear.py @@ -7,9 +7,9 @@ parameter_list = [[traindat,testdat,1.1],[traindat,testdat,1.2]] -def kernel_sparse_linear_modular (fm_train_real=traindat,fm_test_real=testdat,scale=1.1): - from modshogun import SparseRealFeatures - from modshogun import LinearKernel, AvgDiagKernelNormalizer +def kernel_sparse_linear (fm_train_real=traindat,fm_test_real=testdat,scale=1.1): + from shogun import SparseRealFeatures + from shogun import LinearKernel, AvgDiagKernelNormalizer feats_train=SparseRealFeatures(fm_train_real) feats_test=SparseRealFeatures(fm_test_real) @@ -25,4 +25,4 @@ def kernel_sparse_linear_modular (fm_train_real=traindat,fm_test_real=testdat,sc if __name__=='__main__': print('SparseLinear') - kernel_sparse_linear_modular(*parameter_list[0]) + kernel_sparse_linear(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_sparse_poly_modular.py b/examples/undocumented/python/kernel_sparse_poly.py similarity index 77% rename from 
examples/undocumented/python_modular/kernel_sparse_poly_modular.py rename to examples/undocumented/python/kernel_sparse_poly.py index 13c11f00b80..b881012da50 100644 --- a/examples/undocumented/python_modular/kernel_sparse_poly_modular.py +++ b/examples/undocumented/python/kernel_sparse_poly.py @@ -6,11 +6,11 @@ parameter_list = [[traindat,testdat,10,3,True],[traindat,testdat,10,4,True]] -def kernel_sparse_poly_modular (fm_train_real=traindat,fm_test_real=testdat, +def kernel_sparse_poly (fm_train_real=traindat,fm_test_real=testdat, size_cache=10,degree=3,inhomogene=True ): - from modshogun import SparseRealFeatures - from modshogun import PolyKernel + from shogun import SparseRealFeatures + from shogun import PolyKernel feats_train=SparseRealFeatures(fm_train_real) feats_test=SparseRealFeatures(fm_test_real) @@ -27,4 +27,4 @@ def kernel_sparse_poly_modular (fm_train_real=traindat,fm_test_real=testdat, if __name__=='__main__': print('SparsePoly') - kernel_sparse_poly_modular(*parameter_list[0]) + kernel_sparse_poly(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_spherical_modular.py b/examples/undocumented/python/kernel_spherical.py similarity index 72% rename from examples/undocumented/python_modular/kernel_spherical_modular.py rename to examples/undocumented/python/kernel_spherical.py index 76e9f4432eb..52a429e8a99 100644 --- a/examples/undocumented/python_modular/kernel_spherical_modular.py +++ b/examples/undocumented/python/kernel_spherical.py @@ -8,10 +8,10 @@ parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 5.0]] -def kernel_spherical_modular (fm_train_real=traindat,fm_test_real=testdat, sigma=1.0): - from modshogun import RealFeatures - from modshogun import MultiquadricKernel - from modshogun import EuclideanDistance +def kernel_spherical (fm_train_real=traindat,fm_test_real=testdat, sigma=1.0): + from shogun import RealFeatures + from shogun import MultiquadricKernel + from shogun import EuclideanDistance 
feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) @@ -28,4 +28,4 @@ def kernel_spherical_modular (fm_train_real=traindat,fm_test_real=testdat, sigma if __name__=='__main__': print('Spherical') - kernel_spherical_modular(*parameter_list[0]) + kernel_spherical(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_spline_modular.py b/examples/undocumented/python/kernel_spline.py similarity index 75% rename from examples/undocumented/python_modular/kernel_spline_modular.py rename to examples/undocumented/python/kernel_spline.py index 2b4dcbc9a1b..26aae43c802 100644 --- a/examples/undocumented/python_modular/kernel_spline_modular.py +++ b/examples/undocumented/python/kernel_spline.py @@ -8,9 +8,9 @@ parameter_list=[[traindat,testdat],[traindat,testdat]] -def kernel_spline_modular (fm_train_real=traindat,fm_test_real=testdat): - from modshogun import RealFeatures - from modshogun import SplineKernel +def kernel_spline (fm_train_real=traindat,fm_test_real=testdat): + from shogun import RealFeatures + from shogun import SplineKernel feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) @@ -25,4 +25,4 @@ def kernel_spline_modular (fm_train_real=traindat,fm_test_real=testdat): if __name__=='__main__': print('Spline') - kernel_spline_modular(*parameter_list[0]) + kernel_spline(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_ssk_string_modular.py b/examples/undocumented/python/kernel_ssk_string.py similarity index 77% rename from examples/undocumented/python_modular/kernel_ssk_string_modular.py rename to examples/undocumented/python/kernel_ssk_string.py index e5694c81c5a..bfa5e28ed1a 100644 --- a/examples/undocumented/python_modular/kernel_ssk_string_modular.py +++ b/examples/undocumented/python/kernel_ssk_string.py @@ -17,9 +17,9 @@ parameter_list = [[traindat,testdat,2,0.75],[traindat,testdat,3,0.75]] -def kernel_ssk_string_modular (fm_train_dna=traindat, 
fm_test_dna=testdat, maxlen=1, decay=1): - from modshogun import SubsequenceStringKernel - from modshogun import StringCharFeatures, DNA +def kernel_ssk_string (fm_train_dna=traindat, fm_test_dna=testdat, maxlen=1, decay=1): + from shogun import SubsequenceStringKernel + from shogun import StringCharFeatures, DNA feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) @@ -35,5 +35,5 @@ def kernel_ssk_string_modular (fm_train_dna=traindat, fm_test_dna=testdat, maxle if __name__=='__main__': print('SubsequenceStringKernel DNA') - kernel_ssk_string_modular(*parameter_list[0]) - kernel_ssk_string_modular(*parameter_list[1]) + kernel_ssk_string(*parameter_list[0]) + kernel_ssk_string(*parameter_list[1]) diff --git a/examples/undocumented/python_modular/kernel_top_modular.py b/examples/undocumented/python/kernel_top.py similarity index 88% rename from examples/undocumented/python_modular/kernel_top_modular.py rename to examples/undocumented/python/kernel_top.py index 337b63a1771..5f76324121e 100644 --- a/examples/undocumented/python_modular/kernel_top_modular.py +++ b/examples/undocumented/python/kernel_top.py @@ -13,11 +13,11 @@ parameter_list = [[traindat,testdat,label_traindat,1e-1,1,0,False,[1, False, True]], \ [traindat,testdat,label_traindat,1e-1,1,0,False,[1, False, True] ]] -def kernel_top_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,pseudo=1e-1, +def kernel_top (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,pseudo=1e-1, order=1,gap=0,reverse=False,kargs=[1, False, True]): - from modshogun import StringCharFeatures, StringWordFeatures, TOPFeatures, DNA - from modshogun import PolyKernel - from modshogun import HMM, BW_NORMAL + from shogun import StringCharFeatures, StringWordFeatures, TOPFeatures, DNA + from shogun import PolyKernel + from shogun import HMM, BW_NORMAL N=1 # toy HMM with 1 state M=4 # 4 observations -> DNA @@ -66,4 +66,4 @@ def 
kernel_top_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dn if __name__=='__main__': print("TOP Kernel") - kernel_top_modular(*parameter_list[0]) + kernel_top(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_tstudent_modular.py b/examples/undocumented/python/kernel_tstudent.py similarity index 73% rename from examples/undocumented/python_modular/kernel_tstudent_modular.py rename to examples/undocumented/python/kernel_tstudent.py index f88b34b0bf8..3cca9640459 100644 --- a/examples/undocumented/python_modular/kernel_tstudent_modular.py +++ b/examples/undocumented/python/kernel_tstudent.py @@ -8,10 +8,10 @@ parameter_list=[[traindat,testdat, 2.0],[traindat,testdat, 3.0]] -def kernel_tstudent_modular (fm_train_real=traindat,fm_test_real=testdat, degree=2.0): - from modshogun import RealFeatures - from modshogun import TStudentKernel - from modshogun import EuclideanDistance +def kernel_tstudent (fm_train_real=traindat,fm_test_real=testdat, degree=2.0): + from shogun import RealFeatures + from shogun import TStudentKernel + from shogun import EuclideanDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) @@ -28,4 +28,4 @@ def kernel_tstudent_modular (fm_train_real=traindat,fm_test_real=testdat, degree if __name__=='__main__': print('TStudent') - kernel_tstudent_modular(*parameter_list[0]) + kernel_tstudent(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_wave_modular.py b/examples/undocumented/python/kernel_wave.py similarity index 73% rename from examples/undocumented/python_modular/kernel_wave_modular.py rename to examples/undocumented/python/kernel_wave.py index e225bf9dcff..accf91cbe96 100644 --- a/examples/undocumented/python_modular/kernel_wave_modular.py +++ b/examples/undocumented/python/kernel_wave.py @@ -8,10 +8,10 @@ parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 10.0]] -def kernel_wave_modular (fm_train_real=traindat,fm_test_real=testdat, 
theta=1.0): - from modshogun import RealFeatures - from modshogun import WaveKernel - from modshogun import EuclideanDistance +def kernel_wave (fm_train_real=traindat,fm_test_real=testdat, theta=1.0): + from shogun import RealFeatures + from shogun import WaveKernel + from shogun import EuclideanDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) @@ -28,4 +28,4 @@ def kernel_wave_modular (fm_train_real=traindat,fm_test_real=testdat, theta=1.0) if __name__=='__main__': print('Wave') - kernel_wave_modular(*parameter_list[0]) + kernel_wave(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_wavelet_modular.py b/examples/undocumented/python/kernel_wavelet.py similarity index 73% rename from examples/undocumented/python_modular/kernel_wavelet_modular.py rename to examples/undocumented/python/kernel_wavelet.py index 01d9e4ea5fc..f18df45b22b 100644 --- a/examples/undocumented/python_modular/kernel_wavelet_modular.py +++ b/examples/undocumented/python/kernel_wavelet.py @@ -8,9 +8,9 @@ parameter_list=[[traindat,testdat, 1.5, 1.0],[traindat,testdat, 1.0, 1.5]] -def kernel_wavelet_modular (fm_train_real=traindat,fm_test_real=testdat, dilation=1.5, translation=1.0): - from modshogun import RealFeatures - from modshogun import WaveletKernel +def kernel_wavelet (fm_train_real=traindat,fm_test_real=testdat, dilation=1.5, translation=1.0): + from shogun import RealFeatures + from shogun import WaveletKernel feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) @@ -25,4 +25,4 @@ def kernel_wavelet_modular (fm_train_real=traindat,fm_test_real=testdat, dilatio if __name__=='__main__': print('Wavelet') - kernel_wavelet_modular(*parameter_list[0]) + kernel_wavelet(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_weighted_comm_word_string_modular.py b/examples/undocumented/python/kernel_weighted_comm_word_string.py similarity index 76% rename from 
examples/undocumented/python_modular/kernel_weighted_comm_word_string_modular.py rename to examples/undocumented/python/kernel_weighted_comm_word_string.py index 512d45a3ee8..763d4705b27 100644 --- a/examples/undocumented/python_modular/kernel_weighted_comm_word_string_modular.py +++ b/examples/undocumented/python/kernel_weighted_comm_word_string.py @@ -6,10 +6,10 @@ parameter_list = [[traindat,testdat],[traindat,testdat]] -def kernel_weighted_comm_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,order=3,gap=0,reverse=True ): - from modshogun import WeightedCommWordStringKernel - from modshogun import StringWordFeatures, StringCharFeatures, DNA - from modshogun import SortWordString +def kernel_weighted_comm_word_string (fm_train_dna=traindat,fm_test_dna=testdat,order=3,gap=0,reverse=True ): + from shogun import WeightedCommWordStringKernel + from shogun import StringWordFeatures, StringCharFeatures, DNA + from shogun import SortWordString charfeat=StringCharFeatures(fm_train_dna, DNA) feats_train=StringWordFeatures(charfeat.get_alphabet()) @@ -35,4 +35,4 @@ def kernel_weighted_comm_word_string_modular (fm_train_dna=traindat,fm_test_dna= if __name__=='__main__': print('WeightedCommWordString') - kernel_weighted_comm_word_string_modular(*parameter_list[0]) + kernel_weighted_comm_word_string(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_weighted_degree_position_string_modular.py b/examples/undocumented/python/kernel_weighted_degree_position_string.py similarity index 74% rename from examples/undocumented/python_modular/kernel_weighted_degree_position_string_modular.py rename to examples/undocumented/python/kernel_weighted_degree_position_string.py index 47ade02b538..9c515b12ecf 100644 --- a/examples/undocumented/python_modular/kernel_weighted_degree_position_string_modular.py +++ b/examples/undocumented/python/kernel_weighted_degree_position_string.py @@ -6,9 +6,9 @@ testdat = lm.load_dna('../data/fm_test_dna.dat') 
parameter_list = [[traindat,testdat,20],[traindat,testdat,22]] -def kernel_weighted_degree_position_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,degree=20): - from modshogun import StringCharFeatures, DNA - from modshogun import WeightedDegreePositionStringKernel, MSG_DEBUG +def kernel_weighted_degree_position_string (fm_train_dna=traindat,fm_test_dna=testdat,degree=20): + from shogun import StringCharFeatures, DNA + from shogun import WeightedDegreePositionStringKernel, MSG_DEBUG feats_train=StringCharFeatures(fm_train_dna, DNA) #feats_train.io.set_loglevel(MSG_DEBUG) @@ -27,4 +27,4 @@ def kernel_weighted_degree_position_string_modular (fm_train_dna=traindat,fm_tes if __name__=='__main__': print('WeightedDegreePositionString') - kernel_weighted_degree_position_string_modular(*parameter_list[0]) + kernel_weighted_degree_position_string(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/kernel_weighted_degree_string_modular.py b/examples/undocumented/python/kernel_weighted_degree_string.py similarity index 80% rename from examples/undocumented/python_modular/kernel_weighted_degree_string_modular.py rename to examples/undocumented/python/kernel_weighted_degree_string.py index ed063c2b6e5..96c131ceba0 100644 --- a/examples/undocumented/python_modular/kernel_weighted_degree_string_modular.py +++ b/examples/undocumented/python/kernel_weighted_degree_string.py @@ -6,9 +6,9 @@ parameter_list = [[traindat,testdat,3],[traindat,testdat,20]] -def kernel_weighted_degree_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,degree=20): - from modshogun import StringCharFeatures, DNA - from modshogun import WeightedDegreeStringKernel, MSG_DEBUG +def kernel_weighted_degree_string (fm_train_dna=traindat,fm_test_dna=testdat,degree=20): + from shogun import StringCharFeatures, DNA + from shogun import WeightedDegreeStringKernel, MSG_DEBUG feats_train=StringCharFeatures(fm_train_dna, DNA) #feats_train.io.set_loglevel(MSG_DEBUG) @@ -38,4 +38,4 @@ 
def kernel_weighted_degree_string_modular (fm_train_dna=traindat,fm_test_dna=tes if __name__=='__main__': print('WeightedDegreeString') - kernel_weighted_degree_string_modular(*parameter_list[0]) + kernel_weighted_degree_string(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/labels_io_modular.py b/examples/undocumented/python/labels_io.py similarity index 67% rename from examples/undocumented/python_modular/labels_io_modular.py rename to examples/undocumented/python/labels_io.py index 52673cc1a67..b98179d0d6f 100644 --- a/examples/undocumented/python_modular/labels_io_modular.py +++ b/examples/undocumented/python/labels_io.py @@ -1,8 +1,8 @@ #!/usr/bin/env python parameter_list=[[]] -def labels_io_modular(): - from modshogun import RegressionLabels, CSVFile +def labels_io(): + from shogun import RegressionLabels, CSVFile lab=RegressionLabels() f=CSVFile("../data/label_train_regression.dat","r") f.set_delimiter(" ") @@ -12,4 +12,4 @@ def labels_io_modular(): if __name__=='__main__': print('Labels IO') - labels_io_modular(*parameter_list[0]) + labels_io(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/library_fisher2x3_modular.py b/examples/undocumented/python/library_fisher2x3.py similarity index 76% rename from examples/undocumented/python_modular/library_fisher2x3_modular.py rename to examples/undocumented/python/library_fisher2x3.py index ce023ecd793..fc6de163164 100644 --- a/examples/undocumented/python_modular/library_fisher2x3_modular.py +++ b/examples/undocumented/python/library_fisher2x3.py @@ -1,6 +1,6 @@ #!/usr/bin/env python from numpy import * -from modshogun import * +from shogun import * x=array([[20.0,15,15],[10,20,20]]) y=array([[21.0,21,18],[19,19,22]]) @@ -9,11 +9,11 @@ parameter_list = [[x,concatenate((x,y,z),1)]] -def library_fisher2x3_modular (table, tables): +def library_fisher2x3 (table, tables): pval=Statistics_fishers_exact_test_for_2x3_table(table) 
pvals=Statistics_fishers_exact_test_for_multiple_2x3_tables(tables) return (pval,pvals) if __name__=='__main__': print('Fisher 2x3') - library_fisher2x3_modular(*parameter_list[0]) + library_fisher2x3(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/library_time.py b/examples/undocumented/python/library_time.py similarity index 94% rename from examples/undocumented/python_modular/library_time.py rename to examples/undocumented/python/library_time.py index 58ef6ea6779..cf7fde8309e 100644 --- a/examples/undocumented/python_modular/library_time.py +++ b/examples/undocumented/python/library_time.py @@ -1,6 +1,6 @@ #!/usr/bin/env python import time -from modshogun import Time +from shogun import Time parameter_list = [[5],[1.0]] def library_time (sleep_secs): diff --git a/examples/undocumented/python_modular/mathematics_linsolver_cg.py b/examples/undocumented/python/mathematics_linsolver_cg.py similarity index 100% rename from examples/undocumented/python_modular/mathematics_linsolver_cg.py rename to examples/undocumented/python/mathematics_linsolver_cg.py diff --git a/examples/undocumented/python_modular/mathematics_logdet.py b/examples/undocumented/python/mathematics_logdet.py similarity index 100% rename from examples/undocumented/python_modular/mathematics_logdet.py rename to examples/undocumented/python/mathematics_logdet.py diff --git a/examples/undocumented/python_modular/mathematics_sparseinversecovariance_modular.py b/examples/undocumented/python/mathematics_sparseinversecovariance.py similarity index 71% rename from examples/undocumented/python_modular/mathematics_sparseinversecovariance_modular.py rename to examples/undocumented/python/mathematics_sparseinversecovariance.py index aa51ba94f58..3f82d661005 100644 --- a/examples/undocumented/python_modular/mathematics_sparseinversecovariance_modular.py +++ b/examples/undocumented/python/mathematics_sparseinversecovariance.py @@ -6,9 +6,9 @@ parameter_list = [[data,0.0],[data,1.0]] -def 
mathematics_sparseinversecovariance_modular (data,lc): +def mathematics_sparseinversecovariance (data,lc): try: - from modshogun import SparseInverseCovariance + from shogun import SparseInverseCovariance except ImportError: print("SparseInverseCovariance not available") exit(0) @@ -24,6 +24,6 @@ def mathematics_sparseinversecovariance_modular (data,lc): if __name__=='__main__': print('SparseInverseCovariance') - mathematics_sparseinversecovariance_modular(*parameter_list[0]) + mathematics_sparseinversecovariance(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/metric_lmnn_modular.py b/examples/undocumented/python/metric_lmnn.py similarity index 77% rename from examples/undocumented/python_modular/metric_lmnn_modular.py rename to examples/undocumented/python/metric_lmnn.py index 1768e627de3..f9aa05d1184 100644 --- a/examples/undocumented/python_modular/metric_lmnn_modular.py +++ b/examples/undocumented/python/metric_lmnn.py @@ -6,9 +6,9 @@ parameter_list = [[traindat,testdat,label_traindat,3]] -def metric_lmnn_modular(train_fname=traindat,test_fname=testdat,label_train_fname=label_traindat,k=3): +def metric_lmnn(train_fname=traindat,test_fname=testdat,label_train_fname=label_traindat,k=3): try: - from modshogun import RealFeatures,MulticlassLabels,LMNN,KNN,CSVFile + from shogun import RealFeatures,MulticlassLabels,LMNN,KNN,CSVFile except ImportError: return @@ -31,4 +31,4 @@ def metric_lmnn_modular(train_fname=traindat,test_fname=testdat,label_train_fnam if __name__=='__main__': print('LMNN') - metric_lmnn_modular(*parameter_list[0]) + metric_lmnn(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/mkl_binclass_modular.py b/examples/undocumented/python/mkl_binclass.py similarity index 85% rename from examples/undocumented/python_modular/mkl_binclass_modular.py rename to examples/undocumented/python/mkl_binclass.py index 90ab575b541..c5211faa5e4 100644 --- a/examples/undocumented/python_modular/mkl_binclass_modular.py +++ 
b/examples/undocumented/python/mkl_binclass.py @@ -1,13 +1,13 @@ #!/usr/bin/env python -from modshogun import CombinedFeatures, RealFeatures, BinaryLabels -from modshogun import CombinedKernel, PolyKernel, CustomKernel -from modshogun import MKLClassification +from shogun import CombinedFeatures, RealFeatures, BinaryLabels +from shogun import CombinedKernel, PolyKernel, CustomKernel +from shogun import MKLClassification from tools.load import LoadMatrix lm=LoadMatrix() #only run example if SVMLight is included as LibSVM solver crashes in MKLClassification try: - from modshogun import SVMLight + from shogun import SVMLight except ImportError: print("SVMLight not available") exit(0) @@ -21,7 +21,7 @@ # fm_test_real.shape # combined_custom() -def mkl_binclass_modular (fm_train_real=traindat,fm_test_real=testdat,fm_label_twoclass = label_traindat): +def mkl_binclass (fm_train_real=traindat,fm_test_real=testdat,fm_label_twoclass = label_traindat): ################################## # set up and train @@ -85,4 +85,4 @@ def mkl_binclass_modular (fm_train_real=traindat,fm_test_real=testdat,fm_label_t return mkl.apply(),kernel if __name__=='__main__': - mkl_binclass_modular (*parameter_list[0]) + mkl_binclass (*parameter_list[0]) diff --git a/examples/undocumented/python_modular/mkl_multiclass_modular.py b/examples/undocumented/python/mkl_multiclass.py similarity index 85% rename from examples/undocumented/python_modular/mkl_multiclass_modular.py rename to examples/undocumented/python/mkl_multiclass.py index 19312976be2..d88cc7bdf58 100644 --- a/examples/undocumented/python_modular/mkl_multiclass_modular.py +++ b/examples/undocumented/python/mkl_multiclass.py @@ -9,12 +9,12 @@ [ fm_train_real, fm_test_real, label_train_multiclass, 1.2, 1.2, 1e-5, 1, 0.001, 1.5], [ fm_train_real, fm_test_real, label_train_multiclass, 5, 1.2, 1e-2, 1, 0.001, 2]] -def mkl_multiclass_modular (fm_train_real, fm_test_real, label_train_multiclass, +def mkl_multiclass (fm_train_real, fm_test_real, 
label_train_multiclass, width, C, epsilon, num_threads, mkl_epsilon, mkl_norm): - from modshogun import CombinedFeatures, RealFeatures, MulticlassLabels - from modshogun import CombinedKernel, GaussianKernel, LinearKernel,PolyKernel - from modshogun import MKLMulticlass + from shogun import CombinedFeatures, RealFeatures, MulticlassLabels + from shogun import CombinedKernel, GaussianKernel, LinearKernel,PolyKernel + from shogun import MKLMulticlass kernel = CombinedKernel() feats_train = CombinedFeatures() @@ -61,4 +61,4 @@ def mkl_multiclass_modular (fm_train_real, fm_test_real, label_train_multiclass, if __name__ == '__main__': print('mkl_multiclass') - mkl_multiclass_modular(*parameter_list[0]) + mkl_multiclass(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/modelselection_grid_search_kernel.py b/examples/undocumented/python/modelselection_grid_search_kernel.py similarity index 86% rename from examples/undocumented/python_modular/modelselection_grid_search_kernel.py rename to examples/undocumented/python/modelselection_grid_search_kernel.py index 6f41f34b28a..3163ed5acce 100644 --- a/examples/undocumented/python_modular/modelselection_grid_search_kernel.py +++ b/examples/undocumented/python/modelselection_grid_search_kernel.py @@ -12,18 +12,18 @@ from numpy import random import math -from modshogun import CrossValidation, CrossValidationResult -from modshogun import ContingencyTableEvaluation, ACCURACY -from modshogun import StratifiedCrossValidationSplitting -from modshogun import BinaryLabels -from modshogun import RealFeatures -from modshogun import GaussianKernel, PowerKernel -from modshogun import LibSVM -from modshogun import MinkowskiMetric -from modshogun import GridSearchModelSelection -from modshogun import ModelSelectionParameters, R_EXP, R_LINEAR -from modshogun import ParameterCombination -from modshogun import Math +from shogun import CrossValidation, CrossValidationResult +from shogun import ContingencyTableEvaluation, 
ACCURACY +from shogun import StratifiedCrossValidationSplitting +from shogun import BinaryLabels +from shogun import RealFeatures +from shogun import GaussianKernel, PowerKernel +from shogun import LibSVM +from shogun import MinkowskiMetric +from shogun import GridSearchModelSelection +from shogun import ModelSelectionParameters, R_EXP, R_LINEAR +from shogun import ParameterCombination +from shogun import Math def create_param_tree(): root=ModelSelectionParameters() @@ -132,7 +132,7 @@ def modelselection_grid_search_kernel (num_subsets, num_vectors, dim_vectors): casted=CrossValidationResult.obtain_from_generic(result); #print "result mean:", casted.mean - return classifier,result,casted.mean + return classifier,result,casted.get_mean() if __name__=='__main__': print('ModelselectionGridSearchKernel') diff --git a/examples/undocumented/python_modular/modelselection_grid_search_krr_modular.py b/examples/undocumented/python/modelselection_grid_search_krr.py similarity index 85% rename from examples/undocumented/python_modular/modelselection_grid_search_krr_modular.py rename to examples/undocumented/python/modelselection_grid_search_krr.py index 1a88381c2a2..dd673c53ede 100644 --- a/examples/undocumented/python_modular/modelselection_grid_search_krr_modular.py +++ b/examples/undocumented/python/modelselection_grid_search_krr.py @@ -22,16 +22,16 @@ parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5,1e-2], \ [traindat,testdat,label_traindat,2.1,1,1e-5,1e-2]] -def modelselection_grid_search_krr_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,\ +def modelselection_grid_search_krr (fm_train=traindat,fm_test=testdat,label_train=label_traindat,\ width=2.1,C=1,epsilon=1e-5,tube_epsilon=1e-2): - from modshogun import CrossValidation, CrossValidationResult - from modshogun import MeanSquaredError - from modshogun import CrossValidationSplitting - from modshogun import RegressionLabels - from modshogun import RealFeatures - from modshogun import 
KernelRidgeRegression - from modshogun import GridSearchModelSelection - from modshogun import ModelSelectionParameters + from shogun import CrossValidation, CrossValidationResult + from shogun import MeanSquaredError + from shogun import CrossValidationSplitting + from shogun import RegressionLabels + from shogun import RealFeatures + from shogun import KernelRidgeRegression + from shogun import GridSearchModelSelection + from shogun import ModelSelectionParameters # training data features_train=RealFeatures(traindat) @@ -91,9 +91,9 @@ def modelselection_grid_search_krr_modular (fm_train=traindat,fm_test=testdat,la # creates all the parameters to optimize def create_param_tree(): - from modshogun import ModelSelectionParameters, R_EXP, R_LINEAR - from modshogun import ParameterCombination - from modshogun import GaussianKernel, PolyKernel + from shogun import ModelSelectionParameters, R_EXP, R_LINEAR + from shogun import ParameterCombination + from shogun import GaussianKernel, PolyKernel import math root=ModelSelectionParameters() @@ -142,4 +142,4 @@ def create_param_tree(): if __name__=='__main__': print('ModelselectionGridSearchKRR') - modelselection_grid_search_krr_modular(*parameter_list[0]) + modelselection_grid_search_krr(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/modelselection_grid_search_liblinear_modular.py b/examples/undocumented/python/modelselection_grid_search_liblinear.py similarity index 78% rename from examples/undocumented/python_modular/modelselection_grid_search_liblinear_modular.py rename to examples/undocumented/python/modelselection_grid_search_liblinear.py index ac733591473..dd267c97f32 100644 --- a/examples/undocumented/python_modular/modelselection_grid_search_liblinear_modular.py +++ b/examples/undocumented/python/modelselection_grid_search_liblinear.py @@ -21,16 +21,16 @@ parameter_list = [[traindat,label_traindat]] -def modelselection_grid_search_liblinear_modular (traindat=traindat, 
label_traindat=label_traindat): - from modshogun import CrossValidation, CrossValidationResult - from modshogun import ContingencyTableEvaluation, ACCURACY - from modshogun import StratifiedCrossValidationSplitting - from modshogun import GridSearchModelSelection - from modshogun import ModelSelectionParameters, R_EXP - from modshogun import ParameterCombination - from modshogun import BinaryLabels - from modshogun import RealFeatures - from modshogun import LibLinear, L2R_L2LOSS_SVC +def modelselection_grid_search_liblinear (traindat=traindat, label_traindat=label_traindat): + from shogun import CrossValidation, CrossValidationResult + from shogun import ContingencyTableEvaluation, ACCURACY + from shogun import StratifiedCrossValidationSplitting + from shogun import GridSearchModelSelection + from shogun import ModelSelectionParameters, R_EXP + from shogun import ParameterCombination + from shogun import BinaryLabels + from shogun import RealFeatures + from shogun import LibLinear, L2R_L2LOSS_SVC # build parameter tree to select C1 and C2 param_tree_root=ModelSelectionParameters() @@ -83,4 +83,4 @@ def modelselection_grid_search_liblinear_modular (traindat=traindat, label_train if __name__=='__main__': print('ModelSelectionGridSearchLibLinear') - modelselection_grid_search_liblinear_modular(*parameter_list[0]) + modelselection_grid_search_liblinear(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/modelselection_grid_search_libsvr_modular.py b/examples/undocumented/python/modelselection_grid_search_libsvr.py similarity index 84% rename from examples/undocumented/python_modular/modelselection_grid_search_libsvr_modular.py rename to examples/undocumented/python/modelselection_grid_search_libsvr.py index cabe6ae75ba..deaf18a7495 100644 --- a/examples/undocumented/python_modular/modelselection_grid_search_libsvr_modular.py +++ b/examples/undocumented/python/modelselection_grid_search_libsvr.py @@ -22,18 +22,18 @@ parameter_list = 
[[traindat,testdat,label_traindat,2.1,1,1e-5,1e-2], \ [traindat,testdat,label_traindat,2.1,1,1e-5,1e-2]] -def modelselection_grid_search_libsvr_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,\ +def modelselection_grid_search_libsvr (fm_train=traindat,fm_test=testdat,label_train=label_traindat,\ width=2.1,C=1,epsilon=1e-5,tube_epsilon=1e-2): - from modshogun import CrossValidation, CrossValidationResult - from modshogun import MeanSquaredError - from modshogun import CrossValidationSplitting - from modshogun import RegressionLabels - from modshogun import RealFeatures - from modshogun import GaussianKernel - from modshogun import LibSVR - from modshogun import GridSearchModelSelection - from modshogun import ModelSelectionParameters, R_EXP - from modshogun import ParameterCombination + from shogun import CrossValidation, CrossValidationResult + from shogun import MeanSquaredError + from shogun import CrossValidationSplitting + from shogun import RegressionLabels + from shogun import RealFeatures + from shogun import GaussianKernel + from shogun import LibSVR + from shogun import GridSearchModelSelection + from shogun import ModelSelectionParameters, R_EXP + from shogun import ParameterCombination # training data features_train=RealFeatures(traindat) @@ -109,4 +109,4 @@ def modelselection_grid_search_libsvr_modular (fm_train=traindat,fm_test=testdat if __name__=='__main__': print('ModelselectionGridSearchLibSVR') - modelselection_grid_search_libsvr_modular(*parameter_list[0]) + modelselection_grid_search_libsvr(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/modelselection_parameter_tree_modular.py b/examples/undocumented/python/modelselection_parameter_tree.py similarity index 88% rename from examples/undocumented/python_modular/modelselection_parameter_tree_modular.py rename to examples/undocumented/python/modelselection_parameter_tree.py index b55a73beea6..d1b221bbdb8 100644 --- 
a/examples/undocumented/python_modular/modelselection_parameter_tree_modular.py +++ b/examples/undocumented/python/modelselection_parameter_tree.py @@ -11,13 +11,13 @@ parameter_list=[[None]] -def modelselection_parameter_tree_modular (dummy): - from modshogun import ParameterCombination - from modshogun import ModelSelectionParameters, R_EXP, R_LINEAR - from modshogun import PowerKernel - from modshogun import GaussianKernel - from modshogun import DistantSegmentsKernel - from modshogun import MinkowskiMetric +def modelselection_parameter_tree (dummy): + from shogun import ParameterCombination + from shogun import ModelSelectionParameters, R_EXP, R_LINEAR + from shogun import PowerKernel + from shogun import GaussianKernel + from shogun import DistantSegmentsKernel + from shogun import MinkowskiMetric import math root=ModelSelectionParameters() @@ -99,7 +99,7 @@ def modelselection_parameter_tree_modular (dummy): if __name__=='__main__': print('ModelSelection ParameterTree') - modelselection_parameter_tree_modular(*parameter_list[0]) + modelselection_parameter_tree(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/modelselection_random_search_liblinear_modular.py b/examples/undocumented/python/modelselection_random_search_liblinear.py similarity index 78% rename from examples/undocumented/python_modular/modelselection_random_search_liblinear_modular.py rename to examples/undocumented/python/modelselection_random_search_liblinear.py index a2b42f3ad72..3eef55e44ba 100644 --- a/examples/undocumented/python_modular/modelselection_random_search_liblinear_modular.py +++ b/examples/undocumented/python/modelselection_random_search_liblinear.py @@ -19,16 +19,16 @@ parameter_list = [[traindat,label_traindat]] -def modelselection_random_search_liblinear_modular (traindat=traindat, label_traindat=label_traindat): - from modshogun import CrossValidation, CrossValidationResult - from modshogun import ContingencyTableEvaluation, ACCURACY - from modshogun 
import StratifiedCrossValidationSplitting - from modshogun import RandomSearchModelSelection - from modshogun import ModelSelectionParameters, R_EXP - from modshogun import ParameterCombination - from modshogun import BinaryLabels - from modshogun import RealFeatures - from modshogun import LibLinear, L2R_L2LOSS_SVC +def modelselection_random_search_liblinear (traindat=traindat, label_traindat=label_traindat): + from shogun import CrossValidation, CrossValidationResult + from shogun import ContingencyTableEvaluation, ACCURACY + from shogun import StratifiedCrossValidationSplitting + from shogun import RandomSearchModelSelection + from shogun import ModelSelectionParameters, R_EXP + from shogun import ParameterCombination + from shogun import BinaryLabels + from shogun import RealFeatures + from shogun import LibLinear, L2R_L2LOSS_SVC # build parameter tree to select C1 and C2 param_tree_root=ModelSelectionParameters() @@ -81,4 +81,4 @@ def modelselection_random_search_liblinear_modular (traindat=traindat, label_tra if __name__=='__main__': print('ModelSelectionRandomSearchLibLinear') - modelselection_random_search_liblinear_modular(*parameter_list[0]) + modelselection_random_search_liblinear(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/multiclass_c45classifiertree_modular.py b/examples/undocumented/python/multiclass_c45classifiertree.py similarity index 81% rename from examples/undocumented/python_modular/multiclass_c45classifiertree_modular.py rename to examples/undocumented/python/multiclass_c45classifiertree.py index 2b48b4415c1..aca26ef09b3 100644 --- a/examples/undocumented/python_modular/multiclass_c45classifiertree_modular.py +++ b/examples/undocumented/python/multiclass_c45classifiertree.py @@ -10,9 +10,9 @@ parameter_list = [[traindat,testdat,label_traindat,feattypes]] -def multiclass_c45classifiertree_modular(train=traindat,test=testdat,labels=label_traindat,ft=feattypes): +def 
multiclass_c45classifiertree(train=traindat,test=testdat,labels=label_traindat,ft=feattypes): try: - from modshogun import RealFeatures, MulticlassLabels, CSVFile, C45ClassifierTree + from shogun import RealFeatures, MulticlassLabels, CSVFile, C45ClassifierTree from numpy import random, int32 except ImportError: print("Could not import Shogun and/or numpy modules") @@ -25,8 +25,8 @@ def multiclass_c45classifiertree_modular(train=traindat,test=testdat,labels=labe # divide train dataset into training and validation subsets in the ratio 2/3 to 1/3 subset=int32(random.permutation(feats_train.get_num_vectors())) - vsubset=subset[1:subset.size/3] - trsubset=subset[1+subset.size/3:subset.size] + vsubset=subset[1:int(subset.size/3)] + trsubset=subset[1+int(subset.size/3):subset.size] # C4.5 Tree formation using training subset train_labels.add_subset(trsubset) @@ -57,4 +57,4 @@ def multiclass_c45classifiertree_modular(train=traindat,test=testdat,labels=labe if __name__=='__main__': print('C45ClassifierTree') - multiclass_c45classifiertree_modular(*parameter_list[0]) + multiclass_c45classifiertree(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/multiclass_id3classifiertree_modular.py b/examples/undocumented/python/multiclass_id3classifiertree.py similarity index 83% rename from examples/undocumented/python_modular/multiclass_id3classifiertree_modular.py rename to examples/undocumented/python/multiclass_id3classifiertree.py index bc7902fd4fb..58f7cb0ed74 100644 --- a/examples/undocumented/python_modular/multiclass_id3classifiertree_modular.py +++ b/examples/undocumented/python/multiclass_id3classifiertree.py @@ -16,9 +16,9 @@ parameter_list = [[train_data, train_labels, test_data]] -def multiclass_id3classifiertree_modular(train=train_data,labels=train_labels,test=test_data): +def multiclass_id3classifiertree(train=train_data,labels=train_labels,test=test_data): try: - from modshogun import RealFeatures, MulticlassLabels, ID3ClassifierTree + from shogun 
import RealFeatures, MulticlassLabels, ID3ClassifierTree except ImportError: return @@ -39,4 +39,4 @@ def multiclass_id3classifiertree_modular(train=train_data,labels=train_labels,te if __name__=='__main__': print('ID3ClassifierTree') - multiclass_id3classifiertree_modular(*parameter_list[0]) + multiclass_id3classifiertree(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/multiclass_randomforest_modular.py b/examples/undocumented/python/multiclass_randomforest.py similarity index 79% rename from examples/undocumented/python_modular/multiclass_randomforest_modular.py rename to examples/undocumented/python/multiclass_randomforest.py index 55f9358492f..a110b58e739 100644 --- a/examples/undocumented/python_modular/multiclass_randomforest_modular.py +++ b/examples/undocumented/python/multiclass_randomforest.py @@ -10,9 +10,9 @@ parameter_list = [[traindat,testdat,label_traindat,feattypes]] -def multiclass_randomforest_modular(train=traindat,test=testdat,labels=label_traindat,ft=feattypes): +def multiclass_randomforest(train=traindat,test=testdat,labels=label_traindat,ft=feattypes): try: - from modshogun import RealFeatures, MulticlassLabels, CSVFile, RandomForest, MajorityVote + from shogun import RealFeatures, MulticlassLabels, CSVFile, RandomForest, MajorityVote except ImportError: print("Could not import Shogun modules") return @@ -35,4 +35,4 @@ def multiclass_randomforest_modular(train=traindat,test=testdat,labels=label_tra if __name__=='__main__': print('RandomForest') - multiclass_randomforest_modular(*parameter_list[0]) + multiclass_randomforest(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/preprocessor_dimensionreductionpreprocessor_modular.py b/examples/undocumented/python/preprocessor_dimensionreductionpreprocessor.py similarity index 67% rename from examples/undocumented/python_modular/preprocessor_dimensionreductionpreprocessor_modular.py rename to 
examples/undocumented/python/preprocessor_dimensionreductionpreprocessor.py index ccfe6a735f0..577def4dc9a 100644 --- a/examples/undocumented/python_modular/preprocessor_dimensionreductionpreprocessor_modular.py +++ b/examples/undocumented/python/preprocessor_dimensionreductionpreprocessor.py @@ -6,11 +6,11 @@ parameter_list = [[data, 20], [data, 30]] -def preprocessor_dimensionreductionpreprocessor_modular (data, k): - from modshogun import RealFeatures - from modshogun import DimensionReductionPreprocessor +def preprocessor_dimensionreductionpreprocessor (data, k): + from shogun import RealFeatures + from shogun import DimensionReductionPreprocessor try: - from modshogun import LocallyLinearEmbedding + from shogun import LocallyLinearEmbedding except ImportError: print("LocallyLinearEmbedding not available") exit(0) @@ -29,5 +29,5 @@ def preprocessor_dimensionreductionpreprocessor_modular (data, k): if __name__=='__main__': print('DimensionReductionPreprocessor') - preprocessor_dimensionreductionpreprocessor_modular(*parameter_list[0]) + preprocessor_dimensionreductionpreprocessor(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/preprocessor_fisherlda_modular.py b/examples/undocumented/python/preprocessor_fisherlda.py similarity index 65% rename from examples/undocumented/python_modular/preprocessor_fisherlda_modular.py rename to examples/undocumented/python/preprocessor_fisherlda.py index 0e263c194e5..5ff12da25dd 100644 --- a/examples/undocumented/python_modular/preprocessor_fisherlda_modular.py +++ b/examples/undocumented/python/preprocessor_fisherlda.py @@ -1,6 +1,6 @@ #!/usr/bin/env python from tools.load import LoadMatrix -from modshogun import * +from shogun import * lm=LoadMatrix() @@ -8,11 +8,11 @@ labels = lm.load_numbers('../data/label_train_multiclass.dat') parameter_list = [[data, labels, CANVAR_FLDA], [data, labels, CLASSIC_FLDA]] -def preprocessor_fisherlda_modular (data, labels, method): +def preprocessor_fisherlda (data, 
labels, method): - from modshogun import RealFeatures, MulticlassLabels, CANVAR_FLDA - from modshogun import FisherLda - from modshogun import MulticlassLabels + from shogun import RealFeatures, MulticlassLabels, CANVAR_FLDA + from shogun import FisherLda + from shogun import MulticlassLabels sg_features = RealFeatures(data) sg_labels = MulticlassLabels(labels) @@ -26,5 +26,5 @@ def preprocessor_fisherlda_modular (data, labels, method): if __name__=='__main__': print('FisherLda') - preprocessor_fisherlda_modular(*parameter_list[0]) + preprocessor_fisherlda(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/preprocessor_kernelpca_modular.py b/examples/undocumented/python/preprocessor_kernelpca.py similarity index 68% rename from examples/undocumented/python_modular/preprocessor_kernelpca_modular.py rename to examples/undocumented/python/preprocessor_kernelpca.py index a4efcd3c815..40905167fb2 100644 --- a/examples/undocumented/python_modular/preprocessor_kernelpca_modular.py +++ b/examples/undocumented/python/preprocessor_kernelpca.py @@ -6,10 +6,10 @@ parameter_list = [[data, 0.01, 1.0], [data, 0.05, 2.0]] -def preprocessor_kernelpca_modular (data, threshold, width): - from modshogun import RealFeatures - from modshogun import KernelPCA - from modshogun import GaussianKernel +def preprocessor_kernelpca (data, threshold, width): + from shogun import RealFeatures + from shogun import KernelPCA + from shogun import GaussianKernel features = RealFeatures(data) @@ -25,5 +25,5 @@ def preprocessor_kernelpca_modular (data, threshold, width): if __name__=='__main__': print('KernelPCA') - preprocessor_kernelpca_modular(*parameter_list[0]) + preprocessor_kernelpca(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/preprocessor_logplusone_modular.py b/examples/undocumented/python/preprocessor_logplusone.py similarity index 75% rename from examples/undocumented/python_modular/preprocessor_logplusone_modular.py rename to 
examples/undocumented/python/preprocessor_logplusone.py index 457db1471af..11115edac6b 100644 --- a/examples/undocumented/python_modular/preprocessor_logplusone_modular.py +++ b/examples/undocumented/python/preprocessor_logplusone.py @@ -7,11 +7,11 @@ parameter_list = [[traindat+10,testdat+10,1.4,10],[traindat+10,testdat+10,1.5,10]] -def preprocessor_logplusone_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.4,size_cache=10): +def preprocessor_logplusone (fm_train_real=traindat,fm_test_real=testdat,width=1.4,size_cache=10): - from modshogun import Chi2Kernel - from modshogun import RealFeatures - from modshogun import LogPlusOne + from shogun import Chi2Kernel + from shogun import RealFeatures + from shogun import LogPlusOne feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) @@ -34,4 +34,4 @@ def preprocessor_logplusone_modular (fm_train_real=traindat,fm_test_real=testdat if __name__=='__main__': print('LogPlusOne') - preprocessor_logplusone_modular(*parameter_list[0]) + preprocessor_logplusone(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/preprocessor_normone_modular.py b/examples/undocumented/python/preprocessor_normone.py similarity index 75% rename from examples/undocumented/python_modular/preprocessor_normone_modular.py rename to examples/undocumented/python/preprocessor_normone.py index 3a6a3453b8d..24afe917cdd 100644 --- a/examples/undocumented/python_modular/preprocessor_normone_modular.py +++ b/examples/undocumented/python/preprocessor_normone.py @@ -7,11 +7,11 @@ parameter_list = [[traindat,testdat,1.4,10],[traindat,testdat,1.5,10]] -def preprocessor_normone_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.4,size_cache=10): +def preprocessor_normone (fm_train_real=traindat,fm_test_real=testdat,width=1.4,size_cache=10): - from modshogun import Chi2Kernel - from modshogun import RealFeatures - from modshogun import NormOne + from shogun import Chi2Kernel + from shogun import 
RealFeatures + from shogun import NormOne feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) @@ -33,4 +33,4 @@ def preprocessor_normone_modular (fm_train_real=traindat,fm_test_real=testdat,wi if __name__=='__main__': print('NormOne') - preprocessor_normone_modular(*parameter_list[0]) + preprocessor_normone(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/preprocessor_pca_modular.py b/examples/undocumented/python/preprocessor_pca.py similarity index 70% rename from examples/undocumented/python_modular/preprocessor_pca_modular.py rename to examples/undocumented/python/preprocessor_pca.py index f5363d6019a..23149502390 100644 --- a/examples/undocumented/python_modular/preprocessor_pca_modular.py +++ b/examples/undocumented/python/preprocessor_pca.py @@ -6,9 +6,9 @@ parameter_list = [[data]] -def preprocessor_pca_modular (data): - from modshogun import RealFeatures - from modshogun import PCA +def preprocessor_pca (data): + from shogun import RealFeatures + from shogun import PCA features = RealFeatures(data) @@ -21,5 +21,5 @@ def preprocessor_pca_modular (data): if __name__=='__main__': print('PCA') - preprocessor_pca_modular(*parameter_list[0]) + preprocessor_pca(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/preprocessor_prunevarsubmean_modular.py b/examples/undocumented/python/preprocessor_prunevarsubmean.py similarity index 74% rename from examples/undocumented/python_modular/preprocessor_prunevarsubmean_modular.py rename to examples/undocumented/python/preprocessor_prunevarsubmean.py index 42696ca32b7..91bcc0376a4 100644 --- a/examples/undocumented/python_modular/preprocessor_prunevarsubmean_modular.py +++ b/examples/undocumented/python/preprocessor_prunevarsubmean.py @@ -7,10 +7,10 @@ parameter_list = [[traindat,testdat,1.5,10],[traindat,testdat,1.5,10]] -def preprocessor_prunevarsubmean_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.4,size_cache=10): - from modshogun import 
Chi2Kernel - from modshogun import RealFeatures - from modshogun import PruneVarSubMean +def preprocessor_prunevarsubmean (fm_train_real=traindat,fm_test_real=testdat,width=1.4,size_cache=10): + from shogun import Chi2Kernel + from shogun import RealFeatures + from shogun import PruneVarSubMean feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) @@ -32,4 +32,4 @@ def preprocessor_prunevarsubmean_modular (fm_train_real=traindat,fm_test_real=te if __name__=='__main__': print('PruneVarSubMean') - preprocessor_prunevarsubmean_modular(*parameter_list[0]) + preprocessor_prunevarsubmean(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/preprocessor_randomfouriergausspreproc_modular.py b/examples/undocumented/python/preprocessor_randomfouriergausspreproc.py similarity index 70% rename from examples/undocumented/python_modular/preprocessor_randomfouriergausspreproc_modular.py rename to examples/undocumented/python/preprocessor_randomfouriergausspreproc.py index c72ed33e4bd..f1e9a0dc656 100644 --- a/examples/undocumented/python_modular/preprocessor_randomfouriergausspreproc_modular.py +++ b/examples/undocumented/python/preprocessor_randomfouriergausspreproc.py @@ -7,13 +7,13 @@ parameter_list = [[traindat,testdat,1.5,10],[traindat,testdat,1.5,10]] -from modshogun import Math_init_random; +from shogun import Math_init_random; Math_init_random(12345); -def preprocessor_randomfouriergausspreproc_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.4,size_cache=10): - from modshogun import Chi2Kernel - from modshogun import RealFeatures - from modshogun import RandomFourierGaussPreproc +def preprocessor_randomfouriergausspreproc (fm_train_real=traindat,fm_test_real=testdat,width=1.4,size_cache=10): + from shogun import Chi2Kernel + from shogun import RealFeatures + from shogun import RandomFourierGaussPreproc feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) @@ -35,4 +35,4 @@ def 
preprocessor_randomfouriergausspreproc_modular (fm_train_real=traindat,fm_te if __name__=='__main__': print('RandomFourierGaussPreproc') - preprocessor_randomfouriergausspreproc_modular(*parameter_list[0]) + preprocessor_randomfouriergausspreproc(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/preprocessor_sortulongstring_modular.py b/examples/undocumented/python/preprocessor_sortulongstring.py similarity index 76% rename from examples/undocumented/python_modular/preprocessor_sortulongstring_modular.py rename to examples/undocumented/python/preprocessor_sortulongstring.py index 5f87eb93930..4b817b2996f 100644 --- a/examples/undocumented/python_modular/preprocessor_sortulongstring_modular.py +++ b/examples/undocumented/python/preprocessor_sortulongstring.py @@ -7,11 +7,11 @@ parameter_list = [[traindna,testdna,4,0,False,False],[traindna,testdna,3,0,False,False]] -def preprocessor_sortulongstring_modular (fm_train_dna=traindna,fm_test_dna=testdna,order=3,gap=0,reverse=False,use_sign=False): +def preprocessor_sortulongstring (fm_train_dna=traindna,fm_test_dna=testdna,order=3,gap=0,reverse=False,use_sign=False): - from modshogun import CommUlongStringKernel - from modshogun import StringCharFeatures, StringUlongFeatures, DNA - from modshogun import SortUlongString + from shogun import CommUlongStringKernel + from shogun import StringCharFeatures, StringUlongFeatures, DNA + from shogun import SortUlongString charfeat=StringCharFeatures(DNA) @@ -40,4 +40,4 @@ def preprocessor_sortulongstring_modular (fm_train_dna=traindna,fm_test_dna=test if __name__=='__main__': print('CommUlongString') - preprocessor_sortulongstring_modular(*parameter_list[0]) + preprocessor_sortulongstring(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/preprocessor_sortwordstring_modular.py b/examples/undocumented/python/preprocessor_sortwordstring.py similarity index 76% rename from 
examples/undocumented/python_modular/preprocessor_sortwordstring_modular.py rename to examples/undocumented/python/preprocessor_sortwordstring.py index fdc7f36a62e..231d334298a 100644 --- a/examples/undocumented/python_modular/preprocessor_sortwordstring_modular.py +++ b/examples/undocumented/python/preprocessor_sortwordstring.py @@ -7,11 +7,11 @@ parameter_list = [[traindna,testdna,3,0,False,False],[traindna,testdna,3,0,False,False]] -def preprocessor_sortwordstring_modular (fm_train_dna=traindna,fm_test_dna=testdna,order=3,gap=0,reverse=False,use_sign=False): +def preprocessor_sortwordstring (fm_train_dna=traindna,fm_test_dna=testdna,order=3,gap=0,reverse=False,use_sign=False): - from modshogun import CommWordStringKernel - from modshogun import StringCharFeatures, StringWordFeatures, DNA - from modshogun import SortWordString + from shogun import CommWordStringKernel + from shogun import StringCharFeatures, StringWordFeatures, DNA + from shogun import SortWordString charfeat=StringCharFeatures(fm_train_dna, DNA) feats_train=StringWordFeatures(charfeat.get_alphabet()) @@ -37,4 +37,4 @@ def preprocessor_sortwordstring_modular (fm_train_dna=traindna,fm_test_dna=testd if __name__=='__main__': print('CommWordString') - preprocessor_sortwordstring_modular(*parameter_list[0]) + preprocessor_sortwordstring(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/regression_cartree_modular.py b/examples/undocumented/python/regression_cartree.py similarity index 80% rename from examples/undocumented/python_modular/regression_cartree_modular.py rename to examples/undocumented/python/regression_cartree.py index 94221969dcb..ad89191af11 100644 --- a/examples/undocumented/python_modular/regression_cartree_modular.py +++ b/examples/undocumented/python/regression_cartree.py @@ -6,9 +6,9 @@ parameter_list = [[50,5,15,0.2,feattypes]] -def regression_cartree_modular(num_train=500,num_test=50,x_range=15,noise_var=0.2,ft=feattypes): +def 
regression_cartree(num_train=500,num_test=50,x_range=15,noise_var=0.2,ft=feattypes): try: - from modshogun import RealFeatures, RegressionLabels, CSVFile, CARTree, PT_REGRESSION + from shogun import RealFeatures, RegressionLabels, CSVFile, CARTree, PT_REGRESSION from numpy import random except ImportError: print("Could not import Shogun and/or numpy modules") @@ -40,4 +40,4 @@ def regression_cartree_modular(num_train=500,num_test=50,x_range=15,noise_var=0. if __name__=='__main__': print('CARTree') - regression_cartree_modular(*parameter_list[0]) + regression_cartree(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/regression_chaidtree_modular.py b/examples/undocumented/python/regression_chaidtree.py similarity index 80% rename from examples/undocumented/python_modular/regression_chaidtree_modular.py rename to examples/undocumented/python/regression_chaidtree.py index 1bdef04488d..60b7f883ca6 100644 --- a/examples/undocumented/python_modular/regression_chaidtree_modular.py +++ b/examples/undocumented/python/regression_chaidtree.py @@ -6,9 +6,9 @@ parameter_list = [[500,50,15,0.2,feattypes]] -def regression_chaidtree_modular(num_train=500,num_test=50,x_range=15,noise_var=0.2,ft=feattypes): +def regression_chaidtree(num_train=500,num_test=50,x_range=15,noise_var=0.2,ft=feattypes): try: - from modshogun import RealFeatures, RegressionLabels, CSVFile, CHAIDTree, PT_REGRESSION + from shogun import RealFeatures, RegressionLabels, CSVFile, CHAIDTree, PT_REGRESSION from numpy import random except ImportError: print("Could not import Shogun and/or numpy modules") @@ -40,4 +40,4 @@ def regression_chaidtree_modular(num_train=500,num_test=50,x_range=15,noise_var= if __name__=='__main__': print('CHAIDTree') - regression_chaidtree_modular(*parameter_list[0]) + regression_chaidtree(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/regression_randomforest_modular.py b/examples/undocumented/python/regression_randomforest.py similarity index 
82% rename from examples/undocumented/python_modular/regression_randomforest_modular.py rename to examples/undocumented/python/regression_randomforest.py index b907b8f939f..acfc0d2073e 100644 --- a/examples/undocumented/python_modular/regression_randomforest_modular.py +++ b/examples/undocumented/python/regression_randomforest.py @@ -10,9 +10,9 @@ parameter_list = [[500,50,15,0.2,feattypes]] -def regression_randomforest_modular(num_train=500,num_test=50,x_range=15,noise_var=0.2,ft=feattypes): +def regression_randomforest(num_train=500,num_test=50,x_range=15,noise_var=0.2,ft=feattypes): try: - from modshogun import RealFeatures, RegressionLabels, CSVFile, RandomForest, MeanRule, PT_REGRESSION + from shogun import RealFeatures, RegressionLabels, CSVFile, RandomForest, MeanRule, PT_REGRESSION except ImportError: print("Could not import Shogun modules") return @@ -45,4 +45,4 @@ def regression_randomforest_modular(num_train=500,num_test=50,x_range=15,noise_v if __name__=='__main__': print('RandomForest') - regression_randomforest_modular(*parameter_list[0]) + regression_randomforest(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/regression_svrlight_modular.py b/examples/undocumented/python/regression_svrlight.py similarity index 81% rename from examples/undocumented/python_modular/regression_svrlight_modular.py rename to examples/undocumented/python/regression_svrlight.py index 752e17c980a..e9126fc8c7d 100644 --- a/examples/undocumented/python_modular/regression_svrlight_modular.py +++ b/examples/undocumented/python/regression_svrlight.py @@ -13,14 +13,14 @@ parameter_list = [[traindat,testdat,label_traindat,1.2,1,1e-5,1e-2,1],[traindat,testdat,label_traindat,2.3,0.5,1e-5,1e-6,1]] -def regression_svrlight_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat, \ +def regression_svrlight (fm_train=traindat,fm_test=testdat,label_train=label_traindat, \ width=1.2,C=1,epsilon=1e-5,tube_epsilon=1e-2,num_threads=3): - from modshogun 
import RegressionLabels, RealFeatures - from modshogun import GaussianKernel + from shogun import RegressionLabels, RealFeatures + from shogun import GaussianKernel try: - from modshogun import SVRLight + from shogun import SVRLight except ImportError: print('No support for SVRLight available.') return @@ -44,4 +44,4 @@ def regression_svrlight_modular (fm_train=traindat,fm_test=testdat,label_train=l if __name__=='__main__': print('SVRLight') - regression_svrlight_modular(*parameter_list[0]) + regression_svrlight(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/serialization_complex_example.py b/examples/undocumented/python/serialization_complex_example.py similarity index 70% rename from examples/undocumented/python_modular/serialization_complex_example.py rename to examples/undocumented/python/serialization_complex_example.py index 2e1a17b226f..33966a40452 100644 --- a/examples/undocumented/python_modular/serialization_complex_example.py +++ b/examples/undocumented/python/serialization_complex_example.py @@ -9,12 +9,13 @@ def serialization_complex_example (num=5, dist=1, dim=10, C=2.0, width=10): import os from numpy import concatenate, zeros, ones from numpy.random import randn, seed - from modshogun import RealFeatures, MulticlassLabels - from modshogun import GMNPSVM - from modshogun import GaussianKernel - from modshogun import SerializableHdf5File,SerializableAsciiFile, \ + from shogun import RealFeatures, MulticlassLabels + from shogun import GMNPSVM + from shogun import GaussianKernel + from shogun import SerializableHdf5File,SerializableAsciiFile, \ SerializableJsonFile,SerializableXmlFile,MSG_DEBUG - from modshogun import NormOne, LogPlusOne + from shogun import NormOne, LogPlusOne + from tempfile import NamedTemporaryFile seed(17) @@ -40,57 +41,57 @@ def serialization_complex_example (num=5, dist=1, dim=10, C=2.0, width=10): #svm.print_serializable() - fstream = SerializableHdf5File("tmp/blaah.h5", "w") + tmp_h5 = 
NamedTemporaryFile(suffix='h5') + fstream = SerializableHdf5File(tmp_h5.name, "w") status = svm.save_serializable(fstream) check_status(status,'h5') - fstream = SerializableAsciiFile("tmp/blaah.asc", "w") + tmp_asc = NamedTemporaryFile(suffix='asc') + fstream = SerializableAsciiFile(tmp_asc.name, "w") status = svm.save_serializable(fstream) check_status(status,'asc') - fstream = SerializableJsonFile("tmp/blaah.json", "w") + tmp_json = NamedTemporaryFile(suffix='json') + fstream = SerializableJsonFile(tmp_json.name, "w") status = svm.save_serializable(fstream) check_status(status,'json') - fstream = SerializableXmlFile("tmp/blaah.xml", "w") + tmp_xml = NamedTemporaryFile(suffix='xml') + fstream = SerializableXmlFile(tmp_xml.name, "w") status = svm.save_serializable(fstream) check_status(status,'xml') - fstream = SerializableHdf5File("tmp/blaah.h5", "r") + fstream = SerializableHdf5File(tmp_h5.name, "r") new_svm=GMNPSVM() status = new_svm.load_serializable(fstream) check_status(status,'h5') new_svm.train() bias_h5 = new_svm.get_svm(0).get_bias() - fstream = SerializableAsciiFile("tmp/blaah.asc", "r") + fstream = SerializableAsciiFile(tmp_asc.name, "r") new_svm=GMNPSVM() status = new_svm.load_serializable(fstream) check_status(status,'asc') new_svm.train() bias_asc = new_svm.get_svm(0).get_bias() - fstream = SerializableJsonFile("tmp/blaah.json", "r") + fstream = SerializableJsonFile(tmp_json.name, "r") new_svm=GMNPSVM() status = new_svm.load_serializable(fstream) check_status(status,'json') new_svm.train() bias_json = new_svm.get_svm(0).get_bias() - fstream = SerializableXmlFile("tmp/blaah.xml", "r") + fstream = SerializableXmlFile(tmp_xml.name, "r") new_svm=GMNPSVM() status = new_svm.load_serializable(fstream) check_status(status,'xml') new_svm.train() bias_xml = new_svm.get_svm(0).get_bias() - os.unlink("tmp/blaah.h5") - os.unlink("tmp/blaah.asc") - os.unlink("tmp/blaah.json") - os.unlink("tmp/blaah.xml") return svm,new_svm, bias_ref, bias_h5, bias_asc, bias_json, 
bias_xml if __name__=='__main__': - print('Serialization SVMLight') + print('Serialization') serialization_complex_example(*parameter_list[0]) diff --git a/examples/undocumented/python/serialization_matrix.py b/examples/undocumented/python/serialization_matrix.py new file mode 100644 index 00000000000..6d6be7ea5b3 --- /dev/null +++ b/examples/undocumented/python/serialization_matrix.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +from shogun import * +from numpy import array + +parameter_list=[[[[1.0,2,3],[4,5,6]]]] + +def serialization_matrix (m): + from tempfile import NamedTemporaryFile + feats=RealFeatures(array(m)) + #feats.io.set_loglevel(0) + tmp_asc_1 = NamedTemporaryFile(suffix='1.asc') + fstream = SerializableAsciiFile(tmp_asc_1.name, "w") + feats.save_serializable(fstream) + + tmp_asc_2 = NamedTemporaryFile(suffix='2.asc') + l=MulticlassLabels(array([1.0,2,3])) + fstream = SerializableAsciiFile(tmp_asc_2.name, "w") + l.save_serializable(fstream) + +if __name__=='__main__': + print('Serialization Matrix Modular') + serialization_matrix(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/serialization_string_kernels_modular.py b/examples/undocumented/python/serialization_string_kernels.py similarity index 85% rename from examples/undocumented/python_modular/serialization_string_kernels_modular.py rename to examples/undocumented/python/serialization_string_kernels.py index 9ca556d6379..9971b8532b8 100644 --- a/examples/undocumented/python_modular/serialization_string_kernels_modular.py +++ b/examples/undocumented/python/serialization_string_kernels.py @@ -1,17 +1,17 @@ #!/usr/bin/env python -from modshogun import WeightedDegreeStringKernel, LinearKernel, PolyKernel, GaussianKernel, CTaxonomy -from modshogun import CombinedKernel, WeightedDegreeRBFKernel -from modshogun import StringCharFeatures, RealFeatures, CombinedFeatures, StringWordFeatures, SortWordString -from modshogun import DNA, PROTEIN, Labels -from modshogun import 
WeightedDegreeStringKernel, CombinedKernel, WeightedCommWordStringKernel, WeightedDegreePositionStringKernel -from modshogun import StringCharFeatures, DNA, StringWordFeatures, CombinedFeatures - -from modshogun import MSG_DEBUG -from modshogun import RealFeatures, BinaryLabels, DNA, Alphabet -from modshogun import WeightedDegreeStringKernel, GaussianKernel +from shogun import WeightedDegreeStringKernel, LinearKernel, PolyKernel, GaussianKernel, CTaxonomy +from shogun import CombinedKernel, WeightedDegreeRBFKernel +from shogun import StringCharFeatures, RealFeatures, CombinedFeatures, StringWordFeatures, SortWordString +from shogun import DNA, PROTEIN, Labels +from shogun import WeightedDegreeStringKernel, CombinedKernel, WeightedCommWordStringKernel, WeightedDegreePositionStringKernel +from shogun import StringCharFeatures, DNA, StringWordFeatures, CombinedFeatures + +from shogun import MSG_DEBUG +from shogun import RealFeatures, BinaryLabels, DNA, Alphabet +from shogun import WeightedDegreeStringKernel, GaussianKernel try: - from modshogun import SVMLight + from shogun import SVMLight except ImportError: print("SVMLight is not available") exit(0) @@ -149,7 +149,7 @@ def construct_features(features): parameter_list = [[200, 1, 100]] -def serialization_string_kernels_modular(n_data, num_shifts, size): +def serialization_string_kernels(n_data, num_shifts, size): """ serialize svm with string kernels """ @@ -215,5 +215,5 @@ def serialization_string_kernels_modular(n_data, num_shifts, size): if __name__=='__main__': - serialization_string_kernels_modular(*parameter_list[0]) + serialization_string_kernels(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/serialization_svmlight_modular.py b/examples/undocumented/python/serialization_svmlight.py similarity index 89% rename from examples/undocumented/python_modular/serialization_svmlight_modular.py rename to examples/undocumented/python/serialization_svmlight.py index e9e231896c6..f130c61a071 100644 
--- a/examples/undocumented/python_modular/serialization_svmlight_modular.py +++ b/examples/undocumented/python/serialization_svmlight.py @@ -1,12 +1,12 @@ #!/usr/bin/env python parameter_list=[[10, 1, 2.1, 2.0]] -def serialization_svmlight_modular (num, dist, width, C): - from modshogun import MSG_DEBUG - from modshogun import RealFeatures, BinaryLabels, DNA, Alphabet - from modshogun import WeightedDegreeStringKernel, GaussianKernel +def serialization_svmlight (num, dist, width, C): + from shogun import MSG_DEBUG + from shogun import RealFeatures, BinaryLabels, DNA, Alphabet + from shogun import WeightedDegreeStringKernel, GaussianKernel try: - from modshogun import SVMLight + from shogun import SVMLight except ImportError: print("SVMLight not available") exit(0) @@ -119,4 +119,4 @@ def load(filename): if __name__=='__main__': print('Serialization SVMLight') - serialization_svmlight_modular(*parameter_list[0]) + serialization_svmlight(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/so_multiclass.py b/examples/undocumented/python/so_multiclass.py similarity index 92% rename from examples/undocumented/python_modular/so_multiclass.py rename to examples/undocumented/python/so_multiclass.py index 4923ea553c8..b6ba20bd0a9 100644 --- a/examples/undocumented/python_modular/so_multiclass.py +++ b/examples/undocumented/python/so_multiclass.py @@ -26,8 +26,8 @@ def gen_data(num_classes,num_samples,dim): def so_multiclass (fm_train_real=traindat,label_train_multiclass=label_traindat): try: - from modshogun import RealFeatures - from modshogun import MulticlassModel, MulticlassSOLabels, PrimalMosekSOSVM, RealNumber + from shogun import RealFeatures + from shogun import MulticlassModel, MulticlassSOLabels, PrimalMosekSOSVM, RealNumber except ImportError: print("Mosek not available") return diff --git a/examples/undocumented/python_modular/stochasticgbmachine_modular.py b/examples/undocumented/python/stochasticgbmachine.py similarity index 73% rename from 
examples/undocumented/python_modular/stochasticgbmachine_modular.py rename to examples/undocumented/python/stochasticgbmachine.py index e1acf8f1246..dae5d67ffce 100644 --- a/examples/undocumented/python_modular/stochasticgbmachine_modular.py +++ b/examples/undocumented/python/stochasticgbmachine.py @@ -9,9 +9,9 @@ parameter_list = [[traindat,label_traindat,feat_types]] -def stochasticgbmachine_modular(train=traindat,train_labels=label_traindat,ft=feat_types): +def stochasticgbmachine(train=traindat,train_labels=label_traindat,ft=feat_types): try: - from modshogun import RealFeatures, RegressionLabels, CSVFile, CARTree, StochasticGBMachine, SquaredLoss + from shogun import RealFeatures, RegressionLabels, CSVFile, CARTree, StochasticGBMachine, SquaredLoss except ImportError: print("Could not import Shogun modules") return @@ -31,16 +31,16 @@ def stochasticgbmachine_modular(train=traindat,train_labels=label_traindat,ft=fe s=StochasticGBMachine(cart,loss,500,0.01,0.6) # train - feats.add_subset(np.int32(p[0:num])) - labels.add_subset(np.int32(p[0:num])) + feats.add_subset(np.int32(p[0:int(num)])) + labels.add_subset(np.int32(p[0:int(num)])) s.set_labels(labels) s.train(feats) feats.remove_subset() labels.remove_subset() # apply - feats.add_subset(np.int32(p[num:len(p)])) - labels.add_subset(np.int32(p[num:len(p)])) + feats.add_subset(np.int32(p[int(num):len(p)])) + labels.add_subset(np.int32(p[int(num):len(p)])) output=s.apply_regression(feats) feats.remove_subset() @@ -50,4 +50,4 @@ def stochasticgbmachine_modular(train=traindat,train_labels=label_traindat,ft=fe if __name__=='__main__': print('StochasticGBMachine') - stochasticgbmachine_modular(*parameter_list[0]) + stochasticgbmachine(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/streaming_vw_modular.py b/examples/undocumented/python/streaming_vw.py similarity index 74% rename from examples/undocumented/python_modular/streaming_vw_modular.py rename to 
examples/undocumented/python/streaming_vw.py index aea41f328a4..8e21dbdbccb 100644 --- a/examples/undocumented/python_modular/streaming_vw_modular.py +++ b/examples/undocumented/python/streaming_vw.py @@ -1,12 +1,12 @@ #!/usr/bin/env python -from modshogun import StreamingVwFile -from modshogun import T_SVMLIGHT -from modshogun import StreamingVwFeatures -from modshogun import VowpalWabbit +from shogun import StreamingVwFile +from shogun import T_SVMLIGHT +from shogun import StreamingVwFeatures +from shogun import VowpalWabbit parameter_list=[[None]] -def streaming_vw_modular (dummy): +def streaming_vw (dummy): """Runs the VW algorithm on a toy dataset in SVMLight format.""" # Open the input file as a StreamingVwFile @@ -28,4 +28,4 @@ def streaming_vw_modular (dummy): ##return vw if __name__ == "__main__": - streaming_vw_modular(*parameter_list[0]) + streaming_vw(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/streaming_vw_createcache_modular.py b/examples/undocumented/python/streaming_vw_createcache.py similarity index 80% rename from examples/undocumented/python_modular/streaming_vw_createcache_modular.py rename to examples/undocumented/python/streaming_vw_createcache.py index bbdff3fbc44..2404f0350b7 100644 --- a/examples/undocumented/python_modular/streaming_vw_createcache_modular.py +++ b/examples/undocumented/python/streaming_vw_createcache.py @@ -1,13 +1,13 @@ #!/usr/bin/env python -from modshogun import StreamingVwFile -from modshogun import StreamingVwCacheFile -from modshogun import T_SVMLIGHT -from modshogun import StreamingVwFeatures -from modshogun import VowpalWabbit +from shogun import StreamingVwFile +from shogun import StreamingVwCacheFile +from shogun import T_SVMLIGHT +from shogun import StreamingVwFeatures +from shogun import VowpalWabbit parameter_list=[['../data/fm_train_sparsereal.dat']] -def streaming_vw_createcache_modular (fname): +def streaming_vw_createcache (fname): # First creates a binary cache from an ascii data 
file. # and then trains using the StreamingVwCacheFile as input @@ -42,4 +42,4 @@ def streaming_vw_createcache_modular (fname): ##return vw if __name__ == "__main__": - streaming_vw_createcache_modular(*parameter_list[0]) + streaming_vw_createcache(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/structure_discrete_hmsvm_bmrm.py b/examples/undocumented/python/structure_discrete_hmsvm_bmrm.py similarity index 84% rename from examples/undocumented/python_modular/structure_discrete_hmsvm_bmrm.py rename to examples/undocumented/python/structure_discrete_hmsvm_bmrm.py index 9fee3bd2826..59cf961f4b8 100644 --- a/examples/undocumented/python_modular/structure_discrete_hmsvm_bmrm.py +++ b/examples/undocumented/python/structure_discrete_hmsvm_bmrm.py @@ -9,10 +9,10 @@ parameter_list=[[data_dict]] def structure_discrete_hmsvm_bmrm (m_data_dict=data_dict): - from modshogun import RealMatrixFeatures, SequenceLabels, HMSVMModel, Sequence, TwoStateModel - from modshogun import StructuredAccuracy, SMT_TWO_STATE + from shogun import RealMatrixFeatures, SequenceLabels, HMSVMModel, Sequence, TwoStateModel + from shogun import StructuredAccuracy, SMT_TWO_STATE try: - from modshogun import DualLibQPBMSOSVM + from shogun import DualLibQPBMSOSVM except ImportError: print("DualLibQPBMSOSVM not available") exit(0) diff --git a/examples/undocumented/python_modular/structure_discrete_hmsvm_mosek.py b/examples/undocumented/python/structure_discrete_hmsvm_mosek.py similarity index 83% rename from examples/undocumented/python_modular/structure_discrete_hmsvm_mosek.py rename to examples/undocumented/python/structure_discrete_hmsvm_mosek.py index 14cecf439bf..47ac6e4b161 100644 --- a/examples/undocumented/python_modular/structure_discrete_hmsvm_mosek.py +++ b/examples/undocumented/python/structure_discrete_hmsvm_mosek.py @@ -9,11 +9,11 @@ parameter_list=[[data_dict]] def structure_discrete_hmsvm_mosek (m_data_dict=data_dict): - from modshogun import RealMatrixFeatures, 
SequenceLabels, HMSVMModel, Sequence, TwoStateModel - from modshogun import StructuredAccuracy, SMT_TWO_STATE + from shogun import RealMatrixFeatures, SequenceLabels, HMSVMModel, Sequence, TwoStateModel + from shogun import StructuredAccuracy, SMT_TWO_STATE try: - from modshogun import PrimalMosekSOSVM + from shogun import PrimalMosekSOSVM except ImportError: print("Mosek not available") return diff --git a/examples/undocumented/python_modular/structure_dynprog_modular.py b/examples/undocumented/python/structure_dynprog.py similarity index 98% rename from examples/undocumented/python_modular/structure_dynprog_modular.py rename to examples/undocumented/python/structure_dynprog.py index b9298489128..e97ddbf5954 100644 --- a/examples/undocumented/python_modular/structure_dynprog_modular.py +++ b/examples/undocumented/python/structure_dynprog.py @@ -4,7 +4,7 @@ parameter_list=[['../data/DynProg_example_py.pickle.gz']] -from modshogun import * +from shogun import * import numpy from numpy import array,Inf,float64,matrix,frompyfunc,zeros @@ -64,7 +64,7 @@ def loads(str): unpickler.dispatch[pickle.GLOBAL] = mapped_load_global return unpickler.load() -def structure_dynprog_modular (fname): +def structure_dynprog (fname): import sys #pickle is not compatible between python2 -> 3 @@ -190,4 +190,4 @@ def structure_dynprog_modular (fname): if __name__ == '__main__': print("Structure") - structure_dynprog_modular(*parameter_list[0]) + structure_dynprog(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/structure_factor_graph_model.py b/examples/undocumented/python/structure_factor_graph_model.py similarity index 90% rename from examples/undocumented/python_modular/structure_factor_graph_model.py rename to examples/undocumented/python/structure_factor_graph_model.py index 2c37f7582cc..face601195c 100644 --- a/examples/undocumented/python_modular/structure_factor_graph_model.py +++ b/examples/undocumented/python/structure_factor_graph_model.py @@ -1,7 +1,7 @@ 
#!/usr/bin/env python import numpy as np -from modshogun import TableFactorType +from shogun import TableFactorType # create the factor type with GT parameters tid = 0 @@ -20,10 +20,10 @@ fac_type_b = TableFactorType(tid_b, cards_b, w_gt_b) def gen_data(ftype, num_samples, show_data = False): - from modshogun import Math - from modshogun import FactorType, Factor, TableFactorType, FactorGraph - from modshogun import FactorGraphObservation, FactorGraphLabels, FactorGraphFeatures - from modshogun import MAPInference, TREE_MAX_PROD + from shogun import Math + from shogun import FactorType, Factor, TableFactorType, FactorGraph + from shogun import FactorGraphObservation, FactorGraphLabels, FactorGraphFeatures + from shogun import MAPInference, TREE_MAX_PROD Math.init_random(17) @@ -95,11 +95,11 @@ def gen_data(ftype, num_samples, show_data = False): parameter_list = [[samples,labels,w_all,ftype_all]] def structure_factor_graph_model(tr_samples = samples, tr_labels = labels, w = w_all, ftype = ftype_all): - from modshogun import SOSVMHelper, LabelsFactory - from modshogun import FactorGraphModel, MAPInference, TREE_MAX_PROD - from modshogun import StochasticSOSVM, FWSOSVM + from shogun import SOSVMHelper, LabelsFactory + from shogun import FactorGraphModel, MAPInference, TREE_MAX_PROD + from shogun import StochasticSOSVM, FWSOSVM try: - from modshogun import DualLibQPBMSOSVM + from shogun import DualLibQPBMSOSVM except ImportError: print("DualLibQPBMSOSVM not available") exit(0) diff --git a/examples/undocumented/python_modular/structure_graphcuts.py b/examples/undocumented/python/structure_graphcuts.py similarity index 96% rename from examples/undocumented/python_modular/structure_graphcuts.py rename to examples/undocumented/python/structure_graphcuts.py index 3615f90c6e8..59174ea05a1 100644 --- a/examples/undocumented/python_modular/structure_graphcuts.py +++ b/examples/undocumented/python/structure_graphcuts.py @@ -3,11 +3,11 @@ import numpy as np import itertools 
-from modshogun import Factor, TableFactorType, FactorGraph -from modshogun import FactorGraphObservation, FactorGraphLabels, FactorGraphFeatures -from modshogun import FactorGraphModel, GRAPH_CUT -from modshogun import GraphCut -from modshogun import StochasticSOSVM +from shogun import Factor, TableFactorType, FactorGraph +from shogun import FactorGraphObservation, FactorGraphLabels, FactorGraphFeatures +from shogun import FactorGraphModel, GRAPH_CUT +from shogun import GraphCut +from shogun import StochasticSOSVM def generate_data(num_train_samples, len_label, len_feat): """ Generate synthetic dataset diff --git a/examples/undocumented/python_modular/structure_hierarchical_multilabel_classification.py b/examples/undocumented/python/structure_hierarchical_multilabel_classification.py similarity index 95% rename from examples/undocumented/python_modular/structure_hierarchical_multilabel_classification.py rename to examples/undocumented/python/structure_hierarchical_multilabel_classification.py index 05392a2fab1..05c3e70e3ee 100644 --- a/examples/undocumented/python_modular/structure_hierarchical_multilabel_classification.py +++ b/examples/undocumented/python/structure_hierarchical_multilabel_classification.py @@ -7,10 +7,10 @@ http://kt.ijs.si/DragiKocev/PhD/resources/doku.php?id=hmc_classification#imageclef07d """ -from modshogun import MultilabelSOLabels, HierarchicalMultilabelModel -from modshogun import RealFeatures -from modshogun import StochasticSOSVM -from modshogun import StructuredAccuracy, LabelsFactory +from shogun import MultilabelSOLabels, HierarchicalMultilabelModel +from shogun import RealFeatures +from shogun import StochasticSOSVM +from shogun import StructuredAccuracy, LabelsFactory import numpy as np import time diff --git a/examples/undocumented/python_modular/structure_multiclass_bmrm.py b/examples/undocumented/python/structure_multiclass_bmrm.py similarity index 92% rename from examples/undocumented/python_modular/structure_multiclass_bmrm.py 
rename to examples/undocumented/python/structure_multiclass_bmrm.py index 1a293763b79..d7dbd034a47 100644 --- a/examples/undocumented/python_modular/structure_multiclass_bmrm.py +++ b/examples/undocumented/python/structure_multiclass_bmrm.py @@ -25,15 +25,15 @@ def gen_data(num_classes,num_samples,dim): parameter_list = [[traindat,label_traindat]] def structure_multiclass_bmrm(fm_train_real=traindat,label_train_multiclass=label_traindat): - from modshogun import MulticlassSOLabels, LabelsFactory - from modshogun import RealFeatures - from modshogun import SOSVMHelper + from shogun import MulticlassSOLabels, LabelsFactory + from shogun import RealFeatures + from shogun import SOSVMHelper try: - from modshogun import BMRM, PPBMRM, P3BMRM, DualLibQPBMSOSVM + from shogun import BMRM, PPBMRM, P3BMRM, DualLibQPBMSOSVM except ImportError: print("At least one of BMRM, PPBMRM, P3BMRM, DualLibQPBMSOSVM not available") exit(0) - from modshogun import MulticlassModel, RealNumber + from shogun import MulticlassModel, RealNumber labels = MulticlassSOLabels(label_train_multiclass) features = RealFeatures(fm_train_real.T) diff --git a/examples/undocumented/python_modular/structure_plif_hmsvm_bmrm.py b/examples/undocumented/python/structure_plif_hmsvm_bmrm.py similarity index 86% rename from examples/undocumented/python_modular/structure_plif_hmsvm_bmrm.py rename to examples/undocumented/python/structure_plif_hmsvm_bmrm.py index 91822d366a6..bd7542607a0 100644 --- a/examples/undocumented/python_modular/structure_plif_hmsvm_bmrm.py +++ b/examples/undocumented/python/structure_plif_hmsvm_bmrm.py @@ -3,9 +3,9 @@ parameter_list=[[50, 125, 10, 2]] def structure_plif_hmsvm_bmrm (num_examples, example_length, num_features, num_noise_features): - from modshogun import RealMatrixFeatures, TwoStateModel, StructuredAccuracy + from shogun import RealMatrixFeatures, TwoStateModel, StructuredAccuracy try: - from modshogun import DualLibQPBMSOSVM + from shogun import DualLibQPBMSOSVM except 
ImportError: print("DualLibQPBMSOSVM not available") exit(0) diff --git a/examples/undocumented/python_modular/structure_plif_hmsvm_mosek.py b/examples/undocumented/python/structure_plif_hmsvm_mosek.py similarity index 85% rename from examples/undocumented/python_modular/structure_plif_hmsvm_mosek.py rename to examples/undocumented/python/structure_plif_hmsvm_mosek.py index 2ef2e0dc40c..8e918f08512 100644 --- a/examples/undocumented/python_modular/structure_plif_hmsvm_mosek.py +++ b/examples/undocumented/python/structure_plif_hmsvm_mosek.py @@ -3,10 +3,10 @@ parameter_list=[[100, 250, 10, 2]] def structure_plif_hmsvm_mosek (num_examples, example_length, num_features, num_noise_features): - from modshogun import RealMatrixFeatures, TwoStateModel, StructuredAccuracy + from shogun import RealMatrixFeatures, TwoStateModel, StructuredAccuracy try: - from modshogun import PrimalMosekSOSVM + from shogun import PrimalMosekSOSVM except ImportError: print("Mosek not available") return diff --git a/examples/undocumented/python_modular/tests_check_commwordkernel_memleak_modular.py b/examples/undocumented/python/tests_check_commwordkernel_memleak.py similarity index 92% rename from examples/undocumented/python_modular/tests_check_commwordkernel_memleak_modular.py rename to examples/undocumented/python/tests_check_commwordkernel_memleak.py index 790e75bbc8e..5da5b8b07df 100644 --- a/examples/undocumented/python_modular/tests_check_commwordkernel_memleak_modular.py +++ b/examples/undocumented/python/tests_check_commwordkernel_memleak.py @@ -1,11 +1,11 @@ #!/usr/bin/env python parameter_list=[[10,7,0,False]] -def tests_check_commwordkernel_memleak_modular (num, order, gap, reverse): +def tests_check_commwordkernel_memleak (num, order, gap, reverse): import gc - from modshogun import Alphabet,StringCharFeatures,StringWordFeatures,DNA - from modshogun import SortWordString, MSG_DEBUG - from modshogun import CommWordStringKernel, IdentityKernelNormalizer + from shogun import 
Alphabet,StringCharFeatures,StringWordFeatures,DNA + from shogun import SortWordString, MSG_DEBUG + from shogun import CommWordStringKernel, IdentityKernelNormalizer from numpy import mat POS=[num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', @@ -83,4 +83,4 @@ def tests_check_commwordkernel_memleak_modular (num, order, gap, reverse): if __name__=='__main__': print('Leak Check Comm Word Kernel') - tests_check_commwordkernel_memleak_modular(*parameter_list[0]) + tests_check_commwordkernel_memleak(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/tools/__init__.py b/examples/undocumented/python/tools/__init__.py similarity index 100% rename from examples/undocumented/python_modular/tools/__init__.py rename to examples/undocumented/python/tools/__init__.py diff --git a/examples/undocumented/python_modular/tools/generate_circle_data.py b/examples/undocumented/python/tools/generate_circle_data.py similarity index 100% rename from examples/undocumented/python_modular/tools/generate_circle_data.py rename to examples/undocumented/python/tools/generate_circle_data.py diff --git a/examples/undocumented/python_modular/tools/load.py b/examples/undocumented/python/tools/load.py similarity index 100% rename from examples/undocumented/python_modular/tools/load.py rename to examples/undocumented/python/tools/load.py diff --git a/examples/undocumented/python_modular/tools/multiclass_shared.py b/examples/undocumented/python/tools/multiclass_shared.py similarity index 100% rename from examples/undocumented/python_modular/tools/multiclass_shared.py rename to examples/undocumented/python/tools/multiclass_shared.py diff --git a/examples/undocumented/python_modular/transfer_multitask_clustered_logistic_regression.py b/examples/undocumented/python/transfer_multitask_clustered_logistic_regression.py similarity index 91% rename from examples/undocumented/python_modular/transfer_multitask_clustered_logistic_regression.py rename to 
examples/undocumented/python/transfer_multitask_clustered_logistic_regression.py index 87ac204ab44..e3dc8b2a64b 100644 --- a/examples/undocumented/python_modular/transfer_multitask_clustered_logistic_regression.py +++ b/examples/undocumented/python/transfer_multitask_clustered_logistic_regression.py @@ -11,9 +11,9 @@ parameter_list = [[traindat,testdat,label_traindat]] def transfer_multitask_clustered_logistic_regression (fm_train=traindat,fm_test=testdat,label_train=label_traindat): - from modshogun import BinaryLabels, RealFeatures, Task, TaskGroup, MSG_DEBUG + from shogun import BinaryLabels, RealFeatures, Task, TaskGroup, MSG_DEBUG try: - from modshogun import MultitaskClusteredLogisticRegression + from shogun import MultitaskClusteredLogisticRegression except ImportError: print("MultitaskClusteredLogisticRegression not available") exit() diff --git a/examples/undocumented/python_modular/transfer_multitask_l12_logistic_regression.py b/examples/undocumented/python/transfer_multitask_l12_logistic_regression.py similarity index 91% rename from examples/undocumented/python_modular/transfer_multitask_l12_logistic_regression.py rename to examples/undocumented/python/transfer_multitask_l12_logistic_regression.py index 56c7c84e57b..22c47c0c2f8 100644 --- a/examples/undocumented/python_modular/transfer_multitask_l12_logistic_regression.py +++ b/examples/undocumented/python/transfer_multitask_l12_logistic_regression.py @@ -11,9 +11,9 @@ parameter_list = [[traindat,testdat,label_traindat]] def transfer_multitask_l12_logistic_regression (fm_train=traindat,fm_test=testdat,label_train=label_traindat): - from modshogun import BinaryLabels, RealFeatures, Task, TaskGroup + from shogun import BinaryLabels, RealFeatures, Task, TaskGroup try: - from modshogun import MultitaskL12LogisticRegression + from shogun import MultitaskL12LogisticRegression except ImportError: print("MultitaskL12LogisticRegression not available") exit(0) diff --git 
a/examples/undocumented/python_modular/transfer_multitask_leastsquares_regression.py b/examples/undocumented/python/transfer_multitask_leastsquares_regression.py similarity index 90% rename from examples/undocumented/python_modular/transfer_multitask_leastsquares_regression.py rename to examples/undocumented/python/transfer_multitask_leastsquares_regression.py index 8ec07e61d5c..1b1357d96a5 100644 --- a/examples/undocumented/python_modular/transfer_multitask_leastsquares_regression.py +++ b/examples/undocumented/python/transfer_multitask_leastsquares_regression.py @@ -11,9 +11,9 @@ parameter_list = [[traindat,testdat,label_traindat]] def transfer_multitask_leastsquares_regression (fm_train=traindat,fm_test=testdat,label_train=label_traindat): - from modshogun import RegressionLabels, RealFeatures, Task, TaskGroup + from shogun import RegressionLabels, RealFeatures, Task, TaskGroup try: - from modshogun import MultitaskLeastSquaresRegression + from shogun import MultitaskLeastSquaresRegression except ImportError: print("MultitaskLeastSquaresRegression not available") exit(0) diff --git a/examples/undocumented/python_modular/transfer_multitask_logistic_regression.py b/examples/undocumented/python/transfer_multitask_logistic_regression.py similarity index 91% rename from examples/undocumented/python_modular/transfer_multitask_logistic_regression.py rename to examples/undocumented/python/transfer_multitask_logistic_regression.py index d475cb7757f..5c97b6f6a2d 100644 --- a/examples/undocumented/python_modular/transfer_multitask_logistic_regression.py +++ b/examples/undocumented/python/transfer_multitask_logistic_regression.py @@ -11,9 +11,9 @@ parameter_list = [[traindat,testdat,label_traindat]] def transfer_multitask_logistic_regression (fm_train=traindat,fm_test=testdat,label_train=label_traindat): - from modshogun import BinaryLabels, RealFeatures, Task, TaskGroup + from shogun import BinaryLabels, RealFeatures, Task, TaskGroup try: - from modshogun import 
MultitaskLogisticRegression + from shogun import MultitaskLogisticRegression except ImportError: print("MultitaskLogisticRegression not available") exit() diff --git a/examples/undocumented/python_modular/transfer_multitask_trace_logistic_regression.py b/examples/undocumented/python/transfer_multitask_trace_logistic_regression.py similarity index 91% rename from examples/undocumented/python_modular/transfer_multitask_trace_logistic_regression.py rename to examples/undocumented/python/transfer_multitask_trace_logistic_regression.py index a128a969cbf..c72912dd63a 100644 --- a/examples/undocumented/python_modular/transfer_multitask_trace_logistic_regression.py +++ b/examples/undocumented/python/transfer_multitask_trace_logistic_regression.py @@ -11,9 +11,9 @@ parameter_list = [[traindat,testdat,label_traindat]] def transfer_multitask_trace_logistic_regression (fm_train=traindat,fm_test=testdat,label_train=label_traindat): - from modshogun import BinaryLabels, RealFeatures, Task, TaskGroup + from shogun import BinaryLabels, RealFeatures, Task, TaskGroup try: - from modshogun import MultitaskTraceLogisticRegression + from shogun import MultitaskTraceLogisticRegression except ImportError: print("MultitaskTraceLogisticRegression not available") exit(0) diff --git a/examples/undocumented/python_modular/variational_classifier_modular.py b/examples/undocumented/python/variational_classifier.py similarity index 94% rename from examples/undocumented/python_modular/variational_classifier_modular.py rename to examples/undocumented/python/variational_classifier.py index c922674c35c..7461d660b24 100644 --- a/examples/undocumented/python_modular/variational_classifier_modular.py +++ b/examples/undocumented/python/variational_classifier.py @@ -37,12 +37,12 @@ label_binary_traindat = '%s/label_train_twoclass.dat'%path try: - from modshogun import GaussianProcessClassification + from shogun import GaussianProcessClassification except ImportError: print("GaussianProcessClassification 
is not available") exit(0) -from modshogun import * +from shogun import * parameter_list=[ [KLCholeskyInferenceMethod,traindat,testdat,label_binary_traindat,0,0,1e-5,1e-2,0], [KLCovarianceInferenceMethod,traindat,testdat,label_binary_traindat,0,0,1e-5,1e-2,0], @@ -50,7 +50,7 @@ [KLDualInferenceMethod,traindat,testdat,label_binary_traindat,0,0,1e-5,1e-2,0], [SingleLaplaceInferenceMethod,traindat,testdat,label_binary_traindat,0,0], ] -def variational_classifier_modular(kl_inference,train_fname=traindat,test_fname=testdat, +def variational_classifier(kl_inference,train_fname=traindat,test_fname=testdat, label_fname=label_binary_traindat,kernel_log_sigma=0,kernel_log_scale=0,noise_factor=1e-5, min_coeff_kernel=1e-2,max_attempt=0): from math import exp @@ -84,4 +84,4 @@ def variational_classifier_modular(kl_inference,train_fname=traindat,test_fname= if __name__=="__main__": print("variational_classifier") for parameter in parameter_list: - variational_classifier_modular(*parameter) + variational_classifier(*parameter) diff --git a/examples/undocumented/python_modular/CMakeLists.txt b/examples/undocumented/python_modular/CMakeLists.txt deleted file mode 100644 index a2250418693..00000000000 --- a/examples/undocumented/python_modular/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -INCLUDE(PythonEnvironment) -GET_PYTHON_ENV() - -FILE(GLOB PYTHON_EXAMPLES *.py) -FOREACH(EXAMPLE ${PYTHON_EXAMPLES}) - STRING(REGEX REPLACE ".*/(.*).py" "\\1" EXAMPLE_NAME ${EXAMPLE}) - add_test(NAME python_modular-${EXAMPLE_NAME} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMAND ${PYTHON_EXECUTABLE} ${EXAMPLE}) - set_property(TEST python_modular-${EXAMPLE_NAME} PROPERTY - ENVIRONMENT "${PYTHON_ENV_VARS}") - -ENDFOREACH() \ No newline at end of file diff --git a/examples/undocumented/python_modular/features_io_modular.py b/examples/undocumented/python_modular/features_io_modular.py deleted file mode 100644 index 0b354c0f4c1..00000000000 --- 
a/examples/undocumented/python_modular/features_io_modular.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python -from tools.load import LoadMatrix -lm=LoadMatrix() -data=lm.load_numbers('../data/fm_train_real.dat') -label=lm.load_numbers('../data/label_train_twoclass.dat') - -parameter_list=[[data,label]] - -def features_io_modular (fm_train_real, label_train_twoclass): - import numpy - from modshogun import SparseRealFeatures, RealFeatures, MulticlassLabels - from modshogun import GaussianKernel - from modshogun import LibSVMFile, CSVFile, BinaryFile, HDF5File - - feats=SparseRealFeatures(fm_train_real) - feats2=SparseRealFeatures() - - f=BinaryFile("tmp/fm_train_sparsereal.bin","w") - feats.save(f) - - f=LibSVMFile("tmp/fm_train_sparsereal.ascii","w") - feats.save(f) - - f=BinaryFile("tmp/fm_train_sparsereal.bin") - feats2.load(f) - - f=LibSVMFile("tmp/fm_train_sparsereal.ascii") - feats2.load(f) - - feats=RealFeatures(fm_train_real) - feats2=RealFeatures() - - f=BinaryFile("tmp/fm_train_real.bin","w") - feats.save(f) - - f=HDF5File("tmp/fm_train_real.h5","w", "/data/doubles") - feats.save(f) - - f=CSVFile("tmp/fm_train_real.ascii","w") - feats.save(f) - - f=BinaryFile("tmp/fm_train_real.bin") - feats2.load(f) - #print("diff binary", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten()))) - - f=CSVFile("tmp/fm_train_real.ascii") - feats2.load(f) - #print("diff ascii", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten()))) - - lab=MulticlassLabels(numpy.array([0.0,1.0,2.0,3.0])) - lab2=MulticlassLabels() - f=CSVFile("tmp/label_train_twoclass.ascii","w") - lab.save(f) - - f=BinaryFile("tmp/label_train_twoclass.bin","w") - lab.save(f) - - f=HDF5File("tmp/label_train_real.h5","w", "/data/labels") - lab.save(f) - - f=CSVFile("tmp/label_train_twoclass.ascii") - lab2.load(f) - - f=BinaryFile("tmp/label_train_twoclass.bin") - lab2.load(f) - - f=HDF5File("tmp/fm_train_real.h5","r", "/data/doubles") - 
feats2.load(f) - #print(feats2.get_feature_matrix()) - f=HDF5File("tmp/label_train_real.h5","r", "/data/labels") - lab2.load(f) - #print(lab2.get_labels()) - - #clean up - import os - for f in ['tmp/fm_train_sparsereal.bin','tmp/fm_train_sparsereal.ascii', - 'tmp/fm_train_real.bin','tmp/fm_train_real.h5','tmp/fm_train_real.ascii', - 'tmp/label_train_real.h5', 'tmp/label_train_twoclass.ascii','tmp/label_train_twoclass.bin']: - os.unlink(f) - return feats, feats2, lab, lab2 - -if __name__=='__main__': - print('Features IO') - features_io_modular(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_read_svmlight_format_modular.py b/examples/undocumented/python_modular/features_read_svmlight_format_modular.py deleted file mode 100644 index 5a3d2b8283b..00000000000 --- a/examples/undocumented/python_modular/features_read_svmlight_format_modular.py +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env python -parameter_list=[['../data/train_sparsereal.light']] - -def features_read_svmlight_format_modular (fname): - import os - from modshogun import SparseRealFeatures - from modshogun import LibSVMFile - - f=SparseRealFeatures() - lab=f.load_with_labels(LibSVMFile(fname)) - f.save_with_labels(LibSVMFile('tmp/testwrite.light', 'w'), lab) - os.unlink('tmp/testwrite.light') - -if __name__=='__main__': - print('Reading SVMLIGHT format') - features_read_svmlight_format_modular(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/features_string_file_char_modular.py b/examples/undocumented/python_modular/features_string_file_char_modular.py deleted file mode 100644 index 4bedccf6e50..00000000000 --- a/examples/undocumented/python_modular/features_string_file_char_modular.py +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env python -parameter_list = [['features_string_file_char_modular.py']] - -def features_string_file_char_modular (fname): - from modshogun import StringFileCharFeatures, RAWBYTE - f = StringFileCharFeatures(fname, RAWBYTE) - 
#print("strings", f.get_features()) - return f - -if __name__=='__main__': - print('Compressing StringCharFileFeatures') - features_string_file_char_modular(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/serialization_matrix_modular.py b/examples/undocumented/python_modular/serialization_matrix_modular.py deleted file mode 100644 index 1d0f6948ff8..00000000000 --- a/examples/undocumented/python_modular/serialization_matrix_modular.py +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env python -from modshogun import * -from numpy import array -import os - -parameter_list=[[[[1.0,2,3],[4,5,6]]]] - -def serialization_matrix_modular (m): - feats=RealFeatures(array(m)) - #feats.io.set_loglevel(0) - fstream = SerializableAsciiFile("tmp/foo.asc", "w") - feats.save_serializable(fstream) - - l=MulticlassLabels(array([1.0,2,3])) - fstream = SerializableAsciiFile("tmp/foo2.asc", "w") - l.save_serializable(fstream) - - os.unlink("tmp/foo.asc") - os.unlink("tmp/foo2.asc") - -if __name__=='__main__': - print('Serialization Matrix Modular') - serialization_matrix_modular(*parameter_list[0]) diff --git a/examples/undocumented/python_modular/tmp/.gitkeep b/examples/undocumented/python_modular/tmp/.gitkeep deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/scripts/check_format.sh b/scripts/check_format.sh new file mode 100755 index 00000000000..f1d4c0cec9b --- /dev/null +++ b/scripts/check_format.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# This script checks code style by using clang-format +# on a git diff made between a base branch and a test branch, +# which is the one we want to check. +# +# This script was originally inspired by: +# https://github.com/root-project/root/blob/master/.travis.yml + +# Unofficial strict bash mode +# See: http://redsymbol.net/articles/unofficial-bash-strict-mode/ +set -euo pipefail +IFS=$'\n\t' + +function check_shogun_style { + + # Prevent cases in which we are testing branches + # different from develop + if [ ! 
`git branch --list ${2:-}` ] + then + echo "Branch ${2:-} does not exists locally. Fetching it." + git fetch origin "${2:-}:${2:-}" + + fi + + BASE_COMMIT=$(git rev-parse ${2:-}) + + + echo "-----" + echo "Shogun Style Checker" + echo "-----" + echo "Running clang-format-3.8 against branch ${2:-}, with hash $BASE_COMMIT" + + COMMIT_FILES=$(git diff --name-only $BASE_COMMIT) + + # Use clang-format only on existent files + LIST=("") + for file in $COMMIT_FILES + do + if [ -f $file ]; then + LIST=("${LIST[@]}" "$file") + fi + done + + RESULT_OUTPUT="$(git clang-format-3.8 --commit $BASE_COMMIT --diff --binary `which clang-format-3.8` $LIST)" + + if [ "$RESULT_OUTPUT" == "no modified files to format" ] \ + || [ "$RESULT_OUTPUT" == "clang-format-3.8 did not modify any files" ] \ + || [ "$RESULT_OUTPUT" == "clang-format did not modify any files" ]; then + echo "clang-format-3.8 passed. \o/" + echo "-----" + exit 0 + else + echo "-----" + echo "clang-format failed." + echo "To reproduce it locally please run: " + echo -e "\t1) git checkout ${1:-}" + echo -e "\t2) git clang-format-3.8 --commit $BASE_COMMIT --diff --binary $(which clang-format-3.8)" + echo "To fix the errors automatically please run: " + echo -e "\t1) git checkout ${1:-}" + echo -e "\t2) git clang-format-3.8 --commit $BASE_COMMIT --binary $(which clang-format-3.8)" + echo "-----" + echo "Style errors found:" + echo "$RESULT_OUTPUT" + exit 1 + fi +} + +# Check only if we have enough arguments +if [[ $# -ne 2 ]]; then + echo "Wrong number of parameters supplied!" 
+ echo "Usage: ./check_format.sh " + exit 1 +fi + +# Run the check +check_shogun_style ${1:-} ${2:-} diff --git a/setup.py b/setup.py index ae66c06651e..389f78e3f64 100644 --- a/setup.py +++ b/setup.py @@ -141,7 +141,7 @@ def shogun_cmake(arguments=None): print("Running CMake") if arguments is None: - arguments='-DPythonModular=ON -DENABLE_TESTING=OFF -DCMAKE_INSTALL_PREFIX=install' + arguments='-DINTERFACE_PYTHON=ON -DENABLE_TESTING=OFF -DCMAKE_INSTALL_PREFIX=install' if distutils.spawn.find_executable('cmake') is not None: print('CMake arguments: %s ' % arguments) @@ -243,12 +243,12 @@ def shogun_package_directories(): def shogun_data_files(): data_files = list() libshogun_files = glob.glob(os.path.join(shogun_generated_install, 'lib/libshogun*')) - modshogun_so_destination = os.path.join('lib', python_package_path(shogun_python_packages_location)) - modshogun_so_file = glob.glob(os.path.join(shogun_python_packages_location, '_modshogun.so'))[0] + shogun_so_destination = os.path.join('lib', python_package_path(shogun_python_packages_location)) + shogun_so_file = glob.glob(os.path.join(shogun_python_packages_location, '_shogun.so'))[0] # appending data files data_files.append(('lib', libshogun_files)) - data_files.append((modshogun_so_destination, [modshogun_so_file])) + data_files.append((shogun_so_destination, [shogun_so_file])) if show_debug_information: print('Shogun Python package data files:') @@ -335,7 +335,7 @@ def run(self): # Shogun package content packages = shogun_packages(), package_dir = shogun_package_directories(), - py_modules =['modshogun'], + py_modules =['shogun'], data_files = shogun_data_files(), # Shogun dependencies diff --git a/src/.r-install.sh b/src/.r-install.sh index 2c3f97912bf..887deb61fcd 100755 --- a/src/.r-install.sh +++ b/src/.r-install.sh @@ -129,7 +129,7 @@ else echo "Installing modular shogun interface for R" cat >"$1/$2/NAMESPACE" <"$1/$2/R/$2" < map = 1; +} + +/** + * A mapping from a 64-bit integer + * to a string. 
+ */ +message Int64ToStringMap { + map map = 1; +} + +/** + * A mapping from a string + * to a double-precision floating point number. + */ +message StringToDoubleMap { + map map = 1; +} + +/** + * A mapping from a 64-bit integer + * to a double-precision floating point number. + */ +message Int64ToDoubleMap { + map map = 1; +} + +/** + * A vector of strings. + */ +message StringVector { + repeated string vector = 1; +} + +/** + * A vector of 64-bit integers. + */ +message Int64Vector { + repeated int64 vector = 1; +} + +/** + * A vector of double-precision floating point numbers. + */ +message DoubleVector { + repeated double vector = 1; +} diff --git a/src/interfaces/coreml/DictVectorizer.proto b/src/interfaces/coreml/DictVectorizer.proto new file mode 100644 index 00000000000..662f97b8cec --- /dev/null +++ b/src/interfaces/coreml/DictVectorizer.proto @@ -0,0 +1,36 @@ +// Copyright (c) 2017, Apple Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-3-clause license that can be +// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause + +syntax = "proto3"; +option optimize_for = LITE_RUNTIME; + +import public "DataStructures.proto"; + +package CoreML.Specification; + +/** + * Uses an index mapping to convert a dictionary to an array. + * + * The output array will be equal in length to the index mapping vector parameter. + * All keys in the input dictionary must be present in the index mapping vector. + * + * For each item in the input dictionary, insert its value in the ouput array. + * The position of the insertion is determined by the position of the item's key + * in the index mapping. Any keys not present in the input dictionary, will be + * zero in the output array. + * + * For example: if the ``stringToIndex`` paramter is set to ``["a", "c", "b", "z"]``, + * then an input of ``{"a": 4, "c": 8}`` will produce an output of ``[4, 8, 0, 0]``. 
+ * + */ +message DictVectorizer { + oneof Map { + /// String keys to indexes + StringVector stringToIndex = 1; + + /// Int keys to indexes + Int64Vector int64ToIndex = 2; + } +} diff --git a/src/interfaces/coreml/FeatureTypes.proto b/src/interfaces/coreml/FeatureTypes.proto new file mode 100644 index 00000000000..50bad311020 --- /dev/null +++ b/src/interfaces/coreml/FeatureTypes.proto @@ -0,0 +1,87 @@ +// Copyright (c) 2017, Apple Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-3-clause license that can be +// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause + +syntax = "proto3"; +option optimize_for = LITE_RUNTIME; + +package CoreML.Specification; + +/** + * The 64-bit integer feature type. + */ +message Int64FeatureType {} + +/** + * The double-precision floating point number feature type. + */ +message DoubleFeatureType {} + +/** + * The string feature type. + */ +message StringFeatureType {} + +/** + * The image feature type. + */ +message ImageFeatureType { + // Assumes raw (decompressed) format + enum ColorSpace { + INVALID_COLOR_SPACE = 0; + GRAYSCALE = 10; // 8 bit pixel: 0=black, 255=white + RGB = 20; // 32 bit pixel: RGBA with A channel ignored + BGR = 30; // 32 bit pixel: BGRA with A channel ignored + } + + uint64 width = 1; + uint64 height = 2; + ColorSpace colorSpace = 3; +} + +/** + * The array feature type. + */ +message ArrayFeatureType { + enum ArrayDataType { + INVALID_ARRAY_DATA_TYPE = 0; + FLOAT32 = 65568; // 0x10000 | 32 + DOUBLE = 65600; // 0x10000 | 64 + INT32 = 131104; // 0x20000 | 32 + } + + repeated uint64 shape = 1; /// For neural networks, must be of length 1 or 3, representing input shape [C] or [C,H,W], respectively. + ArrayDataType dataType = 2; +} + +/** + * The dictionary feature type. 
+ */ +message DictionaryFeatureType { + /** + * Key/value type tags, with the following restrictions: + * - ``keyType`` must be a hashable type + * - ``valueType`` is assumed to be a ``double`` + */ + oneof KeyType { + Int64FeatureType int64KeyType = 1; + StringFeatureType stringKeyType = 2; + } +} + +/** + * A feature, which may be optional. + */ +message FeatureType { + oneof Type { + Int64FeatureType int64Type = 1; + DoubleFeatureType doubleType = 2; + StringFeatureType stringType = 3; + ImageFeatureType imageType = 4; + ArrayFeatureType multiArrayType = 5; + DictionaryFeatureType dictionaryType = 6; + } + + bool isOptional = 1000; +} diff --git a/src/interfaces/coreml/FeatureVectorizer.proto b/src/interfaces/coreml/FeatureVectorizer.proto new file mode 100644 index 00000000000..75eaf14b536 --- /dev/null +++ b/src/interfaces/coreml/FeatureVectorizer.proto @@ -0,0 +1,26 @@ +// Copyright (c) 2017, Apple Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-3-clause license that can be +// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause + +syntax = "proto3"; +option optimize_for = LITE_RUNTIME; + +package CoreML.Specification; + +/** + * A FeatureVectorizer puts one or more features into a single array. + * + * The ordering of features in the output array is determined by + * ``inputList``. + * + * ``inputDimensions`` is a zero based index. + */ +message FeatureVectorizer { + message InputColumn { + string inputColumn = 1; + uint64 inputDimensions = 2; + } + + repeated InputColumn inputList = 1; +} diff --git a/src/interfaces/coreml/GLMClassifier.proto b/src/interfaces/coreml/GLMClassifier.proto new file mode 100644 index 00000000000..47f6f4a3c7b --- /dev/null +++ b/src/interfaces/coreml/GLMClassifier.proto @@ -0,0 +1,43 @@ +// Copyright (c) 2017, Apple Inc. All rights reserved. 
+// +// Use of this source code is governed by a BSD-3-clause license that can be +// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause + +syntax = "proto3"; +option optimize_for = LITE_RUNTIME; + +import public "DataStructures.proto"; + +package CoreML.Specification; + +/** + * A generalized linear model classifier. + */ +message GLMClassifier { + message DoubleArray { + repeated double value = 1; + } + + enum PostEvaluationTransform { + Logit = 0; + Probit = 1; /// Only binary classification is supported for probit + } + + enum ClassEncoding { + ReferenceClass = 0; /// First class is the reference class + OneVsRest = 1; /// Also called One vs All + } + + repeated DoubleArray weights = 1; + repeated double offset = 2; + PostEvaluationTransform postEvaluationTransform = 3; + ClassEncoding classEncoding = 4; + + /** + * Required class label mapping. + */ + oneof ClassLabels { + StringVector stringClassLabels = 100; + Int64Vector int64ClassLabels = 101; + } +} diff --git a/src/interfaces/coreml/GLMRegressor.proto b/src/interfaces/coreml/GLMRegressor.proto new file mode 100644 index 00000000000..64093c4f156 --- /dev/null +++ b/src/interfaces/coreml/GLMRegressor.proto @@ -0,0 +1,28 @@ +// Copyright (c) 2017, Apple Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-3-clause license that can be +// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause + +syntax = "proto3"; +option optimize_for = LITE_RUNTIME; + +package CoreML.Specification; + +/** + * A generalized linear model regressor. 
+ */ +message GLMRegressor { + message DoubleArray { + repeated double value = 1; + } + + enum PostEvaluationTransform { + NoTransform = 0; + Logit = 1; + Probit = 2; + } + + repeated DoubleArray weights = 1; + repeated double offset = 2; + PostEvaluationTransform postEvaluationTransform = 3; +} diff --git a/src/interfaces/coreml/Identity.proto b/src/interfaces/coreml/Identity.proto new file mode 100644 index 00000000000..123a15e5915 --- /dev/null +++ b/src/interfaces/coreml/Identity.proto @@ -0,0 +1,18 @@ +// Copyright (c) 2017, Apple Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-3-clause license that can be +// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause + +syntax = "proto3"; +option optimize_for = LITE_RUNTIME; + +package CoreML.Specification; + +/** + * An identity model. + * + * This model returns given inputs as outputs, unchanged. + * Intended to be used for testing purposes. + */ +message Identity { +} diff --git a/src/interfaces/coreml/Imputer.proto b/src/interfaces/coreml/Imputer.proto new file mode 100644 index 00000000000..3de280b2f16 --- /dev/null +++ b/src/interfaces/coreml/Imputer.proto @@ -0,0 +1,43 @@ +// Copyright (c) 2017, Apple Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-3-clause license that can be +// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause + +syntax = "proto3"; +option optimize_for = LITE_RUNTIME; + +import public "DataStructures.proto"; + +package CoreML.Specification; + +/** + * A transformer that replaces missing values with a default value, + * such as a statistically-derived value. + * + * If ``ReplaceValue`` is set, then missing values of that type are + * replaced with the corresponding value. + * + * For example: if ``replaceDoubleValue`` is set to ``NaN`` + * and a single ``NaN`` double value is provided as input, + * then it is replaced by ``imputedDoubleValue``. 
However + * if the input is an array of doubles, then any instances + * of ``NaN`` in the array is replaced with the corresponding + * value in ``imputedDoubleArray``. + */ +message Imputer { + oneof ImputedValue { + double imputedDoubleValue = 1; + int64 imputedInt64Value = 2; + string imputedStringValue = 3; + DoubleVector imputedDoubleArray = 4; + Int64Vector imputedInt64Array = 5; + StringToDoubleMap imputedStringDictionary = 6; + Int64ToDoubleMap imputedInt64Dictionary = 7; + } + + oneof ReplaceValue { + double replaceDoubleValue = 11; + int64 replaceInt64Value = 12; + string replaceStringValue = 13; + } +} diff --git a/src/interfaces/coreml/LICENSE.txt b/src/interfaces/coreml/LICENSE.txt new file mode 100644 index 00000000000..bbcdc9ef8f3 --- /dev/null +++ b/src/interfaces/coreml/LICENSE.txt @@ -0,0 +1,11 @@ +Copyright (c) 2017, Apple Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/interfaces/coreml/Model.proto b/src/interfaces/coreml/Model.proto new file mode 100644 index 00000000000..b903d983379 --- /dev/null +++ b/src/interfaces/coreml/Model.proto @@ -0,0 +1,190 @@ +// Copyright (c) 2017, Apple Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-3-clause license that can be +// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause + +/** + * A Core ML model consists of a specification version + * and a model description, + * and can be any one of the following types: + * + * Pipelines + * - ``PipelineClassifier`` + * - ``PipelineRegressor`` + * - ``Pipeline`` + * + * Regressors + * - ``GLMRegressor`` + * - ``SupportVectorRegressor`` + * - ``TreeEnsembleRegressor`` + * - ``NeuralNetworkRegressor`` + * + * Classifiers + * - ``GLMClassifier`` + * - ``SupportVectorClassifier`` + * - ``TreeEnsembleClassifier`` + * - ``NeuralNetworkClassifier`` + * + * Neural Networks + * - ``NeuralNetwork`` + * + * Feature Engineering + * - ``OneHotEncoder`` + * - ``Imputer`` + * - ``FeatureVectorizer`` + * - ``DictVectorizer`` + * - ``Scaler`` + * - ``CategoricalMapping`` + * - ``Normalizer`` + * - ``ArrayFeatureExtractor`` + * + * Simple Mathematical Functions + * - ``Identity`` + */ + +syntax = "proto3"; +option optimize_for = LITE_RUNTIME; + +import public "ArrayFeatureExtractor.proto"; +import public "CategoricalMapping.proto"; +import public "DictVectorizer.proto"; +import public 
"FeatureTypes.proto"; +import public "FeatureVectorizer.proto"; +import public "GLMRegressor.proto"; +import public "GLMClassifier.proto"; +import public "Identity.proto"; +import public "Imputer.proto"; +import public "NeuralNetwork.proto"; +import public "Normalizer.proto"; +import public "OneHotEncoder.proto"; +import public "Scaler.proto"; +import public "SVM.proto"; +import public "TreeEnsemble.proto"; + +package CoreML.Specification; + +/** + * A pipeline consisting of one or more models. + */ +message Pipeline { + repeated Model models = 1; +} + +/** + * A classifier pipeline. + */ +message PipelineClassifier { + Pipeline pipeline = 1; + + // Required class label mapping + oneof ClassLabels { + StringVector stringClassLabels = 100; + Int64Vector int64ClassLabels = 101; + } +} + +/** + * A regressor pipeline. + */ +message PipelineRegressor { + Pipeline pipeline = 1; +} + +/** + * A feature description, + * consisting of a name, short description, and type. + */ +message FeatureDescription { + string name = 1; + string shortDescription = 2; + FeatureType type = 3; +} + +/** + * Model metadata, + * consisting of a short description, a version string, + * an author, a license, and any other user defined + * key/value meta data. + */ +message Metadata { + string shortDescription = 1; + string versionString = 2; + string author = 3; + string license = 4; + map userDefined = 100; +} + +/** + * A description of a model, + * consisting of descriptions of its input and output features. + * Both regressor and classifier models require the name of the + * primary predicted output feature (``predictedFeatureName``). + * Classifier models can specify the output feature containing + * probabilities for the predicted classes + * (``predictedProbabilitiesName``). 
+ */ +message ModelDescription { + repeated FeatureDescription input = 1; + repeated FeatureDescription output = 10; + + string predictedFeatureName = 11; + string predictedProbabilitiesName = 12; + + Metadata metadata = 100; +} + +/** + * A Core ML model, + * consisting of a specification version, + * a model description, and a model type. + * + * Core ML model compatibility is indicated by + * a monotonically increasing specification version number, + * which is incremented anytime a backward-incompatible change is made + * (this is functionally equivalent to the MAJOR version number + * described by `Semantic Versioning 2.0.0 `_). + * The Core ML framework in macOS currently supports specification version `1`. + */ +message Model { + int32 specificationVersion = 1; + ModelDescription description = 2; + + // start at 200 here + // model specific parameters: + oneof Type { + // pipeline starts at 200 + PipelineClassifier pipelineClassifier = 200; + PipelineRegressor pipelineRegressor = 201; + Pipeline pipeline = 202; + + // regressors start at 300 + GLMRegressor glmRegressor = 300; + SupportVectorRegressor supportVectorRegressor = 301; + TreeEnsembleRegressor treeEnsembleRegressor = 302; + NeuralNetworkRegressor neuralNetworkRegressor = 303; + + // classifiers start at 400 + GLMClassifier glmClassifier = 400; + SupportVectorClassifier supportVectorClassifier = 401; + TreeEnsembleClassifier treeEnsembleClassifier = 402; + NeuralNetworkClassifier neuralNetworkClassifier = 403; + + // generic models start at 500 + NeuralNetwork neuralNetwork = 500; + + // feature engineering starts at 600 + OneHotEncoder oneHotEncoder = 600; + Imputer imputer = 601; + FeatureVectorizer featureVectorizer = 602; + DictVectorizer dictVectorizer = 603; + Scaler scaler = 604; + CategoricalMapping categoricalMapping = 606; + Normalizer normalizer = 607; + ArrayFeatureExtractor arrayFeatureExtractor = 609; + + // simple mathematical functions used for testing start at 900 + Identity 
identity = 900; + + // reserved until 1000 + } +} diff --git a/src/interfaces/coreml/NeuralNetwork.proto b/src/interfaces/coreml/NeuralNetwork.proto new file mode 100644 index 00000000000..fd367f16a58 --- /dev/null +++ b/src/interfaces/coreml/NeuralNetwork.proto @@ -0,0 +1,2032 @@ +// Copyright (c) 2017, Apple Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-3-clause license that can be +// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause + +syntax = "proto3"; +option optimize_for = LITE_RUNTIME; + +import public "DataStructures.proto"; + +package CoreML.Specification; + +/** + * A neural network is defined through a collection of layers + * and represents a directed acyclic graph (DAG). + * Each layer has a name, a layer type, + * a list of input names, a list of output names, + * and a collection of parameters specific to the layer type. + * + * The graph structure and connectivity of the neural network + * is inferred from the input and output names. + * A neural network starts with the layer + * whose input name is equal to the value specified in + * ``Model.description.input.name``, + * and ends with the layer + * whose output name is equal to the value specified in + * ``Model.description.output.name``. + * Layers must have unique input and output names, + * and a layer may not have input or output names that + * refer to layers that are not yet defined. + * + * CoreML supports sequential data that can be 1- or 3-dimensional. + * 3-dimensional data typically represents an image feature map, + * whose shape is denoted by ``[C, H, W]``, + * which corresponds to the channel, height, and width, respectively. + * 1-dimensional data is a set of features + * whose shape is denoted by ``[C]``, + * and is equivalent to 3-dimensional data + * with the shape ``[C, 1, 1]``. + * + * For the purposes of this specification, + * batch dimension is ignored. 
+ * Thus, a sequence of 3-dimensional data + * is to be understood as a 4-dimensional array, + * whose shape is denoted by ``[Seq_length, C, H, W]``, + * and a sequence of 1-dimensional data + * is to be understood as a 2-dimensional array, + * whose shape is denoted by ``[Seq_length, C]``, + * which is equivalent to a 4-dimensional array + * with the shape ``[Seq_length, C, 1, 1]``. This axes order is important to + * remember while setting parameters for layers such as "reshape" and "permute". + * + * + * At runtime, all data blobs are internally represented + * as 5-dimensional blobs + * with the shape ``[Seq_length, Batch, C, H, W]``. + * + * A layer may process input data differently if operating over a sequence; + * details of this behavior is documented in the layer's message. + * Otherwise, sequential data is processed like a batch --- + * that is, the sequence of inputs are processed independently and in parallel. + * + * The network input shape specified by ``Model.description.input.type`` + * must be compatible with the expected input shape + * of the network input layer, i.e. the last dimension is the fastest moving one. + * + * All data blobs, as well as weight parameters, + * are stored using row-major ordering, i.e. the last dimension is the fastest moving one. + */ +message NeuralNetwork { + repeated NeuralNetworkLayer layers = 1; + repeated NeuralNetworkPreprocessing preprocessing = 2; +} + +/// Preprocessing +/// ------------- + +/** + * A neural network preprocessor that + * performs a scalar multiplication of an image + * followed by addition of scalar biases to the channels. + * + * Input: X + * An image in BGR or RGB format with shape ``[3, H, W]`` + * or in grayscale format with shape ``[1, H, W]``. + * Output: Y + * An image with format and shape corresponding to the input. 
+ * + * If the input image is in BGR format: + * :: + * Y[0, :, :] = channelScale * X[0, :, :] + blueBias + * Y[1, :, :] = channelScale * X[1, :, :] + greenBias + * Y[2, :, :] = channelScale * X[2, :, :] + redBias + * + * If the input image is in RGB format: + * :: + * Y[0, :, :] = channelScale * X[0, :, :] + redBias + * Y[1, :, :] = channelScale * X[1, :, :] + greenBias + * Y[2, :, :] = channelScale * X[2, :, :] + blueBias + * + * If the input image is in grayscale format: + * :: + * Y[0, :, :] = channelScale * X[0, :, :] + grayBias + */ +message NeuralNetworkImageScaler { + float channelScale = 10; ///Scalar to be multiplied. + float blueBias = 20; ///Scalar blue bias to be added. + float greenBias = 21; ///Scalar green bias to be added. + float redBias = 22; ///Scalar red bias to be added. + float grayBias = 30; ///Scalar bias to be added for grayscale images. +} + +/** + * A neural network preprocessor that + * subtracts the provided mean image from the input image. + * The mean image is subtracted from the input named + * ``NeuralNetworkPreprocessing.featureName``. + */ +message NeuralNetworkMeanImage { + /** + * Mean image stored as a flattened array of floats, + * representing shape [Channel,Height,Width]. + */ + repeated float meanImage = 1; +} + +/// Preprocessing parameters for image inputs. +message NeuralNetworkPreprocessing { + string featureName = 1; + oneof preprocessor { + NeuralNetworkImageScaler scaler = 10; + NeuralNetworkMeanImage meanImage = 11; + } +} + +/// Activation Functions +/// -------------------- + +/** + * A rectified linear unit (ReLU) activation function. + * + * This function has the following formula: + * + * .. math:: + * f(x) = \text{max}(0, x) + */ +message ActivationReLU { +} + +/** + * A leaky rectified linear unit (ReLU) activation function. + * + * This function has the following formula: + * + * .. 
math:: + * f(x) = \begin{cases} + * x & \text{if } x \geq 0 \\ + * \alpha x & \text{if } x < 0 + * \end{cases} + */ +message ActivationLeakyReLU { + float alpha = 1; //negative slope value for leakyReLU +} + +/** + * A hyperbolic tangent activation function. + * + * This function has the following formula: + * + * .. math:: + * f(x) = \dfrac{1 - e^{-2x}}{1 + e^{-2x}} + */ +message ActivationTanh { +} + +/** + * A scaled hyperbolic tangent activation function. + * + * This function has the following formula: + * + * .. math:: + * f(x) = \alpha \tanh(\beta x) + */ +message ActivationScaledTanh { + float alpha = 1; + float beta = 2; +} + +/** + * A sigmoid activation function. + * + * This function has the following formula: + * + * .. math:: + * f(x) = \dfrac{1}{1 + e^{-x}} + */ +message ActivationSigmoid { +} + +/** + * A linear activation function. + * + * This function has the following formula: + * + * .. math:: + * f(x) = \alpha x + \beta + */ +message ActivationLinear { + float alpha = 1; + float beta = 2; +} + +/** + * A hard sigmoid activation function. + * + * This function has the following formula: + * + * .. math:: + * f(x) = \text{min}(\text{max}(\alpha x + \beta, 0), 1) + */ +message ActivationSigmoidHard { + float alpha = 1; + float beta = 2; +} + +/** + * A parameterized rectified linear unit (PReLU) activation function, + * which takes ``[C]`` or ``[C,H,W]`` as an input and + * applies different parameters in each channel dimension + * (shared across the ``H`` and ``W`` components). + * + * This function has the following formula: + * + * .. math:: + * f(x_i) = \begin{cases} + * x_i & \text{if } x_i \geq 0 \\ + * \alpha_i x_i & \text{if } x_i < 0 + * \end{cases} \;,\;i=1,...,C + */ +message ActivationPReLU { + // parameter of length C or 1. + // If length is 1, same value is used for all channels + WeightParams alpha = 1; +} + +/** + * An exponential linear unit (ELU) activation function. + * + * This function has the following formula: + * + * .. 
math:: + * f(x) = \begin{cases} + * x & \text{if } x \geq 0 \\ + * \alpha (e^x - 1) & \text{if } x < 0 + * \end{cases} + */ +message ActivationELU { + float alpha = 1; +} + +/** + * A thresholded rectified linear unit (ReLU) activation function. + * + * This function has the following formula: + * + * .. math:: + * f(x) = \begin{cases} + * x & \text{if } x \geq \alpha \\ + * 0 & \text{if } x < \alpha + * \end{cases} + */ +message ActivationThresholdedReLU { + float alpha = 1; +} + +/** + * A softsign activation function. + * + * This function has the following formula: + * + * .. math:: + * f(x) = \dfrac{x}{1 + |x|} + */ +message ActivationSoftsign { +} + +/** + * A softplus activation function. + * + * This function has the following formula: + * + * .. math:: + * f(x) = \text{log}(1 + e^x) + */ +message ActivationSoftplus { +} + +/** + * A parametric softplus activation function, + * which takes ``[C]`` or ``[C,H,W]`` as an input and + * applies different parameters in each channel dimension + * (shared across the ``H`` and ``W`` components). + * + * This function has the following formula: + * + * .. 
math:: + * f(x_i) = \alpha_i \text{log}(1 + e^{\beta_i x_i}) \;,\;i=1,...,C + */ +message ActivationParametricSoftplus { + // If length is 1, same value is used for all channels + WeightParams alpha = 1; //parameter of length C or 1 + WeightParams beta = 2; //parameter of length C or 1 +} + +message ActivationParams { + oneof NonlinearityType { + ActivationLinear linear = 5; + + ActivationReLU ReLU = 10; + ActivationLeakyReLU leakyReLU = 15; + ActivationThresholdedReLU thresholdedReLU = 20; + ActivationPReLU PReLU = 25; + + ActivationTanh tanh = 30; + ActivationScaledTanh scaledTanh = 31; + + ActivationSigmoid sigmoid = 40; + ActivationSigmoidHard sigmoidHard = 41; + + ActivationELU ELU = 50; + + ActivationSoftsign softsign = 60; + ActivationSoftplus softplus = 70; + ActivationParametricSoftplus parametricSoftplus = 71; + } +} + +/** + * A single neural network layer. + */ +message NeuralNetworkLayer { + string name = 1; //descriptive name of the layer + repeated string input = 2; + repeated string output = 3; + + oneof layer { + // start at 100 here + ConvolutionLayerParams convolution = 100; + + PoolingLayerParams pooling = 120; + + ActivationParams activation = 130; + + InnerProductLayerParams innerProduct = 140; + EmbeddingLayerParams embedding = 150; + + //normalization related layers + BatchnormLayerParams batchnorm = 160; + MeanVarianceNormalizeLayerParams mvn = 165; + L2NormalizeLayerParams l2normalize = 170; + SoftmaxLayerParams softmax = 175; + LRNLayerParams lrn = 180; + + CropLayerParams crop = 190; + PaddingLayerParams padding = 200; + UpsampleLayerParams upsample = 210; + + UnaryFunctionLayerParams unary = 220; + + //elementwise operations + AddLayerParams add = 230; + MultiplyLayerParams multiply = 231; + + AverageLayerParams average = 240; + ScaleLayerParams scale = 245; + + BiasLayerParams bias = 250; + MaxLayerParams max = 260; + MinLayerParams min = 261; + + DotProductLayerParams dot = 270; + ReduceLayerParams reduce = 280; + 
LoadConstantLayerParams loadConstant = 290; + + //data reorganization + ReshapeLayerParams reshape = 300; + FlattenLayerParams flatten = 301; + PermuteLayerParams permute = 310; + ConcatLayerParams concat = 320; + SplitLayerParams split = 330; + SequenceRepeatLayerParams sequenceRepeat = 340; + + //Recurrent Layers + SimpleRecurrentLayerParams simpleRecurrent = 400; + GRULayerParams gru = 410; + UniDirectionalLSTMLayerParams uniDirectionalLSTM = 420; + BiDirectionalLSTMLayerParams biDirectionalLSTM = 430; + } +} + +/// Border Amounts +/// -------------- + +/** + * Specifies the amount of spatial border to be either padded or cropped. + * + * For padding: + * :: + * H_out = borderAmounts[0].startEdgeSize + H_in + borderAmounts[0].endEdgeSize + * W_out = borderAmounts[1].startEdgeSize + W_in + borderAmounts[1].endEdgeSize + * + * topPaddingAmount == Height startEdgeSize + * bottomPaddingAmount == Height endEdgeSize + * leftPaddingAmount == Width startEdgeSize + * rightPaddingAmount == Width endEdgeSize + * + * For cropping: + * :: + * H_out = (-borderAmounts[0].startEdgeSize) + H_in + (-borderAmounts[0].endEdgeSize) + * W_out = (-borderAmounts[1].startEdgeSize) + W_in + (-borderAmounts[1].endEdgeSize) + * + * topCropAmount == Height startEdgeSize + * bottomCropAmount == Height endEdgeSize + * leftCropAmount == Width startEdgeSize + * rightCropAmount == Width endEdgeSize + */ +message BorderAmounts { + message EdgeSizes { + /** + * The amount to be padded or cropped from the beginning. + */ + uint64 startEdgeSize = 1; + + /** + * The amount to be padded or cropped from the end. + */ + uint64 endEdgeSize = 2; + } + + /** + * The border amounts. + * This must be length 2 in the order ``[H, W]``. + */ + repeated EdgeSizes borderAmounts = 10; +} + +/** + * Specifies the type of padding to be used with Convolution/Deconvolution and Pooling layers. + * After padding, input spatial shape: ``[H_in, W_in]``, gets modified to the + * output spatial shape ``[H_out, W_out]``. 
+ * :: + * topPaddingAmount == Height startEdgeSize == borderAmounts[0].startEdgeSize + * bottomPaddingAmount == Height endEdgeSize == borderAmounts[0].endEdgeSize + * leftPaddingAmount == Width startEdgeSize == borderAmounts[1].startEdgeSize + * rightPaddingAmount == Width endEdgeSize == borderAmounts[1].endEdgeSize + * + * With Convolution or Pooling: + * :: + * H_out = int_division_round_down((H_in + topPaddingAmount + bottomPaddingAmount - KernelSize[0]),stride[0]) + 1 + * + * which is same as: + * :: + * H_out = int_division_round_up((H_in + topPaddingAmount + bottomPaddingAmount - KernelSize[0] + 1),stride[0]) + * + * With Deconvolution: + * :: + * H_out = (H_in-1) * stride[0] + kernelSize[0] - (topPaddingAmount + bottomPaddingAmount) + * + * + * The equivalent expressions hold true for ``W_out`` as well. + * + * + * By default, the values of ``paddingAmounts`` are set to ``0``, + * which results in a "true" valid padding. + * If non-zero values are provided for ``paddingAmounts``, + * "valid" convolution/pooling is performed within the spatially expanded input. + * + */ +message ValidPadding { + BorderAmounts paddingAmounts = 1; +} + +/** + * Specifies the type of padding to be used with Convolution/Deconvolution and pooling layers. + * After padding, input spatial shape: ``[H_in, W_in]``, gets modified to the + * output spatial shape ``[H_out, W_out]``. + * With Convolution or pooling: + * :: + * H_out = int_division_round_up(H_in,stride[0]) + * W_out = int_division_round_up(W_in,stride[1]) + * + * This is achieved by using the following padding amounts: + * :: + * totalPaddingHeight = max(0,(H_out-1) * stride[0] + KernelSize[0] - Hin) + * totalPaddingWidth = max(0,(W_out-1) * stride[1] + KernelSize[1] - Win) + * + * There are two modes of asymmetry: + * ``BOTTOM_RIGHT_HEAVY``, and ``TOP_LEFT_HEAVY``. 
+ * + * If the mode is ``BOTTOM_RIGHT_HEAVY``: + * :: + * topPaddingAmount = floor(totalPaddingHeight / 2) + * bottomPaddingAmount = totalPaddingHeight - topPaddingAmount + * leftPaddingAmount = floor(totalPaddingWidth / 2) + * rightPaddingAmount = totalPaddingWidth - leftPaddingAmount + * + * If the mode is ``TOP_LEFT_HEAVY``: + * :: + * bottomPaddingAmount = floor(totalPaddingHeight / 2) + * topPaddingAmount = totalPaddingHeight - bottomPaddingAmount + * rightPaddingAmount = floor(totalPaddingWidth / 2) + * leftPaddingAmount = totalPaddingWidth - rightPaddingAmount + * + * + * With Deconvolution: + * :: + * H_out = H_in * stride[0] + * W_out = W_in * stride[1] + */ +message SamePadding { + enum SamePaddingMode { + BOTTOM_RIGHT_HEAVY = 0; + TOP_LEFT_HEAVY = 1; + } + SamePaddingMode asymmetryMode = 1; +} + +/** + * Weights for layer parameters. + * Weights are stored as repeated floating point numbers + * using row-major ordering + * and can represent 1-, 2-, 3-, or 4-dimensional data. + */ +message WeightParams { + /** + * Values specified in single / float / FP32 precision. + */ + repeated float floatValue = 1; +} + +/// Layers +/// ------ + +/** + * A layer that performs spatial convolution or deconvolution. + * :: + * y = ConvolutionLayer(x) + * + * Requires 1 input and produces 1 output. + * + * Input + * A blob with shape ``[inputChannels,inputHeight,inputWidth]`` or ``[C_in, H_in, W_in]``. + * + * Output + * A blob with shape ``[outputChannels,outputHeight,outputWidth]`` or ``[C_out, H_out, W_out]``. + * + * + * If ``dilationFactor`` is not 1 and ``isDeconvolution`` is not True, effective kernel size is + * modified as follows: + * :: + * KernelSize[0] <-- (kernelSize[0]-1) * dilationFactor[0] + 1 + * KernelSize[1] <-- (kernelSize[1]-1) * dilationFactor[1] + 1 + * + * Type of padding can be ``valid`` or ``same``. Output spatial dimensions depend on the + * the type of padding. 
For details, refer to the descriptions of the messages "ValidPadding" + * and "SamePadding". Padded values are all zeros. + * + * For Deconvolution, ``ConvolutionPaddingType`` (``valid`` or ``same``) is ignored when ``outputShape`` is set. + * + * + */ +message ConvolutionLayerParams { + /** + * The number of kernels. + * Same as ``C_out`` used in the layer description. + */ + uint64 outputChannels = 1; + + /** + * Channel dimension of the kernels. + * Must be equal to ``inputChannels / nGroups``. + */ + uint64 kernelChannels = 2; + + /** + * Group convolution as used in AlexNet, + * i.e. weight reuse along channel axis. + * Kernel channels * nGroups = inputChannels. + * If not set or 0, it is set to the default value 1. + */ + uint64 nGroups = 10; + + /** + * Must be length 2 in the order ``[H, W]``. + * If not set, default value ``[3, 3]`` is used. + */ + repeated uint64 kernelSize = 20; + + /** + * Must be length 2 in the order ``[H, W]``. + * If not set, default value ``[1, 1]`` is used. + */ + repeated uint64 stride = 30; + + /** + * Must be length 2 in order ``[H, W]``. + * If not set, default value ``[1, 1]`` is used. + * It is ignored if ``isDeconvolution == true``. + */ + repeated uint64 dilationFactor = 40; + + /** + * The type of padding. + */ + oneof ConvolutionPaddingType { + ValidPadding valid = 50; + SamePadding same = 51; + } + + /** + * Flag to specify whether it is a deconvolution layer. + */ + bool isDeconvolution = 60; + + /** + * Flag to specify whether a bias is to be added or not. + */ + bool hasBias = 70; + + /** + * Weights associated with this layer. + * If convolution (``isDeconvolution == false``), weights have the shape + * ``[outputChannels, kernelChannels, kernelHeight, kernelWidth]``. + * If deconvolution (``isDeconvolution == true``) weights have the shape + * ``[kernelChannels, outputChannels, kernelHeight, kernelWidth]``. + */ + WeightParams weights = 90; + WeightParams bias = 91; /// Must be of size [outputChannels]. 
+ + /** + * The output shape, which has length 2 ``[H_out, W_out]``. + * This is used only for deconvolution (``isDeconvolution == true``). + * If not set, the deconvolution output shape is calculated + * based on ``ConvolutionPaddingType``. + */ + repeated uint64 outputShape = 100; +} + +/** + * A layer that performs a matrix vector product. + * This is equivalent to a fully-connected, or dense layer. + * :: + * y = InnerProductLayer(x) + * + * Requires 1 input and produces 1 output. + * + * Input + * A blob with shape ``[C_in]`` or ``[C_in, 1, 1]``, where ``C_in`` is equal to ``inputChannels``. + * + * Output + * A blob with shape ``[C_out]``, where ``C_out`` is equal to ``outputChannels``. + */ +message InnerProductLayerParams { + uint64 inputChannels = 1; /// Input size: C_in. + uint64 outputChannels = 2; /// Output size: C_out. + + bool hasBias = 10; /// Whether a bias is added or not. + + WeightParams weights = 20; /// Weight matrix [C_out, C_in]. + WeightParams bias = 21; /// Bias vector [C_out]. +} + +/** + * A layer that performs a matrix lookup and optionally adds a bias. + * :: + * y = EmbeddingLayer(x) + * + * Requires 1 input and produces 1 output. + * + * Input + * A sequence of integers with shape ``[1]`` or ``[1, 1, 1]``, (equivalent to ``[Seq_length, 1, 1, 1]``). + * Input values must be in the range ``[0, inputDim - 1]``. + * + * Output + * A sequence of 1-dimensional features of size ``outputChannels`` + * (equivalent to ``[Seq_length, outputChannels, 1, 1]``). + */ +message EmbeddingLayerParams { + uint64 inputDim = 1; /// Size of the input dictionary. + uint64 outputChannels = 2; /// Size of the output vectors. + + bool hasBias = 10; /// Whether a bias is added or not. + + WeightParams weights = 20; /// 2-D weights of dimensions [outputChannels, inputDim]. + WeightParams bias = 21; /// Bias of size [outputChannels]. 
+} + +/** + * A layer that performs batch normalization, + * which is performed along the channel axis, + * and repeated along the other axes, if present. + * :: + * y = BatchnormLayer(x) + * Requires 1 input and produces 1 output. + * + * This operation is described by the following formula: + * + * .. math:: + * y_i = \gamma_i \dfrac{ (x_i - \mu_i)}{\sqrt{\sigma_i^2 + \epsilon}} + \beta_i \;,\;i=1,....,C + * + * Input + * A blob with shape ``[C]`` or ``[C, H, W]``. + * + * Output + * A blob with the same shape as the input. + */ +message BatchnormLayerParams { + uint64 channels = 1; /// Size of the channel dimension in the input. + + /** + * If ``computeMeanVar == true``, + * the mean and variance are calculated from either + * the single input instance, if ``instanceNormalization == true``, + * or the whole batch, if ``instanceNormalization = false``. + * and the values provided in parameters "mean" and "variance" are ignored. + */ + bool computeMeanVar = 5; + bool instanceNormalization = 6; + + /** + * A small constant to avoid division by 0 while normalizing by variance. + * Defaults to ``1e-5`` if not set or set to ``0``. + */ + float epsilon = 10; + + WeightParams gamma=15; /// Parameter of length [channels] + WeightParams beta=16; /// Parameter of length [channels] + WeightParams mean=17; /// Parameter of length [channels] + WeightParams variance=18; /// Parameter of length [channels] +} + +/** + * A spatial pooling layer. + * :: + * y = PoolingLayer(x) + * Requires 1 input and produces 1 output. + * + * Input + * A blob with shape ``[C, H_in, W_in]``. + * Output + * A blob with shape ``[C, H_out, W_out]``. + * + * Padding options are similar to ``ConvolutionLayerParams`` + * with the additional option of ``ValidCompletePadding`` (``includeLastPixel``), + * which ensures that the last application of the kernel + * always includes the last pixel of the input image. 
+ * :: + * H_out = int_division_round_up((H_in + 2 * paddingAmounts[0] - kernelSize[0]),Stride[0]) + 1) + * if ((H_out - 1) * Stride >= H_in + paddingAmounts[0]) { + * H_out = H_out - 1 + * } + * + * The equivalent expressions hold true for ``W_out`` as well. + * Only symmetric padding is supported with this option. + */ +message PoolingLayerParams { + enum PoolingType{ + MAX = 0; + AVERAGE = 1; + L2 = 2; + } + PoolingType type = 1; /// Type of pooling operation. + + /** + * Must be length 2 in the order ``[H, W]``. + * If not set, default value ``[3, 3]`` is used. + */ + repeated uint64 kernelSize = 10; + + /** + * Must be length 2 in the order ``[H, W]``. + * If not set, default value ``[1, 1]`` is used. + */ + repeated uint64 stride = 20; + + message ValidCompletePadding { + /** + * Must be length 2 in order ``[H, W]``. + * If not set, value ``[0, 0]`` is used. + */ + repeated uint64 paddingAmounts = 10; + } + + oneof PoolingPaddingType { + ValidPadding valid = 30; + SamePadding same = 31; + ValidCompletePadding includeLastPixel = 32; + } + + /** + * If true, padded values are excluded from the count (denominator) + * when computing average pooling. + */ + bool avgPoolExcludePadding = 50; + + /** + * If true, global pooling is performed. + * Kernel size is inferred from the input data spatial dimensions. + */ + bool globalPooling = 60; +} + +/** + * A layer that performs padding along spatial dimensions. + * :: + * y = PaddingLayer(x) + * Requires 1 input and produces 1 output. + * + * Input + * A blob with shape ``[C, H_in, W_in]``. + * + * Output + * A blob with shape ``[C, H_out, W_out]``. 
+ * Output dimensions are calculated as follows: + * :: + * H_out = H_in + topPaddingAmount + bottomPaddingAmount + * W_out = W_in + leftPaddingAmount + rightPaddingAmount + * + * topPaddingAmount == Height startEdgeSize == borderAmounts[0].startEdgeSize + * bottomPaddingAmount == Height endEdgeSize == borderAmounts[0].endEdgeSize + * leftPaddingAmount == Width startEdgeSize == borderAmounts[1].startEdgeSize + * rightPaddingAmount == Width endEdgeSize == borderAmounts[1].endEdgeSize + * + * There are three types of padding: + * + * - ``PaddingConstant``, which fills a constant value at the border. + * - ``PaddingReflection``, which reflects the values at the border. + * - ``PaddingReplication``, which replicates the values at the border. + * + * Given the following input: + * :: + * [1, 3, 4] : 1 2 3 4 + * 5 6 7 8 + * 9 10 11 12 + * + * Here is the output of applying the padding + * ``(top=2, left=2, bottom=0, right=0)`` + * with each of the supported types: + * + * - ``PaddingConstant`` (``value = 0``): + * :: + * [1, 5, 6] : 0 0 0 0 0 0 + * 0 0 0 0 0 0 + * 0 0 1 2 3 4 + * 0 0 5 6 7 8 + * 0 0 9 10 11 12 + * + * - ``PaddingReflection``: + * :: + * [1, 5, 6] : 11 10 9 10 11 12 + * 7 6 5 6 7 8 + * 3 2 1 2 3 4 + * 7 6 5 6 7 8 + * 11 10 9 10 11 12 + * + * - ``PaddingReplication``: + * :: + * [1, 5, 6] : 1 1 1 2 3 4 + * 1 1 1 2 3 4 + * 1 1 1 2 3 4 + * 5 5 5 6 7 8 + * 9 9 9 10 11 12 + */ +message PaddingLayerParams { + /** + * Fill a constant value in the padded region. + */ + message PaddingConstant { + float value = 1; + } + + /** + * Reflect the values at the border for padding. + */ + message PaddingReflection { + } + + /** + * Replicate the values at the border for padding. + */ + message PaddingReplication { + } + + oneof PaddingType { + PaddingConstant constant = 1; + PaddingReflection reflection = 2; + PaddingReplication replication = 3; + } + + BorderAmounts paddingAmounts = 10; /// Amounts to be padded to the input. 
+} + +/** + * A layer that concatenates along the channel axis (default) or sequence axis. + * :: + * y = ConcatLayer(x1,x2,....) + * Requires more than 1 input and produces 1 output. + * + * The input and output formats are dependent on ``sequenceConcat``. + * + * If ``sequenceConcat == true``: + * + * Input + * Sequences of length ``Seq_i`` of blobs with shape ``[C, H, W]``. + * Output + * A Sequence of length ``summation(Seq_i)`` of blobs with shape ``[C, H, W]``. + * + * If ``sequenceConcat == false``: + * + * Input + * A blob with shape ``[C_i, H, W]``, where ``i = 1, 2, ...``. + * Output + * A blob with shape ``[summation(C_i), H, W]``. + */ +message ConcatLayerParams { + /** + * If true, concatenate along the sequence axis instead of the channel axis. + */ + bool sequenceConcat = 100; +} + +/** + * A layer that performs local response normalization (LRN). + * :: + * y = LRNLayer(x) + * Requires 1 input and produces 1 output. + * + * Input + * A blob with shape ``[C, H, W]`` + * Output + * A blob with the same shape as the input. + * + * This layer is described by the following formula: + * + * .. math:: + * x_i \leftarrow \dfrac{x_i}{\left ( k + \dfrac{\alpha}{C} \sum_j x_j^2 \right )^\beta} + * + * where the summation is done over a ``(localSize, 1, 1)`` neighborhood --- + * that is, over a window "across" channels in 1x1 spatial neighborhoods. + */ +message LRNLayerParams { + float alpha = 1; + float beta = 2; + uint64 localSize = 3; /// Number of channels in the normalization window. + float k = 4; /// Defaults to 1 if not set or 0. Must be strictly positive. +} + +/** + * Softmax Normalization Layer + * + * A layer that performs softmax normalization. + * Normalization is done along the channel axis. + * :: + * y = SoftmaxLayer(x) + * Requires 1 input and produces 1 output. + * + * Input + * A blob with shape ``[C]`` or ``[C, H, W]``. + * Output + * A blob with the same shape as the input. 
+ * + * This layer is described by the following formula: + * + * .. math:: + * x_i \leftarrow \dfrac{e^{x_i}}{\sum_i{e^{x_i}}} + */ +message SoftmaxLayerParams { +} + +/** + * A layer that uniformly splits across the channel dimension + * to produce a specified number of outputs. + * :: + * (y1,y2,...yN) = SplitLayer(x), where N = nOutputs + * Requires 1 input and produces multiple outputs. + * + * Input + * A blob with shape ``[C]`` or ``[C, H, W]`` + * Output + * ``nOutputs`` blobs with shapes + * ``[C/nOutputs]`` or ``[C/nOutputs, H, W]`` + */ +message SplitLayerParams { + uint64 nOutputs=1; /// The number of outputs. +} + +/** + * A layer that performs elementwise addition. + * :: + * y = AddLayer(x1,x2,...) + * Requires 1 or more than 1 input and produces 1 output. + * + * Input + * One or more blobs with shape ``[C, H, W]``. + * Output + * A blob with shape equal to the input blob. + * + * If only one input is provided, scalar addition is performed: + * + * .. math:: + * y = x + \alpha + * + * If multiple inputs are provided, + * each input is broadcasted to the shape of the first input and added. + * The shapes of inputs after the first must be one of the following: + * ``[1]``, ``[C]``, ``[1, H, W]``, or ``[C, H, W]``. + */ +message AddLayerParams { + /** + * Scalar to be added to the input. + * Only used if there is a single input. + */ + float alpha = 1; +} + +/** + * A layer that performs elementwise multiplication. + * :: + * y = MultiplyLayer(x1,x2,...) + * Requires 1 or more than 1 input and produces 1 output. + * + * Input + * One or more blobs with shape ``[C, H, W]``. + * Output + * A blob with shape equal to the first input blob. + * + * If only one input is provided, scalar multiplication is performed: + * + * .. math:: + * y = \alpha x + * + * If multiple inputs are provided, + * each input is broadcasted to the shape of the first input and multiplied. 
+ * The shapes of inputs after the first must be one of the following: + * ``[1]``, ``[C]``, ``[1, H, W]``, or ``[C, H, W]``. + */ +message MultiplyLayerParams { + /** + * Scalar to be multiplied with the input. + * Only used if there is a single input. + */ + float alpha = 1; +} + +/** + * A layer that applies a unary function. + * :: + * y = UnaryFunctionLayer(x) + * Requires 1 input and produces 1 output. + * + * Input + * A blob with shape ``[C]`` or ``[C, H, W]``. + * Output + * A blob with the same shape as the input. + * + * The input is first modified by shifting and scaling: + * + * .. math:: + * x \leftarrow \text{scale} \cdot x + \text{shift} + */ +message UnaryFunctionLayerParams { + /** + * A unary operator. + * + * The following functions are supported: + * + * ``SQRT`` + * .. math:: f(x) = \sqrt{x} + * + * ``RSQRT`` + * .. math:: f(x) = \dfrac{1}{\sqrt{x + \epsilon}} + * + * ``INVERSE`` + * .. math:: f(x) = \dfrac{1}{x + \epsilon} + * + * ``POWER`` + * .. math:: f(x) = x^\alpha + * + * ``EXP`` + * .. math:: f(x) = e^x + * + * ``LOG`` + * .. math:: f(x) = \log x + * + * ``ABS`` + * .. math:: f(x) = |x| + * + * ``THRESHOLD`` + * .. math:: f(x) = \text{max}(\alpha, x) + */ + enum Operation{ + SQRT = 0; + RSQRT = 1; + INVERSE = 2; + POWER = 3; + EXP = 4; + LOG = 5; + ABS = 6; + THRESHOLD = 7; + } + Operation type = 1; /// The type of unary function. + + /** + * A constant used in ``POWER`` and ``THRESHOLD`` functions. + */ + float alpha = 2; + + /** + * A small constant to avoid division by 0 while normalizing variance. + * Defaults to ``1e-6`` if not set or set to ``0``. + */ + float epsilon = 3; + + /** + * Input is shifted by this amount + * before the unary function is applied. + * Defaults to ``0.0`` if not set. + */ + float shift = 4; + + /** + * Input is scaled by this amount + * before the unary function is applied. + * Defaults to ``1.0`` if not set or set to ``0``. 
+ */ + float scale = 5; +} + +/** + * A layer that scales up spatial dimensions + * using nearest neighbor interpolation. + * :: + * y = UpsampleLayer(x) + * Requires 1 input and produces 1 output. + * + * Input + * A blob with shape ``[C, H, W]``. + * Output + * A blob with shape ``[C, scalingFactor[0] * H, scalingFactor[1] * W]`` + */ +message UpsampleLayerParams { + /** + * Scaling Factor. + * Must be length 2 in order ``[H, W]``. + * If not set, default value ``[1, 1]`` is used. + */ + repeated uint64 scalingFactor = 1; +} + +/** + * A layer that performs elementwise addition of a bias, + * which is broadcasted to match the input shape. + * :: + * y = BiasLayer(x) + * Requires 1 input and produces 1 output. + * + * Input + * A blob with shape ``[C, H, W]``. + * Output + * A blob with the same shape as the input. + */ +message BiasLayerParams { + /** + * The shape of the bias. + * Must be one of the following: + * ``[1]``, ``[C]``, ``[1, H, W]`` or ``[C, H, W]``. + */ + repeated uint64 shape = 1; + + /** + * The bias values. + * The size must be equal to the product of the ``shape`` dimensions. + */ + WeightParams bias = 2; +} + +/** + * A layer that performs elmentwise multiplication by a scale factor + * and optionally adds a bias; + * both the scale and bias are broadcasted to match the input shape. + * :: + * y = ScaleLayer(x) + * Requires 1 input and produces 1 output. + * + * Input + * A blob with shape ``[C, H, W]``. + * Output + * A blob with the same shape as the input. + */ +message ScaleLayerParams { + /** + * The shape of the scale. + * Must be one of the following: + * ``[1]``, ``[C]``, ``[1, H, W]`` or ``[C, H, W]``. + */ + repeated uint64 shapeScale = 1; + + /** + * The scale values. + * The size must be equal to the product of the ``shape`` dimensions. + */ + WeightParams scale = 2; /// Scale values. Size must be equal to the product of dimensions specified in shapeScale. + + bool hasBias = 3; /// If true, a bias is added after scaling. 
+ + /** + * The shape of the bias. + * Must be one of the following: + * ``[1]``, ``[C]``, ``[1, H, W]`` or ``[C, H, W]``. + */ + repeated uint64 shapeBias = 4; + + /** + * The bias values. + * The size must be equal to the product of the ``shape`` dimensions. + */ + WeightParams bias = 5; +} + +/** + * A layer that loads data as a parameter and provides it as an output. + * :: + * y = LoadConstantLayer() + * Takes no input. Produces 1 output. + * + * Input + * None + * Output: + * A blob with shape ``[C, H, W]`` + */ +message LoadConstantLayerParams { + /** + * The shape of the constant to be loaded, + * which must be``[C, H, W]``. + */ + repeated uint64 shape = 1; + + /** + * The data values, + * of size ``C * H * W``. + */ + WeightParams data = 2; +} + +/** + * A layer that performs L2 normalization, i.e. divides by the + * the square root of the sum of squares of all elements of input. + * :: + * y = L2NormalizeLayer(x) + * Requires 1 input and produces 1 output. + * + * Input + * A blob with shape ``[C]`` or ``[C, H, W]``. + * Output + * A blob with the same shape as the input. + * + * This layer is described by the following formula: + * + * .. math:: + * x_i \leftarrow \dfrac{x_i}{\sqrt{\sum{x_i^2} + \epsilon}} + */ +message L2NormalizeLayerParams { + /** + * A small constant to avoid division by 0 while normalizing variance. + * Defaults to ``1e-6`` if not set or set to ``0``. + */ + float epsilon = 1; +} + +/// Data Reorganization Layers +/// -------------------------- + +/** + * A layer that flattens the input. + * :: + * y = FlattenLayer(x) + * Requires 1 input and produces 1 output. + * + * Input + * A blob with shape ``[C, H, W]``. + * Output + * A blob with shape ``[C * H * W, 1, 1]`` + * + * There are two flatten orders: ``CHANNEL_FIRST`` and ``CHANNEL_LAST``. + * ``CHANNEL_FIRST`` does not require data to be rearranged, + * because row major ordering is used by internal storage. + * ``CHANNEL_LAST`` requires data to be rearranged. 
+ */ +message FlattenLayerParams { + enum FlattenOrder { + CHANNEL_FIRST = 0; + CHANNEL_LAST = 1; + } + FlattenOrder mode = 1; +} + +/** + * A layer that recasts the input into a new shape. + * :: + * y = ReshapeLayer(x) + * Requires 1 input and produces 1 output. + * + * Input + * A blob with shape ``[C, H, W]`` or ``[Seq, C, H, W]``. + * Output + * A blob with shape ``[C_out, H_out, W_out]`` + * or ``[Seq_out, C_out, H_out, W_out]``. + * + * There are two reshape orders: ``CHANNEL_FIRST`` and ``CHANNEL_LAST``. + * ``CHANNEL_FIRST`` is equivalent to + * flattening the input to ``[C * H * W, 1, 1]`` in channel first order + * and then reshaping it to the target shape; + * no data rearrangement is required. + * ``CHANNEL_LAST`` is equivalent to + * flattening the input to ``[H * W * C, 1, 1]`` in channel last order, + * reshaping it to ``[H_out, W_out, C_out]`` (it is now in "H_out-major"" order), + * and then permuting it to ``[C_out, H_out, W_out]``; + * both the flattening and permuting requires the data to be rearranged. + */ +message ReshapeLayerParams { + /** + * The shape of the output. + * Must be of length 3 or 4. + * If set to 3, ``targetShape`` is interpreted as + * ``[C_out, H_out, W_out]``, and sequence length of the input is preserved. + * If set to 4, ``targetShape`` is interpreted as + * ``[Seq_out, C_out, H_out, W_out]``, + * where ``Seq_out`` is the new sequence length. + */ + repeated int64 targetShape = 1; + + enum ReshapeOrder { + CHANNEL_FIRST = 0; + CHANNEL_LAST = 1; + } + ReshapeOrder mode = 2; +} + +/** + * A layer that rearranges the dimensions and data of an input. + * :: + * y = PermuteLayer(x) + * Requires 1 input and produces 1 output. + * + * Input + * A sequence of 3-dimensional blobs. ``InputShape = [Seq, C, H, W]``. + * Output + * A sequence of a different length of 3-dimensional blobs. + * Shape: ``[InputShape[axis[0]], InputShape[axis[1]], + * InputShape[axis[2]], InputShape[axis[3]]]``. 
Hence output is a sequence of length ``InputShape[axis[0]]``. + * + * Examples: + * + * - If ``axis`` is set to ``[0, 3, 1, 2]``, + * then the output has shape ``[W,C,H]`` + * and has the same sequence length that of the input. + * - If ``axis`` is set to ``[3, 1, 2, 0]``, + * and the input is a sequence of data + * with length ``Seq`` and shape ``[C, 1, 1]``, + * then the output is a unit sequence of data with shape ``[C, 1, Seq]``. + * - If ``axis`` is set to ``[0, 3, 2, 1]``, + * the output is a reverse of the input: ``[C, H, W] -> [W, H, C]``. + * - If ``axis`` is not set, or is set to ``[0, 1, 2, 3]``, + * the output is the same as the input. + */ +message PermuteLayerParams { + /** + The order in which to permute the dimensions. + Must have length 4 and a permutation of ``[0, 1, 2, 3]``. + */ + repeated uint64 axis = 1; +} + +/** + * A layer that reduces the input to a scalar value using a specified operation. + * :: + * y = ReduceLayer(x) + * Requires 1 input and produces 1 output. + * + * Input + * A blob with shape ``[C, H, W]``. + * Output + * A scalar value. + */ +message ReduceLayerParams { + /* + * The following reduction operations are supported + * and are applied on flattened input array: + * + * ``SUM`` + * Sum of all elements + * + * .. math:: \sum{x_i} + * + * ``AVG`` + * Sum of all elements divided by the number of elements + * + * .. math:: \dfrac{\sum^n{x_i}}{n} + * + * ``PROD`` + * Product of all elements + * + * .. math:: \prod{x_i} + * + * ``LOGSUM`` + * Sum of the natural logarithm of all elements + * + * .. math:: \sum{\ln{(x_i + \epsilon)}} + * + * ``SUMSQUARE`` + * Sum of squares of all elements + * + * .. math:: \sum{x^2} + * + * ``L1`` + * L1 normalization of all elements + * + * .. math:: ||x||_1 = \sum{|x_i|} + * + * ``L2`` + * L2 normalization of all elements + * + * .. 
math:: ||x||_2 = \sqrt{\sum{x_i^2}} + * + */ + enum ReduceOperation { + SUM = 0; + AVG = 1; + PROD = 2; + LOGSUM = 3; + SUMSQUARE = 4; + L1 = 5; + L2 = 6; + } + ReduceOperation mode = 1; /// Specifies function used to reduce. + + /** + * Used if mode is ``LOGSUM``. + * Defaults to ``1e-6`` if not set or is set to ``0``. + */ + float epsilon = 2; +} + +/** + * A layer that crops the spatial dimensions of an input. + * If two inputs are provided, the shape of the second input is used as the reference shape. + * :: + * y = CropLayer(x1) or y = CropLayer(x1,x2) + * Requires 1 or 2 inputs and produces 1 output. + * + * Input + * - 1 input case: A blob with shape ``[C, H_in, W_in]``. + * - 2 input case: 1st blob with shape ``[C, H_in, W_in]``, 2nd blob with shape ``[C, H_out, W_out]``. + * + * Output + * A blob with shape ``[C, H_out, W_out]``. + * + * If one input is used, output is computed as follows: + * :: + * y = x1[:, topCropAmount:H_in - bottomCropAmount, leftCropAmount:W_in - rightCropAmount] + * + * topCropAmount == Height startEdgeSize == borderAmounts[0].startEdgeSize + * bottomCropAmount == Height endEdgeSize == borderAmounts[0].endEdgeSize + * leftCropAmount == Width startEdgeSize == borderAmounts[1].startEdgeSize + * rightCropAmount == Width endEdgeSize == borderAmounts[1].endEdgeSize + * + * H_out = H_in - topCropAmount - bottomCropAmount + * W_out = W_in - leftCropAmount - rightCropAmount + * + * If two inputs are used, output is computed as follows: + * :: + * y = x1[:, offset[0]:offset[0] + H_out, offset[1]:offset[1] + W_out] + */ +message CropLayerParams { + /** + * The amounts to be cropped from the input. + * Used only if a single input is provided. + */ + BorderAmounts cropAmounts = 1; + + /** + * The offset amounts. + * Used only if two inputs are provided. + * Must be of length 2, in order ``[H, W]``. + */ + repeated uint64 offset = 5; +} + +/** + * A layer that computes the elementwise average of the inputs. 
+ * :: + * y = AverageLayer(x1,x2,...) + * Requires multiple inputs and produces 1 output. + * + * Input + * Multiple blobs, each with shape ``[C]`` or ``[C, H, W]``. + * Output + * A blob with the same shape as each input. + */ +message AverageLayerParams { +} + +/** + * A layer that computes the elementwise maximum over the inputs. + * :: + * y = MaxLayer(x1,x2,...) + * Requires multiple inputs and produces 1 output. + * + * Input + * Multiple blobs, each with shape ``[C]`` or ``[C, H, W]``. + * Output + * A blob with the same shape as each input. + */ +message MaxLayerParams { +} + +/** + * A layer that computes the elementwise minimum over the inputs. + * :: + * y = MinLayer(x1,x2,...) + * Requires multiple inputs and produces 1 output. + * + * Input + * Multiple blobs, each with shape ``[C]`` or ``[C, H, W]``. + * Output + * A blob with the same shape as each input. + */ +message MinLayerParams { +} + +/** + * A layer that computes the dot product of two vectors. + * :: + * y = DotProductLayer(x1,x2) + * Requires 2 inputs and produces 1 output. + * + * Input + * Two blobs with shape ``[C]``. + * Output + * A scalar. + */ +message DotProductLayerParams { + /** + * If true, inputs are normalized first, + * thereby computing the cosine similarity. + */ + bool cosineSimilarity = 1; +} + +/** + * A layer that performs mean variance normalization. + * :: + * y = MeanVarianceNormalizeLayer(x) + * Requires 1 input and produces 1 output. + * + * Input + * A blob with shape ``[C]`` or ``[C, H, W]``. + * Output + * A blob with the same shape as the input. + * + * If ``acrossChannels == true`` + * normalization is performed on flattened input. + * + * If ``acrossChannels == false`` + * normalization is performed within a channel, + * across spatial dimensions. + */ +message MeanVarianceNormalizeLayerParams { + /** + * If true, mean and variance are computed across channels. + */ + bool acrossChannels = 1; + + /** + * If false, only mean is subtracted. 
+ */ + bool normalizeVariance = 2; + + /** + * A small constant to avoid division by 0 while normalizing variance. + * Defaults to ``1e-6`` if not set or set to ``0``. + */ + float epsilon = 3; +} + +/** + * A layer that repeats a sequence. + * :: + * y = SequenceRepeatLayer(x) + * Requires 1 input and produces 1 output. + * + * Input + * A sequence of blobs, i.e. shape is either ``[Seq, C]`` or ``[Seq, C, H, W]``. + * Output + * A sequence of length ``nRepetitions * Seq`` + * with shape corresponding to the input, + * i.e. shape is either ``[nRepetitions * Seq, C]`` or ``[nRepetitions * Seq, C, H, W]``. + */ +message SequenceRepeatLayerParams { + /** + * Number of repetitions. + * Defaults to ``1`` if not set or set to ``0``. + */ + uint64 nRepetitions = 1; +} + +/// Recurrent Layers +/// ---------------- + +/* + * The following activations are supported with recurrent layers: + * - Linear + * - Sigmoid + * - Tanh + * - ReLU + * - Scaled Hyperbolic Tangent: alpha * tanh(beta * x), currently only supported for alpha = 1.7159, beta = 2/3 + * - Hard Sigmoid: min(max(alpha * x + beta, 0), 1), currently only supported for alpha = 0.2, beta = 0.5 + */ + +/** + * A simple recurrent layer. + * :: + * y_t = SimpleRecurrentLayer(x_t, y_{t-1}) + * + * Input + * A sequence of vectors of size ``inputVectorSize`` + * with shape ``[Seq, inputVectorSize]``. + * Output + * A vector of size ``outputVectorSize``. It is either the final output or a sequence of outputs at all time steps. + * - Output Shape: ``[1,outputVectorSize]`` , if ``sequenceOutput == false`` + * - Output Shape: ``[Seq,outputVectorSize]`` , if ``sequenceOutput == true`` + * + * This layer is described by the following equation: + * + * .. 
math:: + * \boldsymbol{y_t} = f(\mathrm{clip}(W \boldsymbol{x_t} + \ + * R \boldsymbol{y_{t-1}} + b)) + * + * - ``W`` is a 2-dimensional weight matrix + * (``[outputVectorSize, inputVectorSize]``, row-major) + * - ``R`` is a 2-dimensional recursion matrix + * (``[outputVectorSize, outputVectorSize]``, row-major) + * - ``b`` is a 1-dimensional bias vector (``[outputVectorSize]``) + * - ``f()`` is an activation + * - ``clip()`` is a function that constrains values between ``[-50.0, 50.0]`` + */ +message SimpleRecurrentLayerParams { + uint64 inputVectorSize = 1; /// The size of the input vectors. + uint64 outputVectorSize = 2; /// The size of the output vectors. + + /** + * Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5) + */ + ActivationParams activation = 10; /// The activation function. + + /** + If false output is just the result after final state update. + If true, output is a sequence, containing outputs at all time steps. + */ + bool sequenceOutput = 15; + + bool hasBiasVector = 20; /// If false, no bias is added. + + WeightParams weightMatrix = 30; /// Weight matrix W. + WeightParams recursionMatrix = 31; /// Recursion Weight matrix R. + WeightParams biasVector = 32; /// Bias vector b. + + bool reverseInput = 100; + // If true, then the node processes the input sequence from right to left +} + +/** + * Gated-Recurrent Unit (GRU) Layer + * :: + * y_t = GRULayer(x_t, y_{t-1}) + * + * Input + * A sequence of vectors of size ``inputVectorSize`` + * with shape ``[Seq, inputVectorSize]``. + * Output + * A vector of size ``outputVectorSize``. It is either the final output or a sequence of outputs at all time steps. + * - Output Shape: ``[1,outputVectorSize]`` , if ``sequenceOutput == false`` + * - Output Shape: ``[Seq,outputVectorSize]`` , if ``sequenceOutput == true`` + * + * This layer is described by the following equations: + * + * Update Gate + * .. 
math:: + * \boldsymbol{z_t} = \ + * f(\mathrm{clip}(W_z \boldsymbol{x_t} + \ + * R_z \boldsymbol{y_{t-1}} + b_z) + * + * Reset Gate + * .. math:: + * \boldsymbol{r_t} = \ + * f(\mathrm{clip}(W_r \boldsymbol{x_t} + \ + * R_r \boldsymbol{y_{t-1}} + b_r)) + * + * Cell Memory State + * .. math:: + * \boldsymbol{c_t} = \ + * \boldsymbol{y_{t-1}} \odot \boldsymbol{r_t} + * + * Output Gate + * .. math:: + * \boldsymbol{o_t} = \ + * g(\mathrm{clip}(W_o \boldsymbol{x_t} + \ + * R_o \boldsymbol{c_t} + b_o)) + * + * Output + * .. math:: + * \boldsymbol{y_t} = \ + * (1 - \boldsymbol{z_t}) \odot \boldsymbol{o_t} + \ + * \boldsymbol{z_t} \odot \boldsymbol{y_{t-1}} + * + * - ``W_z``, ``W_r``, ``W_o`` are 2-dimensional input weight matrices + * (``[outputVectorSize, inputVectorSize]``, row-major) + * - ``R_z``, ``R_r``, ``R_o`` are 2-dimensional recursion matrices + * (``[outputVectorSize, outputVectorSize]``, row-major) + * - ``b_z``, ``b_r``, ``b_o`` are 1-dimensional bias vectors + * (``[outputVectorSize]``) + * - ``f()``, ``g()`` are activations + * - ``clip()`` is a function that constrains values between ``[-50.0, 50.0]`` + * - ``⊙`` denotes the elementwise product of matrices + */ +message GRULayerParams { + uint64 inputVectorSize = 1; /// Size of the input vectors. + uint64 outputVectorSize = 2; /// Size of the output vectors. + + /** + * 2 element array representing activations [f(), g()] in that order. + * Typical values used = [sigmoid, tanh]. + * Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5) + */ + repeated ActivationParams activations = 10; + + /** + * If false output is just the result after final state update. + * If true, output is a sequence, containing outputs at all time steps. + */ + bool sequenceOutput = 15; + + /** + * If false, no biases (``b_z``, ``b_r``, ``b_o``) are added. 
+ */ + bool hasBiasVectors = 20; + + WeightParams updateGateWeightMatrix = 30; /// Weight Matrix W_z. + WeightParams resetGateWeightMatrix = 31; /// Weight Matrix W_r. + WeightParams outputGateWeightMatrix = 32; /// Weight Matrix W_o. + + WeightParams updateGateRecursionMatrix = 50; /// Recursion Weight Matrix R_z. + WeightParams resetGateRecursionMatrix = 51; /// Recursion Weight Matrix R_r. + WeightParams outputGateRecursionMatrix = 52; /// Recursion Weight Matrix R_o. + + WeightParams updateGateBiasVector = 70; /// Bias vector b_z. + WeightParams resetGateBiasVector = 71; /// Bias vector b_r. + WeightParams outputGateBiasVector = 72; /// Bias vector b_o. + + /// If true, then the node processes the input sequence from right to left + bool reverseInput = 100; +} + +/** + * Long short-term memory (LSTM) parameters. + * + * This is described by the following equations: + * + * Input Gate + * .. math:: + * \boldsymbol{i_t} = \ + * f(\mathrm{clip}(W_i \boldsymbol{x_t} + \ + * R_i \boldsymbol{y_{t-1}} + \ + * p_i \odot c_{t-1} + b_i)) + * + * Forget Gate + * .. math:: + * \boldsymbol{f_t} = \ + * f(\mathrm{clip}(W_f \boldsymbol{x_t} + \ + * R_f \boldsymbol{y_{t-1}} + \ + * p_f \odot c_{t-1} + b_f)) + * + * Block Input + * .. math:: + * \boldsymbol{z_t} = \ + * g(\mathrm{clip}(W_z \boldsymbol{x_t} + \ + * R_z \boldsymbol{y_{t-1}} + b_z)) + * + * Cell Memory State + * .. math:: + * \boldsymbol{c_t} = \ + * \boldsymbol{c_{t-1}} \odot \boldsymbol{f_t} + \ + * \boldsymbol{i_t} \odot \boldsymbol{z_t} + * + * Output Gate + * .. math:: + * \boldsymbol{o_t} = \ + * f(\mathrm{clip}(W_o \boldsymbol{x_t} + \ + * R_o \boldsymbol{y_{t-1}} + \ + * p_o \odot c_t + b_o)) + * + * Output + * .. 
math:: + * \boldsymbol{y_t} = \ + * h(\boldsymbol{c_t}) \odot \boldsymbol{o_t} + * + * - ``W_i``, ``W_f``, ``W_z``, ``W_o`` are 2-dimensional input weight matrices + * (``[outputVectorSize, inputVectorSize]``, row-major) + * - ``R_i``, ``R_f``, ``R_z``, ``R_o`` are 2-dimensional recursion matrices + * (``[outputVectorSize, outputVectorSize]``, row-major) + * - ``b_i``, ``b_f``, ``b_z``, ``b_o`` are 1-dimensional bias vectors + * (``[outputVectorSize]``) + * - ``p_``, ``p_f``, ``p_o`` are 1-dimensional peephole vectors + * (``[outputVectorSize]``) + * - ``f()``, ``g()``, ``h()`` are activations + * - ``clip()`` is a function that constrains values between ``[-50.0, 50.0]`` + * - ``⊙`` denotes the elementwise product of matrices + */ +message LSTMParams { + /** + * If true, output is a sequence, containing outputs at all time steps. + * If false, output is just the result after final state update. + */ + bool sequenceOutput = 10; + + /** + * If false, no biases (``b_i``, ``b_f``, ``b_z``, ``b_o``) are added. + */ + bool hasBiasVectors = 20; + + /** + * If true, a vector of ``1`` values is added to ``b_f``. + */ + bool forgetBias = 30; + + /** + * If true, peephole vectors are included. + */ + bool hasPeepholeVectors = 40; + + /** + * If the coupled Input and Forget flag is on, the behaviour of + * ``c_t`` is changed to the following (i.e. forget gate is not used): + * + * .. math:: + * \boldsymbol{c_t} = \ + * \boldsymbol{c_{t-1}} \odot (1 - \boldsymbol{i_t}) + \ + * \boldsymbol{i_t} \odot \boldsymbol{z_t} + * + */ + bool coupledInputAndForgetGate = 50; + + /** + * Places a limit on the maximum and minimum values of ``c_t``. + * c_t = min(c_t, cellClipThreshold) + * c_t = max(c_t, -cellClipThreshold) + * If 0, it is set to its default value = 50.0. + */ + float cellClipThreshold = 60; +} + +/** + * Weights for long short-term memory (LSTM) layers + */ +message LSTMWeightParams { + WeightParams inputGateWeightMatrix = 1; /// Weight Matrix W_i. 
+ WeightParams forgetGateWeightMatrix = 2; /// Weight Matrix W_f. + WeightParams blockInputWeightMatrix = 3; /// Weight Matrix W_z. + WeightParams outputGateWeightMatrix = 4; /// Weight Matrix W_o. + + WeightParams inputGateRecursionMatrix = 20; /// Recursion Weight Matrix R_i. + WeightParams forgetGateRecursionMatrix = 21; /// Recursion Weight Matrix R_f. + WeightParams blockInputRecursionMatrix = 22; /// Recursion Weight Matrix R_z. + WeightParams outputGateRecursionMatrix = 23; /// Recursion Weight Matrix R_o. + + //biases: + WeightParams inputGateBiasVector = 40; /// Bias vector b_i. + WeightParams forgetGateBiasVector = 41; /// Bias vector b_f. + WeightParams blockInputBiasVector = 42; /// Bias vector b_z. + WeightParams outputGateBiasVector = 43; /// Bias vector b_o. + + //peepholes: + WeightParams inputGatePeepholeVector = 60; /// Peephole vector p_i. + WeightParams forgetGatePeepholeVector = 61; /// Peephole vector p_f. + WeightParams outputGatePeepholeVector = 62; /// Peephole vector p_o. +} + +/** + * A unidirectional long short-term memory (LSTM) layer. + * :: + * (y_t, c_t) = UniDirectionalLSTMLayer(x_t, y_{t-1}, c_{t-1}) + * + * Input + * A sequence of vectors of size ``inputVectorSize`` + * with shape ``[Seq, inputVectorSize]``. + * Output + * A vector of size ``outputVectorSize``. It is either the final output or a sequence of outputs at all time steps. + * - Output Shape: ``[1,outputVectorSize]`` , if ``sequenceOutput == false`` + * - Output Shape: ``[Seq,outputVectorSize]`` , if ``sequenceOutput == true`` + * + */ +message UniDirectionalLSTMLayerParams { + uint64 inputVectorSize = 1; /// Size of the input vectors. + uint64 outputVectorSize = 2; /// Size of the output vectors. + + /** + * 3 element array representing activations [f(),g(),h()] in that order. + * Typical values used = [sigmoid, tanh, tanh]. 
+ * Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5) + */ + repeated ActivationParams activations = 10; + + LSTMParams params = 15; + + LSTMWeightParams weightParams = 20; /// Weights, biases and peepholes. + + /// If true, then the node processes the input sequence from right to left + bool reverseInput = 100; +} + +/** + * Bidirectional long short-term memory (LSTM) layer + * :: + * (y_t, c_t, y_t_reverse, c_t_reverse) = BiDirectionalLSTMLayer(x_t, y_{t-1}, c_{t-1}, y_{t-1}_reverse, c_{t-1}_reverse) + * + * Input + * A sequence of vectors of size ``inputVectorSize`` + * with shape ``[Seq, inputVectorSize]``. + * Output + * A vector of size ``2 * outputVectorSize``. It is either the final output or a sequence of outputs at all time steps. + * - Output Shape: ``[1, 2 * outputVectorSize]`` , if ``sequenceOutput == false`` + * - Output Shape: ``[Seq, 2 * outputVectorSize]`` , if ``sequenceOutput == true`` + * + * The first LSTM operates on the input sequence in the forward direction. + * The second LSTM operates on the input sequence in the reverse direction. 
+ * + * Example: given the input sequence ``[x_1, x_2, x_3]``, + * where ``x_i`` are vectors at time index ``i``: + * + * The forward LSTM output is ``[yf_1, yf_2, yf_3]``, + * + * where ``yf_i`` are vectors of size ``outputVectorSize``: + * + * - ``yf_1`` is the output at the end of sequence {``x_1``} + * - ``yf_2`` is the output at the end of sequence {``x_1``, ``x_2``} + * - ``yf_3`` is the output at the end of sequence {``x_1``, ``x_2``, ``x_3``} + * + * The backward LSTM output: ``[yb_1, yb_2, yb_3]``, + * + * where ``yb_i`` are vectors of size ``outputVectorSize``: + * + * - ``yb_1`` is the output at the end of sequence {``x_3``} + * - ``yb_2`` is the output at the end of sequence {``x_3``, ``x_2``} + * - ``yb_3`` is the output at the end of sequence {``x_3``, ``x_2``, ``x_1``} + * + * Output of the bi-dir layer: + * + * - if ``sequenceOutput = True`` : { ``[yf_1, yb_3]``, ``[yf_2, yb_2]``, ``[yf_3, yb_1]`` } + * - if ``sequenceOutput = False`` : { ``[yf_3, yb_3]`` } + */ +message BiDirectionalLSTMLayerParams { + /** + * Size of the input vectors. + */ + uint64 inputVectorSize = 1; + /** + * Size of the outputs vectors. + * It is same for both forward and backward LSTMs. + */ + uint64 outputVectorSize = 2; + + /** + * 3 element array representing activations [f(),g(),h()] in that order. + * Typical values used = [sigmoid, tanh, tanh]. + * Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5) + */ + repeated ActivationParams activationsForwardLSTM = 10; + /** + * Currently, backward LSTM activations + * must be same as the ones for the forward LSTM. + */ + repeated ActivationParams activationsBackwardLSTM = 11; + + /** + * Common parameters shared by the forward and backward LSTMs. + */ + LSTMParams params = 15; + + /** + * Weights and biases. + * Must be a length 2 message, + * for the forward and backward LSTM respectively. 
+ */ + repeated LSTMWeightParams weightParams = 20; +} + +/// Neural Network Specializations +/// ------------------------------ + +/** + * A neural network specialized as a classifier. + */ +message NeuralNetworkClassifier { + repeated NeuralNetworkLayer layers = 1; + repeated NeuralNetworkPreprocessing preprocessing = 2; + + /** + * Mapping from indexed vector of probabilities to class label + */ + oneof ClassLabels { + StringVector stringClassLabels = 100; + Int64Vector int64ClassLabels = 101; + } +} + +/** + * A neural network specialized as a regressor. + */ +message NeuralNetworkRegressor { + repeated NeuralNetworkLayer layers = 1; + repeated NeuralNetworkPreprocessing preprocessing = 2; +} diff --git a/src/interfaces/coreml/Normalizer.proto b/src/interfaces/coreml/Normalizer.proto new file mode 100644 index 00000000000..627f7e2e3af --- /dev/null +++ b/src/interfaces/coreml/Normalizer.proto @@ -0,0 +1,38 @@ +// Copyright (c) 2017, Apple Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-3-clause license that can be +// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause + +syntax = "proto3"; +option optimize_for = LITE_RUNTIME; + +package CoreML.Specification; + +/** + * A normalization preprocessor. + */ +message Normalizer { + /** + * There are three normalization modes, + * which have the corresponding formulas: + * + * Max + * .. math:: + * max(x_i) + * + * L1 + * .. math:: + * z = ||x||_1 = \sum_{i=1}^{n} |x_i| + * + * L2 + * .. math:: + * z = ||x||_2 = \sqrt{\sum_{i=1}^{n} x_i^2} + */ + enum NormType { + LMax = 0; + L1 = 1; + L2 = 2; + } + + NormType normType = 1; +} diff --git a/src/interfaces/coreml/OneHotEncoder.proto b/src/interfaces/coreml/OneHotEncoder.proto new file mode 100644 index 00000000000..f47cf281662 --- /dev/null +++ b/src/interfaces/coreml/OneHotEncoder.proto @@ -0,0 +1,41 @@ +// Copyright (c) 2017, Apple Inc. All rights reserved. 
+// +// Use of this source code is governed by a BSD-3-clause license that can be +// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause + +syntax = "proto3"; +option optimize_for = LITE_RUNTIME; + +import public "DataStructures.proto"; + +package CoreML.Specification; + +/** + * Transforms a categorical feature into an array. The array will be all + * zeros expect a single entry of one. + * + * Each categorical value will map to an index, this mapping is given by + * either the ``stringCategories`` parameter or the ``int64Categories`` + * parameter. + */ +message OneHotEncoder { + enum HandleUnknown { + ErrorOnUnknown = 0; + IgnoreUnknown = 1; // Output will be all zeros for unknown values. + } + + /** + * Mapping to be used for the encoding. The position of the category in + * the below vector determines where the single one entry will be in the + * output. + */ + oneof CategoryType { + StringVector stringCategories = 1; + Int64Vector int64Categories = 2; + } + + // Output can be a dictionary with only one entry, instead of an array. + bool outputSparse = 10; + + HandleUnknown handleUnknown = 11; +} diff --git a/src/interfaces/coreml/README.rst b/src/interfaces/coreml/README.rst new file mode 100644 index 00000000000..97ea0b1dac2 --- /dev/null +++ b/src/interfaces/coreml/README.rst @@ -0,0 +1,10 @@ +================================== +Core ML Model Format Specification +================================== + +This directory contains the protobuf message definitions +that comprise the Core ML model document (``.mlmodel``) format. + +The top-level message is ``Model``, which is defined in ``Model.proto``. +Other message types describe data structures, feature types, +feature engineering model types, and predictive model types. 
\ No newline at end of file diff --git a/src/interfaces/coreml/SVM.proto b/src/interfaces/coreml/SVM.proto new file mode 100644 index 00000000000..932a4ec2166 --- /dev/null +++ b/src/interfaces/coreml/SVM.proto @@ -0,0 +1,195 @@ +// Copyright (c) 2017, Apple Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-3-clause license that can be +// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause + +syntax = "proto3"; +option optimize_for = LITE_RUNTIME; + +import public "DataStructures.proto"; + +package CoreML.Specification; + +/// Kernel Definitions +/// ------------------ + +/** + * A linear kernel. + * + * This function has the following formula: + * + * .. math:: + * K(\boldsymbol{x}, \boldsymbol{x'}) = \boldsymbol{x}^T \boldsymbol{x'} + */ +message LinearKernel { +} + +/** + * A Gaussian radial basis function (RBF) kernel. + * + * This function has the following formula: + * + * .. math:: + * K(\boldsymbol{x}, \boldsymbol{x'}) = \ + * \exp(-\gamma || \boldsymbol{x} - \boldsymbol{x'} ||^2 ) + * + */ +message RBFKernel { + double gamma = 1; +} + +/** + * A polynomial kernel. + * + * This function has the following formula: + * + * .. math:: + * K(\boldsymbol{x}, \boldsymbol{x'}) = \ + * (\gamma \boldsymbol{x}^T \boldsymbol{x'} + c)^{degree} + */ +message PolyKernel { + int32 degree = 1; + double c = 2; + double gamma = 3; +} + +/** + * A sigmoid kernel. + * + * This function has the following formula: + * + * .. math:: + * K(\boldsymbol{x}, \boldsymbol{x'}) = \ + * \tanh(\gamma \boldsymbol{x}^T \boldsymbol{x'} + c) + */ +message SigmoidKernel { + double gamma = 1; + double c = 2; +} + +/** + * A kernel. + */ +message Kernel { + oneof kernel { + LinearKernel linearKernel = 1; + RBFKernel rbfKernel = 2; + PolyKernel polyKernel = 3; + SigmoidKernel sigmoidKernel = 4; + } +} + + +/// Support Vector Definitions +/// -------------------------- + +/** + * A sparse node. 
+ */ +message SparseNode { + int32 index = 1; // 1-based indexes, like libsvm + double value = 2; +} + +/** + * A sparse vector. + */ +message SparseVector { + repeated SparseNode nodes = 1; +} + +/** + * One or more sparse support vectors. + */ +message SparseSupportVectors { + repeated SparseVector vectors = 1; +} + +/** + * A dense vector. + */ +message DenseVector { + repeated double values = 1; +} + +/** + * One or more dense support vectors. + */ +message DenseSupportVectors { + repeated DenseVector vectors = 1; +} + +/** + * One or more coefficients. + */ +message Coefficients { + repeated double alpha = 1; +} + +/** + * A support vector regressor. + */ +message SupportVectorRegressor { + Kernel kernel = 1; + + // Support vectors, either sparse or dense format + oneof supportVectors { + SparseSupportVectors sparseSupportVectors = 2; + DenseSupportVectors denseSupportVectors = 3; + } + + // Coefficients, one for each support vector + Coefficients coefficients = 4; + + double rho = 5; +} + +/** + * A support vector classifier + */ +message SupportVectorClassifier { + Kernel kernel = 1; + + /** + * The number of support vectors for each class. + */ + repeated int32 numberOfSupportVectorsPerClass = 2; + + /** + * The support vectors, in either sparse or dense format. + */ + oneof supportVectors { + SparseSupportVectors sparseSupportVectors = 3; + DenseSupportVectors denseSupportVectors = 4; + } + + /** + * The coefficients, essentially a two dimensional array of + * size: (numberOfClasses-1) by (total number of support vectors) + */ + repeated Coefficients coefficients = 5; + + /** + * Constants for decision function, + * with K*(K-1) / 2 elements, + * where K is the number of classes. + */ + repeated double rho = 6; + + /** + * Pairwise probability information for A vs B classifier. + * Total of K*(K-1)/2 elements where K is the number of classes. + * These fields are optional, + * and only required if you want probabilities or multi class predictions. 
+ */ + repeated double probA = 7; + repeated double probB = 8; + + /** + * Class label mapping. + */ + oneof ClassLabels { + StringVector stringClassLabels = 100; + Int64Vector int64ClassLabels = 101; + } +} diff --git a/src/interfaces/coreml/Scaler.proto b/src/interfaces/coreml/Scaler.proto new file mode 100644 index 00000000000..f0e13d54be2 --- /dev/null +++ b/src/interfaces/coreml/Scaler.proto @@ -0,0 +1,34 @@ +// Copyright (c) 2017, Apple Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-3-clause license that can be +// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause + +syntax = "proto3"; +option optimize_for = LITE_RUNTIME; + +package CoreML.Specification; + +/** + * A scaling operation. + * + * This function has the following formula: + * + * .. math:: + * f(x) = scaleValue \cdot (x + shiftValue) + * + * If the ``scaleValue`` is not given, the default value 1 is used. + * If the ``shiftValue`` is not given, the default value 0 is used. + * + * If ``scaleValue`` and ``shiftValue`` are each a single value + * and the input is an array, then the scale and shift are applied + * to each element of the array. + * + * If the input is an integer, then it is converted to a double to + * perform the scaling operation. If the output type is an integer, + * then it is cast to an integer. If that cast is lossy, then an + * error is generated. + */ +message Scaler { + repeated double shiftValue = 1; + repeated double scaleValue = 2; +} diff --git a/src/interfaces/coreml/TreeEnsemble.proto b/src/interfaces/coreml/TreeEnsemble.proto new file mode 100644 index 00000000000..defebee9885 --- /dev/null +++ b/src/interfaces/coreml/TreeEnsemble.proto @@ -0,0 +1,161 @@ +// Copyright (c) 2017, Apple Inc. All rights reserved. 
+// +// Use of this source code is governed by a BSD-3-clause license that can be +// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause + +/** + * Each tree is a collection of nodes, + * each of which is identified by a unique identifier. + * + * Each node is either a branch or a leaf node. + * A branch node evaluates a value according to a behavior; + * if true, the node identified by ``true_child_node_id`` is evaluated next, + * if false, the node identified by ``false_child_node_id`` is evaluated next. + * A leaf node adds the evaluation value to the base prediction value + * to get the final prediction. + * + * A tree must have exactly one root node, + * which has no parent node. + * A tree must not terminate on a branch node. + * All leaf nodes must be accessible + * by evaluating one or more branch nodes in sequence, + * starting from the root node. + */ + +syntax = "proto3"; +option optimize_for = LITE_RUNTIME; + +import public "DataStructures.proto"; + +package CoreML.Specification; + +/** + * A tree ensemble post-evaluation transform. + */ +enum TreeEnsemblePostEvaluationTransform { + NoTransform = 0; + Classification_SoftMax = 1; + Regression_Logistic = 2; + Classification_SoftMaxWithZeroClassReference = 3; +} + +/** + * Tree ensemble parameters. + */ +message TreeEnsembleParameters { + message TreeNode { + uint64 treeId = 1; + uint64 nodeId = 2; + + enum TreeNodeBehavior { + BranchOnValueLessThanEqual = 0; + BranchOnValueLessThan = 1; + BranchOnValueGreaterThanEqual = 2; + BranchOnValueGreaterThan = 3; + BranchOnValueEqual = 4; + BranchOnValueNotEqual = 5; + LeafNode = 6; + } + + /** + * The branch mode parameters. + * + * If branch is false, + * then the parameters in this section must be filled in + * to determine how the branching functions. + */ + TreeNodeBehavior nodeBehavior = 3; + + /** + * If the node behavior mode is a branch mode, + * then these values must be filled in. 
+ */ + uint64 branchFeatureIndex = 10; + double branchFeatureValue = 11; + uint64 trueChildNodeId = 12; + uint64 falseChildNodeId = 13; + bool missingValueTracksTrueChild = 14; + + /** + * The leaf mode. + * + * If ``nodeBahavior`` == ``LeafNode``, + * then the evaluationValue is added to the base prediction value + * in order to get the final prediction. + * To support multiclass classification + * as well as regression and binary classification, + * the evaluation value is encoded here as a sparse vector, + * with evaluationIndex being the index of the base vector + * that evaluation value is added to. + * In the single class case, + * it is expected that evaluationIndex is exactly 0. + */ + message EvaluationInfo { + uint64 evaluationIndex = 1; + double evaluationValue = 2; + } + + repeated EvaluationInfo evaluationInfo = 20; + + /** + * The relative hit rate of a node for optimization purposes. + * + * This value has no effect on the accuracy of the result; + * it allows the tree to optimize for frequent branches. + * The value is relative, + * compared to the hit rates of other branch nodes. + * + * You typically use a proportion of training samples + * that reached this node + * or some similar metric to derive this value. + */ + double relativeHitRate = 30; + } + + repeated TreeNode nodes = 1; + + /** + * The number of prediction dimensions or classes in the model. + * + * All instances of ``evaluationIndex`` in a leaf node + * must be less than this value, + * and the number of values in the ``basePredictionValue`` field + * must be equal to this value. + * + * For regression, + * this is the dimension of the prediction. + * For classification, + * this is the number of classes. + */ + uint64 numPredictionDimensions = 2; + + /** + * The base prediction value. + * + * The number of values in this must match + * the default values of the tree model. + */ + repeated double basePredictionValue = 3; +} + +/** + * A tree ensemble classifier. 
+ */ +message TreeEnsembleClassifier { + TreeEnsembleParameters treeEnsemble = 1; + TreeEnsemblePostEvaluationTransform postEvaluationTransform = 2; + + // Required class label mapping + oneof ClassLabels { + StringVector stringClassLabels = 100; + Int64Vector int64ClassLabels = 101; + } +} + +/** + * A tree ensemble regressor. + */ +message TreeEnsembleRegressor { + TreeEnsembleParameters treeEnsemble = 1; + TreeEnsemblePostEvaluationTransform postEvaluationTransform = 2; +} diff --git a/src/interfaces/csharp/CMakeLists.txt b/src/interfaces/csharp/CMakeLists.txt new file mode 100644 index 00000000000..2384ed5a5d4 --- /dev/null +++ b/src/interfaces/csharp/CMakeLists.txt @@ -0,0 +1,23 @@ +FIND_PACKAGE(CSharp REQUIRED) +UNSET(TARGET_SWIGFLAGS) + +include(ShogunInterfaces) + +GENERATE_INTERFACE_TARGET(csharp ${CMAKE_CURRENT_SOURCE_DIR} "") + +ADD_CUSTOM_COMMAND(TARGET interface_csharp + POST_BUILD + COMMAND "${CSHARP_COMPILER}" /t:library *.cs /out:shogun.dll + COMMENT "Creating shogun.dll") +IF(NOT CSHARP_INSTALL_DIR) + SET(CSHARP_INSTALL_DIR "lib/cli/shogun") +ENDIF(NOT CSHARP_INSTALL_DIR) + +SET(INTERFACE_CSHARP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) +INSTALL(TARGETS interface_csharp + LIBRARY DESTINATION ${CSHARP_INSTALL_DIR} + COMPONENT csharp) + +INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/shogun.dll + DESTINATION ${CSHARP_INSTALL_DIR} + COMPONENT csharp) diff --git a/src/interfaces/java_modular/sg_print_functions.cpp b/src/interfaces/csharp/sg_print_functions.cpp similarity index 81% rename from src/interfaces/java_modular/sg_print_functions.cpp rename to src/interfaces/csharp/sg_print_functions.cpp index ea4d1d854a2..b9637930f62 100644 --- a/src/interfaces/java_modular/sg_print_functions.cpp +++ b/src/interfaces/csharp/sg_print_functions.cpp @@ -15,7 +15,3 @@ void sg_global_print_error(FILE* target, const char* str) { fprintf(target, "%s", str); } - -void sg_global_cancel_computations(bool &delayed, bool &immediately) -{ -} diff --git 
a/src/interfaces/csharp_modular/swig_typemaps.i b/src/interfaces/csharp/swig_typemaps.i similarity index 100% rename from src/interfaces/csharp_modular/swig_typemaps.i rename to src/interfaces/csharp/swig_typemaps.i diff --git a/src/interfaces/csharp_modular/CMakeLists.txt b/src/interfaces/csharp_modular/CMakeLists.txt deleted file mode 100644 index 439a078ce1b..00000000000 --- a/src/interfaces/csharp_modular/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -FIND_PACKAGE(CSharp REQUIRED) -UNSET(TARGET_SWIGFLAGS) - -include(CommonModularInterface) - -GENERATE_MODULAR_TARGET(csharp ${CMAKE_CURRENT_SOURCE_DIR} "") - -ADD_CUSTOM_COMMAND(TARGET csharp_modular - POST_BUILD - COMMAND "${CSHARP_COMPILER}" /t:library *.cs /out:modshogun.dll - COMMENT "Creating modshogun.dll") -IF(NOT CSHARP_INSTALL_DIR) - SET(CSHARP_INSTALL_DIR "lib/cli/shogun") -ENDIF(NOT CSHARP_INSTALL_DIR) - -SET(CSHARP_MODULAR_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) -INSTALL(TARGETS csharp_modular - LIBRARY DESTINATION ${CSHARP_INSTALL_DIR} - COMPONENT csharp) - -INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/modshogun.dll - DESTINATION ${CSHARP_INSTALL_DIR} - COMPONENT csharp) diff --git a/src/interfaces/java_modular/CMakeLists.txt b/src/interfaces/java/CMakeLists.txt similarity index 80% rename from src/interfaces/java_modular/CMakeLists.txt rename to src/interfaces/java/CMakeLists.txt index 89e61ac56ab..6cd45381232 100644 --- a/src/interfaces/java_modular/CMakeLists.txt +++ b/src/interfaces/java/CMakeLists.txt @@ -10,20 +10,20 @@ IF(JBLAS) SET(HAVE_JBLAS 1) ELSE() MESSAGE(FATAL_ERROR "JBlas could not be found!\n" - "It is required for java modular interface!!!") + "It is required for java interface!") ENDIF() SET(TARGET_SWIGFLAGS "-package\;org.shogun") -include(CommonModularInterface) +include(ShogunInterfaces) include_directories(${JNI_INCLUDE_DIRS}) -GENERATE_MODULAR_TARGET(java ${CMAKE_CURRENT_SOURCE_DIR} "") +GENERATE_INTERFACE_TARGET(java ${CMAKE_CURRENT_SOURCE_DIR} "") 
set(CMAKE_JAVA_TARGET_OUTPUT_NAME shogun) SET(CMAKE_JAVA_INCLUDE_PATH ${JBLAS}) -ADD_CUSTOM_COMMAND(TARGET java_modular POST_BUILD +ADD_CUSTOM_COMMAND(TARGET interface_java POST_BUILD COMMAND cmake -E echo "Compiling Java files..." COMMAND cmake -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/shogun_jar COMMAND ${Java_JAVAC_EXECUTABLE} *.java @@ -36,7 +36,7 @@ ADD_CUSTOM_COMMAND(TARGET java_modular POST_BUILD #ADD_JAR(shogun_jar "${CMAKE_CURRENT_BINARY_DIR}/*.java") -#add_dependencies(shogun_jar java_modular) +#add_dependencies(shogun_jar interface_java) IF(NOT JAVA_INSTALL_DIR) SET(JAVA_INSTALL_DIR "share/java") @@ -46,8 +46,8 @@ IF(NOT JNI_INSTALL_DIR) SET(JNI_INSTALL_DIR "lib/cli/shogun") ENDIF(NOT JNI_INSTALL_DIR) -SET(JAVA_MODULAR_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) -INSTALL(TARGETS java_modular +SET(INTERFACE_JAVA_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) +INSTALL(TARGETS interface_java LIBRARY DESTINATION ${JNI_INSTALL_DIR} COMPONENT java) diff --git a/src/interfaces/java_modular/build.xml b/src/interfaces/java/build.xml similarity index 100% rename from src/interfaces/java_modular/build.xml rename to src/interfaces/java/build.xml diff --git a/src/interfaces/ruby_modular/sg_print_functions.cpp b/src/interfaces/java/sg_print_functions.cpp similarity index 81% rename from src/interfaces/ruby_modular/sg_print_functions.cpp rename to src/interfaces/java/sg_print_functions.cpp index ea4d1d854a2..b9637930f62 100644 --- a/src/interfaces/ruby_modular/sg_print_functions.cpp +++ b/src/interfaces/java/sg_print_functions.cpp @@ -15,7 +15,3 @@ void sg_global_print_error(FILE* target, const char* str) { fprintf(target, "%s", str); } - -void sg_global_cancel_computations(bool &delayed, bool &immediately) -{ -} diff --git a/src/interfaces/java_modular/swig_typemaps.i b/src/interfaces/java/swig_typemaps.i similarity index 100% rename from src/interfaces/java_modular/swig_typemaps.i rename to src/interfaces/java/swig_typemaps.i diff --git 
a/src/interfaces/lua_modular/CMakeLists.txt b/src/interfaces/lua/CMakeLists.txt similarity index 64% rename from src/interfaces/lua_modular/CMakeLists.txt rename to src/interfaces/lua/CMakeLists.txt index 37ab57d903a..cdad9e1e927 100644 --- a/src/interfaces/lua_modular/CMakeLists.txt +++ b/src/interfaces/lua/CMakeLists.txt @@ -2,16 +2,16 @@ FIND_PACKAGE(PkgConfig) pkg_search_module(LUA REQUIRED lua lua5.1) IF (NOT LUA_FOUND) MESSAGE(FATAL_ERROR "Lua could not be found!\n" - "Lua is required for compiling lua modular interface!") + "Lua is required for compiling lua interface!") ENDIF () SET(LUA_EXECUTABLE lua) UNSET(TARGET_SWIGFLAGS) -include(CommonModularInterface) +include(ShogunInterfaces) include_directories(${LUA_INCLUDE_DIRS}) -SET(LUA_MODULAR_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) -GENERATE_MODULAR_TARGET(lua ${CMAKE_CURRENT_SOURCE_DIR} "${LUA_LDFLAGS}") +SET(INTERFACE_LUA_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) +GENERATE_INTERFACE_TARGET(lua ${CMAKE_CURRENT_SOURCE_DIR} "${LUA_LDFLAGS}") IF (NOT LIB_INSTALL_DIR) SET(LIB_INSTALL_DIR lib) @@ -19,6 +19,6 @@ ENDIF() STRING(REGEX REPLACE "([0-9]*.[0-9]*).[0-9]*" "${LIB_INSTALL_DIR}/lua/\\1" LUA_EXTENSION_DIR ${LUA_VERSION}) -INSTALL(TARGETS lua_modular +INSTALL(TARGETS interface_lua LIBRARY DESTINATION ${LUA_EXTENSION_DIR} COMPONENT lua) diff --git a/src/interfaces/csharp_modular/sg_print_functions.cpp b/src/interfaces/lua/sg_print_functions.cpp similarity index 81% rename from src/interfaces/csharp_modular/sg_print_functions.cpp rename to src/interfaces/lua/sg_print_functions.cpp index ea4d1d854a2..b9637930f62 100644 --- a/src/interfaces/csharp_modular/sg_print_functions.cpp +++ b/src/interfaces/lua/sg_print_functions.cpp @@ -15,7 +15,3 @@ void sg_global_print_error(FILE* target, const char* str) { fprintf(target, "%s", str); } - -void sg_global_cancel_computations(bool &delayed, bool &immediately) -{ -} diff --git a/src/interfaces/lua_modular/swig_typemaps.i 
b/src/interfaces/lua/swig_typemaps.i similarity index 100% rename from src/interfaces/lua_modular/swig_typemaps.i rename to src/interfaces/lua/swig_typemaps.i diff --git a/src/interfaces/octave/CMakeLists.txt b/src/interfaces/octave/CMakeLists.txt new file mode 100644 index 00000000000..bbabf4af893 --- /dev/null +++ b/src/interfaces/octave/CMakeLists.txt @@ -0,0 +1,16 @@ +FIND_PACKAGE(Octave 3.6 REQUIRED) +SET(OCTAVE_APIVERSION ${OCTAVE_API_VERSION}) +UNSET(TARGET_SWIGFLAGS) + +include(ShogunInterfaces) +include_directories(${OCTAVE_INCLUDE_DIRS}) + +GENERATE_INTERFACE_TARGET(octave ${CMAKE_CURRENT_SOURCE_DIR} ${OCTAVE_LIBRARY}) +set_target_properties(interface_octave PROPERTIES PREFIX "") +set_target_properties(interface_octave PROPERTIES SUFFIX ".oct") +target_compile_definitions(interface_octave PRIVATE "OCTAVE_APIVERSION=${OCTAVE_API_VERSION}") + +SET(INTERFACE_OCTAVE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) +install (TARGETS interface_octave + DESTINATION ${OCTAVE_OCT_LOCAL_API_FILE_DIR}/shogun + COMPONENT octave) diff --git a/src/interfaces/octave_modular/sg_print_functions.cpp b/src/interfaces/octave/sg_print_functions.cpp similarity index 86% rename from src/interfaces/octave_modular/sg_print_functions.cpp rename to src/interfaces/octave/sg_print_functions.cpp index c696a81e0d8..3c692039a26 100644 --- a/src/interfaces/octave_modular/sg_print_functions.cpp +++ b/src/interfaces/octave/sg_print_functions.cpp @@ -1,13 +1,13 @@ #include -#include +#include #include #include #include +#include #include #include #include -#include #include #include @@ -19,7 +19,7 @@ void sg_global_print_message(FILE* target, const char* str) void sg_global_print_warning(FILE* target, const char* str) { - if (target==stdout) + if (target == stdout) ::warning(str); else fprintf(target, "%s", str); @@ -29,7 +29,3 @@ void sg_global_print_error(FILE* target, const char* str) { fprintf(target, "%s", str); } - -void sg_global_cancel_computations(bool &delayed, bool 
&immediately) -{ -} diff --git a/src/interfaces/octave_modular/swig_typemaps.i b/src/interfaces/octave/swig_typemaps.i similarity index 100% rename from src/interfaces/octave_modular/swig_typemaps.i rename to src/interfaces/octave/swig_typemaps.i diff --git a/src/interfaces/octave_modular/CMakeLists.txt b/src/interfaces/octave_modular/CMakeLists.txt deleted file mode 100644 index 536c2b37b3d..00000000000 --- a/src/interfaces/octave_modular/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -FIND_PACKAGE(Octave 3.6 REQUIRED) -SET(OCTAVE_APIVERSION ${OCTAVE_API_VERSION}) -UNSET(TARGET_SWIGFLAGS) - -include(CommonModularInterface) -include_directories(${OCTAVE_INCLUDE_DIRS}) - -GENERATE_MODULAR_TARGET(octave ${CMAKE_CURRENT_SOURCE_DIR} ${OCTAVE_LIBRARY}) -set_target_properties(octave_modular PROPERTIES PREFIX "") -set_target_properties(octave_modular PROPERTIES SUFFIX ".oct") -target_compile_definitions(octave_modular PRIVATE "OCTAVE_APIVERSION=${OCTAVE_API_VERSION}") - -SET(OCTAVE_MODULAR_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) -install (TARGETS octave_modular - DESTINATION ${OCTAVE_OCT_LOCAL_API_FILE_DIR}/shogun - COMPONENT octave) diff --git a/src/interfaces/perl_modular/pdl.i b/src/interfaces/perl/pdl.i similarity index 100% rename from src/interfaces/perl_modular/pdl.i rename to src/interfaces/perl/pdl.i diff --git a/src/interfaces/perl/sg_print_functions.cpp b/src/interfaces/perl/sg_print_functions.cpp new file mode 100644 index 00000000000..a9e7089b519 --- /dev/null +++ b/src/interfaces/perl/sg_print_functions.cpp @@ -0,0 +1,42 @@ +/* For co-existence with stdio only */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define PERLIO_NOT_STDIO 0 +#include + +#ifdef __cplusplus +} +#endif + +#include + +void sg_global_print_message(FILE* target, const char* str) +{ + fprintf(target, "%s", str); +} + +void sg_global_print_warning(FILE* target, const char* str) +{ + SV* err = get_sv("@", GV_ADD); + if (target == stdout) + { + if (sv_isobject(err)) + 
pdl_warn(0); + else + croak("%s", SvPV_nolen(err)); + } + else + fprintf(target, "%s", str); +} + +void sg_global_print_error(FILE* target, const char* str) +{ + if (target == stdout) // PerlIO_stdout()) //"ERRSV" ($@) + croak(str); + // SWIG_croak(str); + else + fprintf(target, "%s", str); +} diff --git a/src/interfaces/perl_modular/swig_typemaps.i b/src/interfaces/perl/swig_typemaps.i similarity index 100% rename from src/interfaces/perl_modular/swig_typemaps.i rename to src/interfaces/perl/swig_typemaps.i diff --git a/src/interfaces/perl_modular/sg_print_functions.cpp b/src/interfaces/perl_modular/sg_print_functions.cpp deleted file mode 100644 index 7cb0e25b896..00000000000 --- a/src/interfaces/perl_modular/sg_print_functions.cpp +++ /dev/null @@ -1,72 +0,0 @@ -/* For co-existence with stdio only */ - -#ifdef __cplusplus -extern "C" { -#endif - -#define PERLIO_NOT_STDIO 0 -#include - -#ifdef __cplusplus -} -#endif - - -#include - -void sg_global_print_message(FILE* target, const char* str) -{ - fprintf(target, "%s", str); -} - -void sg_global_print_warning(FILE* target, const char* str) -{ - SV *err = get_sv("@", GV_ADD); - if (target == stdout) - { - if (sv_isobject(err)) - pdl_warn(0); - else - croak("%s", SvPV_nolen(err)); - } - else - fprintf(target, "%s", str); -} - -void sg_global_print_error(FILE* target, const char* str) -{ - if (target == stdout) // PerlIO_stdout()) //"ERRSV" ($@) - croak(str); - //SWIG_croak(str); - else - fprintf(target, "%s", str); -} - -//PTZ121009 used in threads...so cannot access stdin like this... -// why not checking kill stuff??? 
-void sg_global_cancel_computations(bool &delayed, bool &immediately) -{ -#if 0 - using namespace shogun; - - dTHX; /* fetch context */ - //PerlIO_init(pTHX); - // PerlIO *f = PerlIO_stdin(); crashes in Perl_csighandler () from /usr/lib/libperl.so.5.14 - - if(!f) {return;} - if(PerlIO_flush(f)) //check signal - { - SG_SPRINT("\nImmediately return to matlab prompt / Prematurely finish computations / Do nothing (I/P/D)? "); - char answer= PerlIO_getc(f); - if (answer == 'I') - immediately=true; - else if (answer == 'P') - { - PerlIO_clearerr(f); - delayed=true; - } - else - SG_SPRINT("\n"); - } -#endif -} diff --git a/src/interfaces/python_modular/CMakeLists.txt b/src/interfaces/python/CMakeLists.txt similarity index 67% rename from src/interfaces/python_modular/CMakeLists.txt rename to src/interfaces/python/CMakeLists.txt index 09c58132ddc..a9f0ce62e40 100644 --- a/src/interfaces/python_modular/CMakeLists.txt +++ b/src/interfaces/python/CMakeLists.txt @@ -5,7 +5,7 @@ FIND_PACKAGE(NumPy REQUIRED) SET(HAVE_PYTHON 1) -#custom swig flags for python modular interface +#custom swig flags for python interface IF(${PYTHON_VERSION_MAJOR} VERSION_EQUAL 3) SET(TARGET_SWIGFLAGS "-builtin\;-modern\;-modernargs\;-threads\;-py3") SET(PYTHON3 1) @@ -19,11 +19,11 @@ ENDIF() IF ((NOT "${PYTHON_VERSION_STRING}" VERSION_LESS "3.5") AND ("${SWIG_VERSION}" VERSION_LESS "3.0.8")) MESSAGE(FATAL_ERROR - "Building Python_modular interface for Python >= 3.5 requires SWIG >= 3.0.8.") + "Building Python interface for Python >= 3.5 requires SWIG >= 3.0.8.") ENDIF ((NOT "${PYTHON_VERSION_STRING}" VERSION_LESS "3.5") AND ("${SWIG_VERSION}" VERSION_LESS "3.0.8")) -include(CommonModularInterface) +include(ShogunInterfaces) include_directories(${PYTHON_INCLUDE_PATH} ${NUMPY_INCLUDE_DIRS}) #TODO: check for SWIG version where this bug has been applied already: @@ -31,7 +31,7 @@ include_directories(${PYTHON_INCLUDE_PATH} ${NUMPY_INCLUDE_DIRS}) SET(SWIG_CXX_COMPILER_FLAGS "${SWIG_CXX_COMPILER_FLAGS} 
-Wno-c++11-narrowing") -GENERATE_MODULAR_TARGET(python ${CMAKE_CURRENT_SOURCE_DIR} ${PYTHON_LIBRARIES}) +GENERATE_INTERFACE_TARGET(python ${CMAKE_CURRENT_SOURCE_DIR} ${PYTHON_LIBRARIES}) # pacakges directory if(CMAKE_HOST_UNIX) @@ -64,31 +64,10 @@ elseif(CMAKE_HOST_WIN32) endif() SET(PYTHON_PACKAGES_PATH "${_PYTHON_PACKAGES_PATH}" CACHE PATH "Where to install the python packages.") -INSTALL(TARGETS _python_modular +INSTALL(TARGETS _interface_python DESTINATION ${PYTHON_PACKAGES_PATH} COMPONENT python) -INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/modshogun.py +INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/shogun.py DESTINATION ${PYTHON_PACKAGES_PATH} - COMPONENT python) - -# create the __init.py__ files for modules and the main -file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/__init__.py.in "from modshogun import *") -file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/main_init.py.in "__all__= [\n") -configure_file(${CMAKE_CURRENT_BINARY_DIR}/__init__.py.in __init__.py COPYONLY) -SET(MODULES Kernel Distance Features Classifier Regression Converter - Loss Clustering Evaluation IO Library Mathematics - ModelSelection Preprocessor Structure Distribution Statistics Latent) - -FOREACH(module ${MODULES}) - INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/__init__.py - DESTINATION ${PYTHON_PACKAGES_PATH}/shogun/${module} - COMPONENT python) - file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/main_init.py.in "\"${module}\",\n") -ENDFOREACH() - -file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/main_init.py.in "]") -configure_file(${CMAKE_CURRENT_BINARY_DIR}/main_init.py.in main_init.py COPYONLY) -INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/main_init.py - DESTINATION ${PYTHON_PACKAGES_PATH}/shogun - COMPONENT python - RENAME __init__.py) + COMPONENT python +) diff --git a/src/interfaces/python_modular/CustomKernel_protocols.i b/src/interfaces/python/CustomKernel_protocols.i similarity index 100% rename from src/interfaces/python_modular/CustomKernel_protocols.i rename to src/interfaces/python/CustomKernel_protocols.i diff --git 
a/src/interfaces/python_modular/DenseFeatures_protocols.i b/src/interfaces/python/DenseFeatures_protocols.i similarity index 100% rename from src/interfaces/python_modular/DenseFeatures_protocols.i rename to src/interfaces/python/DenseFeatures_protocols.i diff --git a/src/interfaces/python_modular/DenseLabels_protocols.i b/src/interfaces/python/DenseLabels_protocols.i similarity index 100% rename from src/interfaces/python_modular/DenseLabels_protocols.i rename to src/interfaces/python/DenseLabels_protocols.i diff --git a/src/interfaces/python_modular/SGVector_protocols.i b/src/interfaces/python/SGVector_protocols.i similarity index 100% rename from src/interfaces/python_modular/SGVector_protocols.i rename to src/interfaces/python/SGVector_protocols.i diff --git a/src/interfaces/python_modular/protocols_helper.i b/src/interfaces/python/protocols_helper.i similarity index 100% rename from src/interfaces/python_modular/protocols_helper.i rename to src/interfaces/python/protocols_helper.i diff --git a/src/interfaces/python_modular/sg_print_functions.cpp b/src/interfaces/python/sg_print_functions.cpp similarity index 54% rename from src/interfaces/python_modular/sg_print_functions.cpp rename to src/interfaces/python/sg_print_functions.cpp index 4b86b8d8a5c..7f3f16dfc75 100644 --- a/src/interfaces/python_modular/sg_print_functions.cpp +++ b/src/interfaces/python/sg_print_functions.cpp @@ -13,7 +13,7 @@ void sg_global_print_message(FILE* target, const char* str) void sg_global_print_warning(FILE* target, const char* str) { - if (target==stdout) + if (target == stdout) { PyGILState_STATE gil = PyGILState_Ensure(); PyErr_Warn(NULL, str); @@ -25,7 +25,7 @@ void sg_global_print_warning(FILE* target, const char* str) void sg_global_print_error(FILE* target, const char* str) { - if (target==stdout) + if (target == stdout) { PyGILState_STATE gil = PyGILState_Ensure(); PyErr_SetString(PyExc_RuntimeError, str); @@ -34,26 +34,3 @@ void sg_global_print_error(FILE* target, const 
char* str) else fprintf(target, "%s", str); } - -void sg_global_cancel_computations(bool &delayed, bool &immediately) -{ - using namespace shogun; - - PyGILState_STATE gil = PyGILState_Ensure(); - if (PyErr_CheckSignals()) - { - SG_SPRINT("\nImmediately return to matlab prompt / Prematurely finish computations / Do nothing (I/P/D)? "); - char answer=fgetc(stdin); - - if (answer == 'I') - immediately=true; - else if (answer == 'P') - { - PyErr_Clear(); - delayed=true; - } - else - SG_SPRINT("\n"); - } - PyGILState_Release(gil); -} diff --git a/src/interfaces/python_modular/swig_typemaps.i b/src/interfaces/python/swig_typemaps.i similarity index 100% rename from src/interfaces/python_modular/swig_typemaps.i rename to src/interfaces/python/swig_typemaps.i diff --git a/src/interfaces/r_modular/CMakeLists.txt b/src/interfaces/r/CMakeLists.txt similarity index 57% rename from src/interfaces/r_modular/CMakeLists.txt rename to src/interfaces/r/CMakeLists.txt index 33cbf5725d8..7b596acb2b1 100644 --- a/src/interfaces/r_modular/CMakeLists.txt +++ b/src/interfaces/r/CMakeLists.txt @@ -2,27 +2,27 @@ FIND_PACKAGE(R REQUIRED) UNSET(TARGET_SWIGFLAGS) -include(CommonModularInterface) +include(ShogunInterfaces) SET(SWIG_CXX_COMPILER_FLAGS "${SWIG_CXX_COMPILER_FLAGS} ${R_CXX_FLAGS}") -IF(APPLE) +IF("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") SET(SWIG_CXX_COMPILER_FLAGS "${SWIG_CXX_COMPILER_FLAGS} -DDO_NOT_USE_CXX_HEADERS") ENDIF() -GENERATE_MODULAR_TARGET(r ${CMAKE_CURRENT_SOURCE_DIR} ${R_LIBRARIES}) -set_target_properties(r_modular PROPERTIES PREFIX "") +GENERATE_INTERFACE_TARGET(r ${CMAKE_CURRENT_SOURCE_DIR} ${R_LIBRARIES}) +set_target_properties(interface_r PROPERTIES PREFIX "") -ADD_CUSTOM_COMMAND(TARGET r_modular +ADD_CUSTOM_COMMAND(TARGET interface_r POST_BUILD - COMMAND echo 'wd="${CMAKE_CURRENT_BINARY_DIR}" \; setwd( wd ) \; f="modshogun.R" \; fdata="modshogun.RData" \; source( f ) \; save( list=ls( all=TRUE ) , file=fdata , compress=TRUE ) \; q( save="no" ) \;' | 
${R_EXECUTABLE} --silent --no-save + COMMAND echo 'wd="${CMAKE_CURRENT_BINARY_DIR}" \; setwd( wd ) \; f="shogun.R" \; fdata="shogun.RData" \; source( f ) \; save( list=ls( all=TRUE ) , file=fdata , compress=TRUE ) \; q( save="no" ) \;' | ${R_EXECUTABLE} --silent --no-save WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - COMMENT "Generating modshogun.RData" + COMMENT "Generating shogun.RData" ) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/modshogun.so +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/shogun.so DESTINATION ${R_COMPONENT_LIB_PATH}/shogun/libs COMPONENT r) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/modshogun.RData +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/shogun.RData DESTINATION ${R_COMPONENT_LIB_PATH}/shogun/R COMPONENT r) diff --git a/src/interfaces/r_modular/sg_print_functions.cpp b/src/interfaces/r/sg_print_functions.cpp similarity index 64% rename from src/interfaces/r_modular/sg_print_functions.cpp rename to src/interfaces/r/sg_print_functions.cpp index 7a8c7c7bfdc..985b43ace97 100644 --- a/src/interfaces/r_modular/sg_print_functions.cpp +++ b/src/interfaces/r/sg_print_functions.cpp @@ -1,39 +1,34 @@ #include -#include -#include +#include +#include #include +#include #include #include -#include -#include +#include #include void sg_global_print_message(FILE* target, const char* str) { - if (target==stdout) - Rprintf((char*) "%s", str); + if (target == stdout) + Rprintf((char*)"%s", str); else fprintf(target, "%s", str); } void sg_global_print_warning(FILE* target, const char* str) { - if (target==stdout) - Rprintf((char*) "%s", str); + if (target == stdout) + Rprintf((char*)"%s", str); else fprintf(target, "%s", str); } void sg_global_print_error(FILE* target, const char* str) { - if (target==stdout) - Rprintf((char*) "%s", str); + if (target == stdout) + Rprintf((char*)"%s", str); else fprintf(target, "%s", str); } - -void sg_global_cancel_computations(bool &delayed, bool &immediately) -{ - /* R_Suicide((char*) "sg stopped by SIGINT\n"); */ -} diff --git 
a/src/interfaces/r_modular/swig_typemaps.i b/src/interfaces/r/swig_typemaps.i similarity index 100% rename from src/interfaces/r_modular/swig_typemaps.i rename to src/interfaces/r/swig_typemaps.i diff --git a/src/interfaces/ruby/CMakeLists.txt b/src/interfaces/ruby/CMakeLists.txt new file mode 100644 index 00000000000..e806eab5995 --- /dev/null +++ b/src/interfaces/ruby/CMakeLists.txt @@ -0,0 +1,29 @@ +FIND_PACKAGE(Ruby REQUIRED) +FIND_PACKAGE(RubyNArray REQUIRED) +SET(NARRAY_LIB ${RUBY_NARRAY_LIBRARY} PARENT_SCOPE) +UNSET(TARGET_SWIGFLAGS) + +include(ShogunInterfaces) +INCLUDE_DIRECTORIES(${RUBY_INCLUDE_DIRS} ${RUBY_NARRAY_INCLUDE_DIR}) + +CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/shogun.rb.in" + "${CMAKE_CURRENT_BINARY_DIR}/shogun.rb" @ONLY) + +GENERATE_INTERFACE_TARGET(ruby ${CMAKE_CURRENT_SOURCE_DIR} ${RUBY_LIBRARY}) +set_target_properties(interface_ruby PROPERTIES PREFIX "") +target_compile_definitions(interface_ruby PRIVATE NARRAY_LIB="${RUBY_NARRAY_LIBRARY}") + +SET(INTERFACE_RUBY_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) + +EXECUTE_PROCESS(COMMAND ${RUBY_EXECUTABLE} -r rbconfig -e "print RbConfig::CONFIG['sitearchdir']" + OUTPUT_VARIABLE RUBY_SITE_ARCH_DIR + ERROR_QUIET) + +INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/shogun.rb + DESTINATION ${RUBY_SITE_ARCH_DIR} + COMPONENT ruby) + +INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/shogun.so + DESTINATION ${RUBY_SITE_ARCH_DIR} + COMPONENT ruby + RENAME shogun${EXT_LIB_SWIG_RUBY}) diff --git a/src/interfaces/lua_modular/sg_print_functions.cpp b/src/interfaces/ruby/sg_print_functions.cpp similarity index 73% rename from src/interfaces/lua_modular/sg_print_functions.cpp rename to src/interfaces/ruby/sg_print_functions.cpp index 72afe5ab1f4..b9637930f62 100644 --- a/src/interfaces/lua_modular/sg_print_functions.cpp +++ b/src/interfaces/ruby/sg_print_functions.cpp @@ -8,14 +8,10 @@ void sg_global_print_message(FILE* target, const char* str) void sg_global_print_warning(FILE* target, const char* str) { - 
fprintf(target, "%s", str); + fprintf(target, "%s", str); } void sg_global_print_error(FILE* target, const char* str) { fprintf(target, "%s", str); } - -void sg_global_cancel_computations(bool &delayed, bool &immediately) -{ -} diff --git a/src/interfaces/ruby/shogun.rb.in b/src/interfaces/ruby/shogun.rb.in new file mode 100644 index 00000000000..c60d3beb55f --- /dev/null +++ b/src/interfaces/ruby/shogun.rb.in @@ -0,0 +1,3 @@ +require 'rubygems' +require 'narray' +require 'shogun@EXT_LIB_SWIG_RUBY@' diff --git a/src/interfaces/ruby_modular/swig_typemaps.i b/src/interfaces/ruby/swig_typemaps.i similarity index 98% rename from src/interfaces/ruby_modular/swig_typemaps.i rename to src/interfaces/ruby/swig_typemaps.i index 0e52d2fa36a..d330adad8ae 100644 --- a/src/interfaces/ruby_modular/swig_typemaps.i +++ b/src/interfaces/ruby/swig_typemaps.i @@ -114,7 +114,7 @@ TYPEMAP_SGVECTOR(float64_t, NUM2DBL, rb_float_new) array = SG_MALLOC(SGTYPE, rows * cols); } for (j = 0; j < cols; j++) { - array[i * cols + j] = R2SG(rb_ary_entry(vec, j)); + array[j * rows + i] = R2SG(rb_ary_entry(vec, j)); } } } @@ -136,7 +136,7 @@ TYPEMAP_SGVECTOR(float64_t, NUM2DBL, rb_float_new) for (i = 0; i < rows; i++) { VALUE vec = rb_ary_new2(cols); for (j = 0; j < cols; j++) { - rb_ary_push(vec, SG2R($1.matrix[i * cols + j])); + rb_ary_push(vec, SG2R($1.matrix[j * rows + i])); } rb_ary_push(arr, vec); } diff --git a/src/interfaces/ruby_modular/CMakeLists.txt b/src/interfaces/ruby_modular/CMakeLists.txt deleted file mode 100644 index c2585d410fc..00000000000 --- a/src/interfaces/ruby_modular/CMakeLists.txt +++ /dev/null @@ -1,29 +0,0 @@ -FIND_PACKAGE(Ruby REQUIRED) -FIND_PACKAGE(RubyNArray REQUIRED) -SET(NARRAY_LIB ${RUBY_NARRAY_LIBRARY} PARENT_SCOPE) -UNSET(TARGET_SWIGFLAGS) - -include(CommonModularInterface) -INCLUDE_DIRECTORIES(${RUBY_INCLUDE_DIRS} ${RUBY_NARRAY_INCLUDE_DIR}) - -CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/modshogun.rb.in" - "${CMAKE_CURRENT_BINARY_DIR}/modshogun.rb" @ONLY) - 
-GENERATE_MODULAR_TARGET(ruby ${CMAKE_CURRENT_SOURCE_DIR} ${RUBY_LIBRARY}) -set_target_properties(ruby_modular PROPERTIES PREFIX "") -target_compile_definitions(ruby_modular PRIVATE NARRAY_LIB="${RUBY_NARRAY_LIBRARY}") - -SET(RUBY_MODULAR_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) - -EXECUTE_PROCESS(COMMAND ${RUBY_EXECUTABLE} -r rbconfig -e "print RbConfig::CONFIG['sitearchdir']" - OUTPUT_VARIABLE RUBY_SITE_ARCH_DIR - ERROR_QUIET) - -INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/modshogun.rb - DESTINATION ${RUBY_SITE_ARCH_DIR} - COMPONENT ruby) - -INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/modshogun.so - DESTINATION ${RUBY_SITE_ARCH_DIR} - COMPONENT ruby - RENAME modshogun${EXT_LIB_SWIG_RUBY_MODULAR}) diff --git a/src/interfaces/ruby_modular/modshogun.rb.in b/src/interfaces/ruby_modular/modshogun.rb.in deleted file mode 100644 index 966a414ade2..00000000000 --- a/src/interfaces/ruby_modular/modshogun.rb.in +++ /dev/null @@ -1,3 +0,0 @@ -require 'rubygems' -require 'narray' -require 'modshogun@EXT_LIB_SWIG_RUBY_MODULAR@' diff --git a/src/interfaces/modular/Boost.i b/src/interfaces/swig/Boost.i similarity index 100% rename from src/interfaces/modular/Boost.i rename to src/interfaces/swig/Boost.i diff --git a/src/interfaces/modular/Boost_includes.i b/src/interfaces/swig/Boost_includes.i similarity index 100% rename from src/interfaces/modular/Boost_includes.i rename to src/interfaces/swig/Boost_includes.i diff --git a/src/interfaces/modular/Classifier.i b/src/interfaces/swig/Classifier.i similarity index 98% rename from src/interfaces/modular/Classifier.i rename to src/interfaces/swig/Classifier.i index da11cef942b..dd0a9a8adcd 100644 --- a/src/interfaces/modular/Classifier.i +++ b/src/interfaces/swig/Classifier.i @@ -53,8 +53,8 @@ #ifndef HAVE_PYTHON %rename(SVM) CSVM; #endif -%rename(SVMLin) CSVMLin; #ifdef USE_GPL_SHOGUN +%rename(SVMLin) CSVMLin; %rename(SVMOcas) CSVMOcas; #endif //USE_GPL_SHOGUN %rename(SVMSGD) CSVMSGD; @@ -67,7 +67,9 @@ 
%rename(MKLClassification) CMKLClassification; %rename(MKLOneClass) CMKLOneClass; %rename(VowpalWabbit) CVowpalWabbit; +#ifdef USE_GPL_SHOGUN %rename(FeatureBlockLogisticRegression) CFeatureBlockLogisticRegression; +#endif //USE_GPL_SHOGUN %rename(DirectorLinearMachine) CDirectorLinearMachine; %rename(DirectorKernelMachine) CDirectorKernelMachine; %rename(BaggingMachine) CBaggingMachine; @@ -115,8 +117,8 @@ %include %include %include -%include #ifdef USE_GPL_SHOGUN +%include %include #endif //USE_GPL_SHOGUN %include @@ -130,7 +132,9 @@ %include %include %include +#ifdef USE_GPL_SHOGUN %include +#endif //USE_GPL_SHOGUN %include %include %include diff --git a/src/interfaces/modular/Classifier_includes.i b/src/interfaces/swig/Classifier_includes.i similarity index 97% rename from src/interfaces/modular/Classifier_includes.i rename to src/interfaces/swig/Classifier_includes.i index 741e9151ec6..3dc5ddc37f1 100644 --- a/src/interfaces/modular/Classifier_includes.i +++ b/src/interfaces/swig/Classifier_includes.i @@ -1,3 +1,4 @@ + %{ #include #include @@ -23,9 +24,9 @@ #include #include #include - #include #include #ifdef USE_GPL_SHOGUN + #include #include #endif //USE_GPL_SHOGUN #include @@ -39,7 +40,9 @@ #include #include #include +#ifdef USE_GPL_SHOGUN #include +#endif //USE_GPL_SHOGUN #include #include #include diff --git a/src/interfaces/modular/Clustering.i b/src/interfaces/swig/Clustering.i similarity index 100% rename from src/interfaces/modular/Clustering.i rename to src/interfaces/swig/Clustering.i diff --git a/src/interfaces/modular/Clustering_includes.i b/src/interfaces/swig/Clustering_includes.i similarity index 100% rename from src/interfaces/modular/Clustering_includes.i rename to src/interfaces/swig/Clustering_includes.i diff --git a/src/interfaces/modular/Converter.i b/src/interfaces/swig/Converter.i similarity index 100% rename from src/interfaces/modular/Converter.i rename to src/interfaces/swig/Converter.i diff --git 
a/src/interfaces/modular/Converter_includes.i b/src/interfaces/swig/Converter_includes.i similarity index 100% rename from src/interfaces/modular/Converter_includes.i rename to src/interfaces/swig/Converter_includes.i diff --git a/src/interfaces/modular/Distance.i b/src/interfaces/swig/Distance.i similarity index 100% rename from src/interfaces/modular/Distance.i rename to src/interfaces/swig/Distance.i diff --git a/src/interfaces/modular/Distance_includes.i b/src/interfaces/swig/Distance_includes.i similarity index 100% rename from src/interfaces/modular/Distance_includes.i rename to src/interfaces/swig/Distance_includes.i diff --git a/src/interfaces/modular/Distribution.i b/src/interfaces/swig/Distribution.i similarity index 100% rename from src/interfaces/modular/Distribution.i rename to src/interfaces/swig/Distribution.i diff --git a/src/interfaces/modular/Distribution_includes.i b/src/interfaces/swig/Distribution_includes.i similarity index 100% rename from src/interfaces/modular/Distribution_includes.i rename to src/interfaces/swig/Distribution_includes.i diff --git a/src/interfaces/modular/Ensemble.i b/src/interfaces/swig/Ensemble.i similarity index 100% rename from src/interfaces/modular/Ensemble.i rename to src/interfaces/swig/Ensemble.i diff --git a/src/interfaces/modular/Ensemble_includes.i b/src/interfaces/swig/Ensemble_includes.i similarity index 100% rename from src/interfaces/modular/Ensemble_includes.i rename to src/interfaces/swig/Ensemble_includes.i diff --git a/src/interfaces/modular/Evaluation.i b/src/interfaces/swig/Evaluation.i similarity index 87% rename from src/interfaces/modular/Evaluation.i rename to src/interfaces/swig/Evaluation.i index ca70a132c91..c67263a13a4 100644 --- a/src/interfaces/modular/Evaluation.i +++ b/src/interfaces/swig/Evaluation.i @@ -47,10 +47,6 @@ %rename(GradientEvaluation) CGradientEvaluation; %rename(MulticlassOVREvaluation) CMulticlassOVREvaluation; %rename(CrossValidationResult) CCrossValidationResult; 
-%rename(CrossValidationOutput) CCrossValidationOutput; -%rename(CrossValidationPrintOutput) CCrossValidationPrintOutput; -%rename(CrossValidationMKLStorage) CCrossValidationMKLStorage; -%rename(CrossValidationMulticlassStorage) CCrossValidationMulticlassStorage; %rename(StructuredAccuracy) CStructuredAccuracy; %rename(DirectorContingencyTableEvaluation) CDirectorContingencyTableEvaluation; @@ -70,6 +66,7 @@ %include %include %include +%include %include %include %include @@ -78,9 +75,5 @@ %include %include %include -%include -%include -%include -%include %include -%include +%include \ No newline at end of file diff --git a/src/interfaces/modular/Evaluation_includes.i b/src/interfaces/swig/Evaluation_includes.i similarity index 84% rename from src/interfaces/modular/Evaluation_includes.i rename to src/interfaces/swig/Evaluation_includes.i index 6950aebbfe8..d96f226d22c 100644 --- a/src/interfaces/modular/Evaluation_includes.i +++ b/src/interfaces/swig/Evaluation_includes.i @@ -14,6 +14,7 @@ #include #include #include + #include #include #include #include @@ -22,10 +23,6 @@ #include #include #include - #include - #include - #include - #include #include #include %} diff --git a/src/interfaces/modular/Features.i b/src/interfaces/swig/Features.i similarity index 100% rename from src/interfaces/modular/Features.i rename to src/interfaces/swig/Features.i diff --git a/src/interfaces/modular/Features_includes.i b/src/interfaces/swig/Features_includes.i similarity index 100% rename from src/interfaces/modular/Features_includes.i rename to src/interfaces/swig/Features_includes.i diff --git a/src/interfaces/modular/GaussianProcess.i b/src/interfaces/swig/GaussianProcess.i similarity index 100% rename from src/interfaces/modular/GaussianProcess.i rename to src/interfaces/swig/GaussianProcess.i diff --git a/src/interfaces/modular/GaussianProcess_includes.i b/src/interfaces/swig/GaussianProcess_includes.i similarity index 100% rename from 
src/interfaces/modular/GaussianProcess_includes.i rename to src/interfaces/swig/GaussianProcess_includes.i diff --git a/src/interfaces/modular/IO.i b/src/interfaces/swig/IO.i similarity index 100% rename from src/interfaces/modular/IO.i rename to src/interfaces/swig/IO.i diff --git a/src/interfaces/modular/IO_includes.i b/src/interfaces/swig/IO_includes.i similarity index 100% rename from src/interfaces/modular/IO_includes.i rename to src/interfaces/swig/IO_includes.i diff --git a/src/interfaces/modular/Kernel.i b/src/interfaces/swig/Kernel.i similarity index 99% rename from src/interfaces/modular/Kernel.i rename to src/interfaces/swig/Kernel.i index 3a4ce6f1617..a4b73db1dcf 100644 --- a/src/interfaces/modular/Kernel.i +++ b/src/interfaces/swig/Kernel.i @@ -49,7 +49,9 @@ PROTOCOLS_CUSTOMKERNEL(CustomKernel, float32_t, "f\0", NPY_FLOAT32) %rename(CustomKernel) CCustomKernel; %rename(DiagKernel) CDiagKernel; +#ifdef USE_GPL_SHOGUN %rename(DistantSegmentsKernel) CDistantSegmentsKernel; +#endif //USE_GPL_SHOGUN %rename(WaveKernel) CWaveKernel; %rename(CauchyKernel) CCauchyKernel; %rename(DiceKernelNormalizer) CDiceKernelNormalizer; @@ -182,7 +184,9 @@ namespace shogun %include %include %include +#ifdef USE_GPL_SHOGUN %include +#endif //USE_GPL_SHOGUN %include %include %include diff --git a/src/interfaces/modular/Kernel_includes.i b/src/interfaces/swig/Kernel_includes.i similarity index 98% rename from src/interfaces/modular/Kernel_includes.i rename to src/interfaces/swig/Kernel_includes.i index a38a8a920d2..ad6533e4d48 100644 --- a/src/interfaces/modular/Kernel_includes.i +++ b/src/interfaces/swig/Kernel_includes.i @@ -19,7 +19,9 @@ #include #include #include +#ifdef USE_GPL_SHOGUN #include +#endif //USE_GPL_SHOGUN #include #include #include diff --git a/src/interfaces/modular/Latent.i b/src/interfaces/swig/Latent.i similarity index 95% rename from src/interfaces/modular/Latent.i rename to src/interfaces/swig/Latent.i index c6a7317f260..6b57e20c873 100644 --- 
a/src/interfaces/modular/Latent.i +++ b/src/interfaces/swig/Latent.i @@ -29,4 +29,6 @@ %include +#ifdef USE_GPL_SHOGUN %include +#endif //USE_GPL_SHOGUN diff --git a/src/interfaces/modular/Latent_includes.i b/src/interfaces/swig/Latent_includes.i similarity index 80% rename from src/interfaces/modular/Latent_includes.i rename to src/interfaces/swig/Latent_includes.i index 84270b52634..237cfc16420 100644 --- a/src/interfaces/modular/Latent_includes.i +++ b/src/interfaces/swig/Latent_includes.i @@ -5,6 +5,8 @@ #include +#ifdef USE_GPL_SHOGUN #include +#endif //USE_GPL_SHOGUN %} diff --git a/src/interfaces/modular/Library.i b/src/interfaces/swig/Library.i similarity index 100% rename from src/interfaces/modular/Library.i rename to src/interfaces/swig/Library.i diff --git a/src/interfaces/modular/Library_includes.i b/src/interfaces/swig/Library_includes.i similarity index 100% rename from src/interfaces/modular/Library_includes.i rename to src/interfaces/swig/Library_includes.i diff --git a/src/interfaces/modular/Loss.i b/src/interfaces/swig/Loss.i similarity index 100% rename from src/interfaces/modular/Loss.i rename to src/interfaces/swig/Loss.i diff --git a/src/interfaces/modular/Loss_includes.i b/src/interfaces/swig/Loss_includes.i similarity index 100% rename from src/interfaces/modular/Loss_includes.i rename to src/interfaces/swig/Loss_includes.i diff --git a/src/interfaces/modular/Machine.i b/src/interfaces/swig/Machine.i similarity index 100% rename from src/interfaces/modular/Machine.i rename to src/interfaces/swig/Machine.i diff --git a/src/interfaces/modular/Mathematics.i b/src/interfaces/swig/Mathematics.i similarity index 89% rename from src/interfaces/modular/Mathematics.i rename to src/interfaces/swig/Mathematics.i index 6bb00a75145..9fc4993afe0 100644 --- a/src/interfaces/modular/Mathematics.i +++ b/src/interfaces/swig/Mathematics.i @@ -12,59 +12,39 @@ /* Remove C Prefix */ %rename(Math) CMath; %rename(Statistics) CStatistics; +#ifdef USE_GPL_SHOGUN 
%rename(SparseInverseCovariance) CSparseInverseCovariance; +#endif //USE_GPL_SHOGUN // fix overloaded methods in Math #if defined(SWIGLUA) || defined(SWIGR) namespace shogun { -#ifdef USE_INT8 -%rename(dot_int8) CMath::dot(int8_t const *,int8_t const *,int32_t); -#endif - -#ifdef USE_UINT8 -%rename(dot_uint8) CMath::dot(uint8_t const *,uint8_t const *,int32_t); -#endif - -#ifdef USE_INT16 -%rename(dot_int16) CMath::dot(int16_t const *,int16_t const *,int32_t); -#endif - -#ifdef USE_UINT16 -%rename(dot_uint16) CMath::dot(uint16_t const *,uint16_t const *,int32_t); -#endif - #ifdef USE_INT32 -%rename(dot_int32) CMath::dot(int32_t const *,int32_t const *,int32_t); %rename(pow_int32) CMath::pow(int32_t,int32_t); %rename(random_int32) CMath::random(int32_t,int32_t); #endif #ifdef USE_UINT32 -%rename(dot_uint32) CMath::dot(uint32_t const *,uint32_t const *,int32_t); %rename(random_uint32) CMath::random(uint32_t,uint32_t); #endif #ifdef USE_INT64 -%rename(dot_int64) CMath::dot(int64_t const *,int64_t const *,int32_t); %rename(random_int64) CMath::random(int64_t,int64_t); #endif #ifdef USE_UINT64 -%rename(dot_uint64) CMath::dot(uint64_t const *,uint64_t const *,int32_t); %rename(random_uint64) CMath::random(uint64_t,uint64_t); #endif #ifdef USE_FLOAT32 -%rename(dot_float32) CMath::dot(float32_t const *,float32_t const *,int32_t); %rename(normal_random_float32) CMath::normal_random(float32_t,float32_t); %rename(random_float32) CMath::random(float32_t,float32_t); %rename(sqrt_float32) CMath::sqrt(float32_t); #endif #ifdef USE_FLOAT64 -%rename(dot_float64) CMath::dot(float64_t const *,float64_t const *,int32_t); %rename(normal_random_float64) CMath::normal_random(float64_t,float64_t); %rename(pow_float64_int32) CMath::pow(float64_t,int32_t); %rename(pow_float64_float64) CMath::pow(float64_t,float64_t); @@ -194,7 +174,9 @@ namespace shogun /* Include Class Headers to make them visible from within the target language */ %include %include +#ifdef USE_GPL_SHOGUN %include +#endif 
//USE_GPL_SHOGUN /* Log-det framework */ %include diff --git a/src/interfaces/modular/Mathematics_includes.i b/src/interfaces/swig/Mathematics_includes.i similarity index 97% rename from src/interfaces/modular/Mathematics_includes.i rename to src/interfaces/swig/Mathematics_includes.i index 1654ae085f1..11b6e1c3d77 100644 --- a/src/interfaces/modular/Mathematics_includes.i +++ b/src/interfaces/swig/Mathematics_includes.i @@ -1,7 +1,9 @@ %{ #include #include +#ifdef USE_GPL_SHOGUN #include +#endif //USE_GPL_SHOGUN /* Log-det framework */ #include diff --git a/src/interfaces/modular/Metric.i b/src/interfaces/swig/Metric.i similarity index 100% rename from src/interfaces/modular/Metric.i rename to src/interfaces/swig/Metric.i diff --git a/src/interfaces/modular/Metric_includes.i b/src/interfaces/swig/Metric_includes.i similarity index 100% rename from src/interfaces/modular/Metric_includes.i rename to src/interfaces/swig/Metric_includes.i diff --git a/src/interfaces/modular/Minimizer.i b/src/interfaces/swig/Minimizer.i similarity index 100% rename from src/interfaces/modular/Minimizer.i rename to src/interfaces/swig/Minimizer.i diff --git a/src/interfaces/modular/Minimizer_includes.i b/src/interfaces/swig/Minimizer_includes.i similarity index 100% rename from src/interfaces/modular/Minimizer_includes.i rename to src/interfaces/swig/Minimizer_includes.i diff --git a/src/interfaces/modular/ModelSelection.i b/src/interfaces/swig/ModelSelection.i similarity index 100% rename from src/interfaces/modular/ModelSelection.i rename to src/interfaces/swig/ModelSelection.i diff --git a/src/interfaces/modular/ModelSelection_includes.i b/src/interfaces/swig/ModelSelection_includes.i similarity index 100% rename from src/interfaces/modular/ModelSelection_includes.i rename to src/interfaces/swig/ModelSelection_includes.i diff --git a/src/interfaces/modular/Multiclass.i b/src/interfaces/swig/Multiclass.i similarity index 98% rename from src/interfaces/modular/Multiclass.i rename to 
src/interfaces/swig/Multiclass.i index 8317a9178c8..21f1b9b1a18 100644 --- a/src/interfaces/modular/Multiclass.i +++ b/src/interfaces/swig/Multiclass.i @@ -52,10 +52,12 @@ %rename(ECOCAEDDecoder) CECOCAEDDecoder; %rename(ECOCLLBDecoder) CECOCLLBDecoder; +#ifdef USE_GPL_SHOGUN %rename(MulticlassTreeGuidedLogisticRegression) CMulticlassTreeGuidedLogisticRegression; %rename(MulticlassLogisticRegression) CMulticlassLogisticRegression; -%rename(MulticlassLibLinear) CMulticlassLibLinear; %rename(MulticlassOCAS) CMulticlassOCAS; +#endif //USE_GPL_SHOGUN +%rename(MulticlassLibLinear) CMulticlassLibLinear; %rename(MulticlassSVM) CMulticlassSVM; %rename(MulticlassLibSVM) CMulticlassLibSVM; %rename(LaRank) CLaRank; @@ -121,10 +123,12 @@ namespace shogun %include %include +#ifdef USE_GPL_SHOGUN %include %include -%include %include +#endif // USE_GPL_SHOGUN +%include %include %include %include diff --git a/src/interfaces/modular/Multiclass_includes.i b/src/interfaces/swig/Multiclass_includes.i similarity index 98% rename from src/interfaces/modular/Multiclass_includes.i rename to src/interfaces/swig/Multiclass_includes.i index 4057458cf77..689ee8662fa 100644 --- a/src/interfaces/modular/Multiclass_includes.i +++ b/src/interfaces/swig/Multiclass_includes.i @@ -46,10 +46,12 @@ #include #include +#ifdef USE_GPL_SHOGUN #include #include - #include #include +#endif //USE_GPL_SHOGUN + #include #include #include #include diff --git a/src/interfaces/modular/NeuralNets.i b/src/interfaces/swig/NeuralNets.i similarity index 100% rename from src/interfaces/modular/NeuralNets.i rename to src/interfaces/swig/NeuralNets.i diff --git a/src/interfaces/modular/NeuralNets_includes.i b/src/interfaces/swig/NeuralNets_includes.i similarity index 100% rename from src/interfaces/modular/NeuralNets_includes.i rename to src/interfaces/swig/NeuralNets_includes.i diff --git a/src/interfaces/swig/ParameterObserver.i b/src/interfaces/swig/ParameterObserver.i new file mode 100644 index 
00000000000..439c3dbf786 --- /dev/null +++ b/src/interfaces/swig/ParameterObserver.i @@ -0,0 +1,23 @@ +%include "std_vector.i" +%include "std_string.i" +%template(ParameterList) std::vector; + +%rename(ParameterObserverCV) CParameterObserverCV; + +%{ + #include + #include +#ifdef HAVE_TFLOGGER + #include + #include + #include +#endif // HAVE_TFLOGGER +%} + +%include +%include +#ifdef HAVE_TFLOGGER +%include +%include +%include +#endif // HAVE_TFLOGGER diff --git a/src/interfaces/modular/Preprocessor.i b/src/interfaces/swig/Preprocessor.i similarity index 100% rename from src/interfaces/modular/Preprocessor.i rename to src/interfaces/swig/Preprocessor.i diff --git a/src/interfaces/modular/Preprocessor_includes.i b/src/interfaces/swig/Preprocessor_includes.i similarity index 100% rename from src/interfaces/modular/Preprocessor_includes.i rename to src/interfaces/swig/Preprocessor_includes.i diff --git a/src/interfaces/modular/Regression.i b/src/interfaces/swig/Regression.i similarity index 100% rename from src/interfaces/modular/Regression.i rename to src/interfaces/swig/Regression.i diff --git a/src/interfaces/modular/Regression_includes.i b/src/interfaces/swig/Regression_includes.i similarity index 100% rename from src/interfaces/modular/Regression_includes.i rename to src/interfaces/swig/Regression_includes.i diff --git a/src/interfaces/modular/SGBase.i b/src/interfaces/swig/SGBase.i similarity index 97% rename from src/interfaces/modular/SGBase.i rename to src/interfaces/swig/SGBase.i index 54e52841824..368889f587e 100644 --- a/src/interfaces/modular/SGBase.i +++ b/src/interfaces/swig/SGBase.i @@ -111,9 +111,6 @@ public void readExternal(java.io.ObjectInput in) throws java.io.IOException, jav extern void sg_global_print_message(FILE* target, const char* str); extern void sg_global_print_warning(FILE* target, const char* str); extern void sg_global_print_error(FILE* target, const char* str); -#ifndef DISABLE_CANCEL_CALLBACK - extern void 
sg_global_cancel_computations(bool &delayed, bool &immediately); -#endif #ifdef SWIGR #include @@ -162,7 +159,7 @@ public void readExternal(java.io.ObjectInput in) throws java.io.IOException, jav #if !defined(SWIGJAVA) && !defined(SWIGCSHARP) #ifndef DISABLE_CANCEL_CALLBACK shogun::init_shogun(&sg_global_print_message, &sg_global_print_warning, - &sg_global_print_error, &sg_global_cancel_computations); + &sg_global_print_error); #else shogun::init_shogun(&sg_global_print_message, &sg_global_print_warning, &sg_global_print_error); @@ -448,9 +445,9 @@ except ImportError: import copyreg as copy_reg def _sg_reconstructor(cls, base, state): try: - if isinstance(cls, str) and cls.startswith('modshogun.'): + if isinstance(cls, str) and cls.startswith('shogun.'): if base is object: - import modshogun + import shogun return eval(cls+'()') else: base.__new__(cls, state) @@ -479,7 +476,7 @@ def _sg_reduce_ex(self, proto): base = object state = None - args = ('modshogun.' + self.get_name(), base, state) + args = ('shogun.' 
+ self.get_name(), base, state) try: diff --git a/src/interfaces/modular/Statistics.i b/src/interfaces/swig/Statistics.i similarity index 100% rename from src/interfaces/modular/Statistics.i rename to src/interfaces/swig/Statistics.i diff --git a/src/interfaces/modular/Statistics_includes.i b/src/interfaces/swig/Statistics_includes.i similarity index 100% rename from src/interfaces/modular/Statistics_includes.i rename to src/interfaces/swig/Statistics_includes.i diff --git a/src/interfaces/modular/Structure.i b/src/interfaces/swig/Structure.i similarity index 96% rename from src/interfaces/modular/Structure.i rename to src/interfaces/swig/Structure.i index e8264b21c11..48795aa1dc8 100644 --- a/src/interfaces/modular/Structure.i +++ b/src/interfaces/swig/Structure.i @@ -56,11 +56,13 @@ %rename(StructuredOutputMachine) CStructuredOutputMachine; %rename(LinearStructuredOutputMachine) CLinearStructuredOutputMachine; %rename(KernelStructuredOutputMachine) CKernelStructuredOutputMachine; -%rename(DualLibQPBMSOSVM) CDualLibQPBMSOSVM; +#ifdef USE_GPL_SHOGUN +%rename(DualLibQPBMSOSVM) CDualLibQPBMSOSVM; #ifdef USE_MOSEK %rename(PrimalMosekSOSVM) CPrimalMosekSOSVM; #endif /* USE_MOSEK */ +#endif //USE_GPL_SHOGUN %rename(StochasticSOSVM) CStochasticSOSVM; %rename(FWSOSVM) CFWSOSVM; @@ -74,7 +76,9 @@ %include %include +#ifdef USE_GPL_SHOGUN %include +#endif //USE_GPL_SHOGUN %include %include %include @@ -105,11 +109,13 @@ %include %include +#ifdef USE_GPL_SHOGUN %include #ifdef USE_MOSEK %include #endif /* USE_MOSEK */ +#endif //USE_GPL_SHOGUN %include %include diff --git a/src/interfaces/modular/Structure_includes.i b/src/interfaces/swig/Structure_includes.i similarity index 92% rename from src/interfaces/modular/Structure_includes.i rename to src/interfaces/swig/Structure_includes.i index 29a3203df23..8faddc64711 100644 --- a/src/interfaces/modular/Structure_includes.i +++ b/src/interfaces/swig/Structure_includes.i @@ -6,8 +6,10 @@ #include #include #include - + +#ifdef 
USE_GPL_SHOGUN #include +#endif //USE_GPL_SHOGUN #include #include #include @@ -38,11 +40,12 @@ #include #include - #include - +#ifdef USE_GPL_SHOGUN +#include #ifdef USE_MOSEK #include #endif /* USE_MOSEK */ +#endif //USE_GPL_SHOGUN #include #include diff --git a/src/interfaces/modular/Transfer.i b/src/interfaces/swig/Transfer.i similarity index 95% rename from src/interfaces/modular/Transfer.i rename to src/interfaces/swig/Transfer.i index df81c915673..841736587de 100644 --- a/src/interfaces/modular/Transfer.i +++ b/src/interfaces/swig/Transfer.i @@ -20,12 +20,14 @@ %rename(TaskRelationBase) CTaskRelation; %rename(TaskTree) CTaskTree; %rename(TaskGroup) CTaskGroup; +#ifdef USE_GPL_SHOGUN %rename(MultitaskLinearMachineBase) CMultitaskLinearMachine; %rename(MultitaskLeastSquaresRegression) CMultitaskLeastSquaresRegression; %rename(MultitaskLogisticRegression) CMultitaskLogisticRegression; %rename(MultitaskL12LogisticRegression) CMultitaskL12LogisticRegression; %rename(MultitaskTraceLogisticRegression) CMultitaskTraceLogisticRegression; %rename(MultitaskClusteredLogisticRegression) CMultitaskClusteredLogisticRegression; +#endif //USE_GPL_SHOGUN %rename(MultitaskROCEvaluation) CMultitaskROCEvaluation; @@ -50,16 +52,21 @@ %include %include %include +#ifdef USE_GPL_SHOGUN %include %include %include %include %include -%include +#endif //USE_GPL_SHOGUN %include %include +#ifdef USE_GPL_SHOGUN +%include +#endif // USE_GPL_SHOGUN + /* Domain adaptation includes */ #ifdef USE_SVMLIGHT %include diff --git a/src/interfaces/modular/Transfer_includes.i b/src/interfaces/swig/Transfer_includes.i similarity index 93% rename from src/interfaces/modular/Transfer_includes.i rename to src/interfaces/swig/Transfer_includes.i index e354b4a389b..5c699c0d736 100644 --- a/src/interfaces/modular/Transfer_includes.i +++ b/src/interfaces/swig/Transfer_includes.i @@ -11,15 +11,20 @@ #include #include #include +#ifdef USE_GPL_SHOGUN #include #include #include #include #include - #include 
+#endif //USE_GPL_SHOGUN #include +#ifdef USE_GPL_SHOGUN + #include +#endif /* USE_GPL_SHOGUN */ + #ifdef USE_SVMLIGHT #include #endif /* USE_SVMLIGHT */ diff --git a/src/interfaces/modular/abstract_types_extension.i b/src/interfaces/swig/abstract_types_extension.i similarity index 98% rename from src/interfaces/modular/abstract_types_extension.i rename to src/interfaces/swig/abstract_types_extension.i index cfb435c3df2..f78065f10a4 100644 --- a/src/interfaces/modular/abstract_types_extension.i +++ b/src/interfaces/swig/abstract_types_extension.i @@ -18,7 +18,7 @@ //PTZ121108 example of classifier in examples/undocumented/libshogun/classifier_latent_svm.cpp //extention to make use of CData,CLatentModel //TODO:PTZ121108 put it in another file like classifier_latent_svm.i or %include examples/undocumented/libshogun/classifier_latent_svm.cpp - //or find a clever way to wrap CLatenModel, CData instanciation, bless({}, modshogun::LatentModel) + //or find a clever way to wrap CLatenModel, CData instanciation, bless({}, shogun::LatentModel) // is not enough and would need a new wrapper, but yet new CLatentModel() is not working, // (with error: "cannot allocate an object of abstract type") ? 
%inline %{ diff --git a/src/interfaces/modular/bagging.i b/src/interfaces/swig/bagging.i similarity index 100% rename from src/interfaces/modular/bagging.i rename to src/interfaces/swig/bagging.i diff --git a/src/interfaces/modular/bagging_includes.i b/src/interfaces/swig/bagging_includes.i similarity index 100% rename from src/interfaces/modular/bagging_includes.i rename to src/interfaces/swig/bagging_includes.i diff --git a/src/interfaces/modular/modshogun.doxy.in b/src/interfaces/swig/shogun.doxy.in similarity index 86% rename from src/interfaces/modular/modshogun.doxy.in rename to src/interfaces/swig/shogun.doxy.in index 8e2521e8f40..900ac13d1bf 100644 --- a/src/interfaces/modular/modshogun.doxy.in +++ b/src/interfaces/swig/shogun.doxy.in @@ -1,6 +1,6 @@ INPUT = @LIBSHOGUN_SRC_DIR@ -OUTPUT_DIRECTORY = modshogun -XML_OUTPUT = doxygen_xml +OUTPUT_DIRECTORY = shogun_doxygen +XML_OUTPUT = xml CREATE_SUBDIRS = NO OUTPUT_LANGUAGE = English FILE_PATTERNS = *.h diff --git a/src/interfaces/modular/modshogun.i b/src/interfaces/swig/shogun.i similarity index 92% rename from src/interfaces/modular/modshogun.i rename to src/interfaces/swig/shogun.i index 7dea9e39d40..f25432c21e9 100644 --- a/src/interfaces/modular/modshogun.i +++ b/src/interfaces/swig/shogun.i @@ -20,13 +20,13 @@ %include "swig_config.h" %define DOCSTR -"The `modshogun` module gathers all modules available in the SHOGUN toolkit." +"The `shogun` module gathers all modules available in the SHOGUN toolkit." 
%enddef #if defined(USE_SWIG_DIRECTORS) && defined(SWIGPYTHON) -%module(directors="1", docstring=DOCSTR) modshogun +%module(directors="1", docstring=DOCSTR) shogun #else -%module(docstring=DOCSTR) modshogun +%module(docstring=DOCSTR) shogun #endif #undef DOCSTR @@ -50,11 +50,11 @@ #ifdef HAVE_DOXYGEN #ifndef SWIGRUBY -%include "modshogun_doxygen.i" +%include "shogun_doxygen.i" #endif #endif -%include "modshogun_ignores.i" +%include "shogun_ignores.i" %include "Classifier_includes.i" %include "Clustering_includes.i" @@ -114,6 +114,8 @@ %include "bagging.i" %include "Boost.i" +%include "ParameterObserver.i" + #if defined(SWIGPERL) %include "abstract_types_extension.i" #endif diff --git a/src/interfaces/modular/modshogun_ignores.i b/src/interfaces/swig/shogun_ignores.i similarity index 99% rename from src/interfaces/modular/modshogun_ignores.i rename to src/interfaces/swig/shogun_ignores.i index a5f4feff389..86d35a8466a 100644 --- a/src/interfaces/modular/modshogun_ignores.i +++ b/src/interfaces/swig/shogun_ignores.i @@ -222,3 +222,5 @@ %ignore shogun::CMAPInference::CMAPInference(); %ignore shogun::CGraphCut::CGraphCut(); %ignore shogun::CFactorGraphModel::CFactorGraphModel(); + +%ignore shogun::Range; diff --git a/src/interfaces/modular/swig_config.h.in b/src/interfaces/swig/swig_config.h.in similarity index 100% rename from src/interfaces/modular/swig_config.h.in rename to src/interfaces/swig/swig_config.h.in diff --git a/src/shogun/CMakeLists.txt b/src/shogun/CMakeLists.txt index 367f9a2cf9c..71d42b2209c 100644 --- a/src/shogun/CMakeLists.txt +++ b/src/shogun/CMakeLists.txt @@ -29,6 +29,32 @@ if (MSVC AND (BUILD_EXAMPLES OR BUILD_META_EXAMPLES)) CACHE BOOL "Build libshogun static library" FORCE) endif() +# Allow to hide non-bsd compatible codes +OPTION(LICENSE_GPL_SHOGUN "Include GPL codes of Shogun (non-BSD compatible) in build" ON) +SET(USE_GPL_SHOGUN 0) +IF (LICENSE_GPL_SHOGUN) + SET(SHOGUN_GPL_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/src/gpl) + IF(EXISTS 
"${SHOGUN_GPL_INCLUDE_DIR}/shogun") + SET(USE_GPL_SHOGUN 1) + + FILE(GLOB_RECURSE GPL_LIBSHOGUN_SRC ${SHOGUN_GPL_INCLUDE_DIR}/*.${EXT_SRC_CPP} ${SHOGUN_GPL_INCLUDE_DIR}/*.${EXT_SRC_C}) + FILE(GLOB_RECURSE GPL_LIBSHOGUN_HEADERS ${SHOGUN_GPL_INCLUDE_DIR}/*.${EXT_SRC_HEADER}) + + LIST(APPEND LIBSHOGUN_SRC ${GPL_LIBSHOGUN_SRC}) + LIST(APPEND LIBSHOGUN_HEADERS ${GPL_LIBSHOGUN_HEADERS}) + INCLUDE_DIRECTORIES(${SHOGUN_GPL_INCLUDE_DIR}) + + SET(USE_GPL_SHOGUN 1) + ELSE() + MESSAGE(FATAL_ERROR "Shogun can only be built with GPL codes if the source files are in ${SHOGUN_GPL_INCLUDE_DIR}. Please download or disable.\n") + ENDIF() +ENDIF() + +OPTION(USE_SVMLIGHT "SVMLight" ON) +IF(USE_SVMLIGHT AND NOT USE_GPL_SHOGUN) + MESSAGE(FATAL_ERROR "Can only use SVMLight when GPL codes are included") +ENDIF() + # add target to compile the libshogun sources add_library(libshogun OBJECT ${LIBSHOGUN_SRC} ${LIBSHOGUN_HEADERS} ${CMAKE_CURRENT_BINARY_DIR}/lib/config.h) set_property(TARGET libshogun PROPERTY POSITION_INDEPENDENT_CODE TRUE) @@ -56,27 +82,17 @@ FOREACH(template ${LIBSHOGUN_SRC_TMP}) STRING(REGEX REPLACE ${EXT_CPP_TMP} ${EXT_CPP_PY} generator_script "${template}") STRING(REGEX REPLACE ".*/(.*).${EXT_CPP_TMP}" "\\1" generated_target "${template}") - IF(MSVC) - FOREACH(h ${LIBSHOGUN_HEADERS}) - FILE(APPEND ${CMAKE_CURRENT_BINARY_DIR}/headers_list.txt "${h}\n") - ENDFOREACH() - - ADD_CUSTOM_COMMAND(OUTPUT ${generated_cpp} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${template} ${generated_cpp} - COMMAND ${PYTHON_EXECUTABLE} ${generator_script} ${generated_cpp} -in ${CMAKE_CURRENT_BINARY_DIR}/headers_list.txt - DEPENDS ${template} ${generator_script} version ${LIBSHOGUN_HEADERS} ${CMAKE_CURRENT_BINARY_DIR}/headers_list.txt - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - COMMENT "Generating ${generated_cpp}" - ) - ELSE() - ADD_CUSTOM_COMMAND(OUTPUT ${generated_cpp} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${template} ${generated_cpp} - COMMAND ${PYTHON_EXECUTABLE} 
${generator_script} ${generated_cpp} ${LIBSHOGUN_HEADERS} - DEPENDS ${template} ${generator_script} version ${LIBSHOGUN_HEADERS} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - COMMENT "Generating ${generated_cpp}" - ) - ENDIF() + FOREACH(h ${LIBSHOGUN_HEADERS}) + FILE(APPEND ${CMAKE_CURRENT_BINARY_DIR}/headers_list.txt "${h}\n") + ENDFOREACH() + + ADD_CUSTOM_COMMAND(OUTPUT ${generated_cpp} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${template} ${generated_cpp} + COMMAND ${PYTHON_EXECUTABLE} ${generator_script} ${generated_cpp} -in ${CMAKE_CURRENT_BINARY_DIR}/headers_list.txt + DEPENDS ${template} ${generator_script} version ${LIBSHOGUN_HEADERS} ${CMAKE_CURRENT_BINARY_DIR}/headers_list.txt + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Generating ${generated_cpp}" + ) ADD_CUSTOM_TARGET(${generated_target} DEPENDS ${generated_cpp}) add_dependencies(libshogun ${generated_target}) @@ -97,6 +113,14 @@ target_include_directories(shogun PUBLIC $ $ ) +IF (${USE_GPL_SHOGUN}) + target_include_directories(shogun PUBLIC $) +ENDIF() + +get_property(dirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES) +foreach(dir ${dirs}) + message(STATUS "dir='${dir}'") +endforeach() # create shogun_deps meta target to store all the # dependencies of shogun itself, this is used in unit test @@ -106,6 +130,9 @@ target_include_directories(shogun_deps INTERFACE $ $ ) +IF (${USE_GPL_SHOGUN}) + target_include_directories(shogun_deps INTERFACE $) +ENDIF() # add bundled libraries to dependency if(SHOGUN_DEPENDS) @@ -116,6 +143,15 @@ endif() if (LIBSHOGUN_BUILD_STATIC) add_library(shogun-static STATIC $ ${CMAKE_CURRENT_BINARY_DIR}/lib/config.h) set_property(TARGET shogun-static PROPERTY OUTPUT_NAME shogun) + target_include_directories(shogun-static PUBLIC + $ + $ + $ + ) + IF (${USE_GPL_SHOGUN}) + target_include_directories(shogun-static PUBLIC $) + ENDIF() + if(MSVC) target_link_libraries(shogun-static PUBLIC winmm.lib) endif() @@ -157,11 +193,6 @@ if 
(OPENMP_FOUND) endif() endif() -FIND_PACKAGE(Spinlock) -IF (SPINLOCK_FOUND) - SET(USE_SPINLOCKS 1) -ENDIF() - FIND_PACKAGE(CxaDemangle) # check SSE and SSE2 intrinsics header @@ -172,8 +203,6 @@ ENDIF((NOT CYGWIN) AND (NOT DISABLE_SSE)) FIND_PACKAGE(CxaDemangle) ############################ std lib functions -CHECK_INCLUDE_FILE_CXX("unordered_map" HAVE_STD_UNORDERED_MAP) - include (CheckCXXSymbolExists) CHECK_CXX_SYMBOL_EXISTS(isfinite "cmath" HAVE_DECL_ISFINITE) CHECK_CXX_SYMBOL_EXISTS(isinf "cmath" HAVE_DECL_ISINF) @@ -231,64 +260,30 @@ SHOGUN_DEPENDENCIES( VERSION ${VIENNACL_VERSION_MINIMUM} CONFIG_FLAG HAVE_VIENNACL) -# Linear algebra default global backend setups -SET_LINALG_BACKEND(LINALG_DEFAULT_BACKEND GLOBAL) - -# Linear algebra default module specific backend setup -# Linear solver module -SET_LINALG_BACKEND(LINALG_LINEAR_SOLVER_LIB LINSLV) - -# Eigen solver module -SET_LINALG_BACKEND(LINALG_EIGENSOLVER_LIB EIGSLV) - ####### /LINALG -# prefer original LAPACK, if needed -OPTION(USE_ORIGINAL_LAPACK "Original LAPACK" OFF) -FIND_PACKAGE(LAPACK) -IF (LAPACK_FOUND) - # find out whether it is Accelerate.framework we found for LaPack/BLAS - IF("${LAPACK_LIBRARIES}" MATCHES ".*/Accelerate.framework$") - SET(HAVE_MVEC 1) - SET(HAVE_CATLAS 1) - SET(HAVE_LAPACK 1) - SHOGUN_LINK_LIBS(${LAPACK_LIBRARIES}) - ELSEIF("${LAPACK_LIBRARIES}" MATCHES ".*/mkl_.*") - SET(HAVE_LAPACK 1) - SET(HAVE_MKL 1) - SHOGUN_LINK_LIBS(${LAPACK_LIBRARIES}) - ELSE() - FIND_PACKAGE(Atlas) - IF(Atlas_FOUND) - SET(HAVE_ATLAS 1) - SET(HAVE_LAPACK 1) - SHOGUN_INCLUDE_DIRS(SCOPE PUBLIC ${Atlas_INCLUDE_DIRS}) - IF(USE_ORIGINAL_LAPACK) - SET(ATLAS_CLAPACK_LIBRARY) - FOREACH(ITR ${Atlas_LIBRARIES}) - IF(ITR MATCHES ".*atlas.*lapack.*" OR ITR MATCHES ".*lapack.*atlas.*") - STRING(REGEX REPLACE "lapack" "clapack" ITR ${ITR}) - LIST(APPEND ATLAS_CLAPACK_LIBRARY ${ITR}) - ENDIF(ITR MATCHES ".*atlas.*lapack.*" OR ITR MATCHES ".*lapack.*atlas.*") - ENDFOREACH(ITR ${ATLAS_LIBRARIES}) - MESSAGE(STATUS "using 
ATLAS-CLAPACK from: ${ATLAS_CLAPACK_LIBRARY}") - SHOGUN_LINK_LIBS(${LAPACK_LIBRARIES} ${ATLAS_CLAPACK_LIBRARY} ${Atlas_LIBRARIES}) - ELSE(USE_ORIGINAL_LAPACK) - SHOGUN_LINK_LIBS(${LAPACK_LIBRARIES} ${Atlas_LIBRARIES}) - ENDIF(USE_ORIGINAL_LAPACK) - ELSE() - FIND_PACKAGE(CBLAS) - if(CBLAS_LIBRARY) - SET(HAVE_LAPACK 1) - SHOGUN_LINK_LIBS(${LAPACK_LIBRARIES} ${CBLAS_LIBRARY}) - else() - SET(HAVE_LAPACK 1) - SHOGUN_LINK_LIBS(${LAPACK_LIBRARIES}) - endif() - ENDIF() - ENDIF() +FIND_PACKAGE(rxcpp) +IF(NOT rxcpp_FOUND) + include(external/rxcpp) + SHOGUN_INCLUDE_DIRS(SCOPE PUBLIC SYSTEM + $ + $ + ) +ELSE() + SHOGUN_INCLUDE_DIRS(SCOPE PUBLIC SYSTEM ${rxcpp_INCLUDE_DIR}) +ENDIF() + +# TFLogger package +FIND_PACKAGE(TFLogger 0.1.0 CONFIG) +IF (TFLogger_FOUND) + SET(HAVE_TFLOGGER 1) + SHOGUN_INCLUDE_DIRS(SCOPE PRIVATE SYSTEM ${TFLogger_INCLUDE_DIR}) + target_link_libraries(shogun PRIVATE tflogger::tflogger) ENDIF() +#### LAPACK +include(ShogunFindLAPACK) + SHOGUN_DEPENDENCIES( LIBRARY GLPK SCOPE PRIVATE @@ -358,10 +353,14 @@ SHOGUN_DEPENDENCIES( SCOPE PUBLIC CONFIG_FLAG HAVE_XML) -SHOGUN_DEPENDENCIES( - LIBRARY HDF5 - SCOPE PUBLIC - CONFIG_FLAG HAVE_HDF5) +if (NOT WIN32) + # FIXME: HDF5 linking on WIN32 is broken. 
+ # at least with the hdf5 supplied in anaconda + SHOGUN_DEPENDENCIES( + LIBRARY HDF5 + SCOPE PUBLIC + CONFIG_FLAG HAVE_HDF5) +endif () SHOGUN_DEPENDENCIES( LIBRARY CURL @@ -434,9 +433,6 @@ SHOGUN_DEPENDENCIES( SCOPE PRIVATE CONFIG_FLAG HAVE_COLPACK) -IF(USE_ARPREC AND NOT LICENSE_GPL_SHOGUN) - MESSAGE(FATAL_ERROR "ARPREC can only be used if USE_GPL_SHOGUN is enabled ") -ENDIF() SHOGUN_DEPENDENCIES( LIBRARY ARPREC SCOPE PRIVATE @@ -482,8 +478,10 @@ IF (CTAGS_FOUND) ADD_CUSTOM_COMMAND(OUTPUT ${CTAGS_FILE} COMMAND ${CTAGS_EXECUTABLE} -f ${CTAGS_FILE} # functions, classes, macroses, enumerations, enumerators, typedefs - --c++-kinds=fcdget - -R ${CMAKE_CURRENT_SOURCE_DIR}) + --c++-kinds=fcdgetp + --fields=+im + -R ${CMAKE_SOURCE_DIR}) + ADD_CUSTOM_TARGET(ctags DEPENDS ${CTAGS_FILE}) SET_SOURCE_FILES_PROPERTIES(${CTAGS_FILE} PROPERTIES GENERATED 1) ENDIF() diff --git a/src/shogun/base/Parameter.cpp b/src/shogun/base/Parameter.cpp index 9e572ab78d4..815ab27afb1 100644 --- a/src/shogun/base/Parameter.cpp +++ b/src/shogun/base/Parameter.cpp @@ -2195,7 +2195,7 @@ TParameter::new_sgserial(CSGObject** param, if (*param != NULL) SG_UNREF(*param); - *param = new_sgserializable(sgserializable_name, generic); + *param = create(sgserializable_name, generic); if (*param == NULL) { string_t buf = {'\0'}; diff --git a/src/shogun/base/SGObject.cpp b/src/shogun/base/SGObject.cpp index 394ed0a1da5..684fd703758 100644 --- a/src/shogun/base/SGObject.cpp +++ b/src/shogun/base/SGObject.cpp @@ -14,33 +14,32 @@ #include #include +#include +#include #include #include -#include -#include +#include #include -#include #include -#include +#include +#include #include #include #include -#ifdef HAVE_CXX11 +#include +#include + #include -#else -#include -#endif namespace shogun { -#ifdef HAVE_CXX11 - typedef std::unordered_map ParametersMap; -#else typedef std::map ParametersMap; -#endif + typedef std::unordered_map> + ObsParamsList; class CSGObject::Self { @@ -150,7 +149,7 @@ namespace shogun using 
namespace shogun; -CSGObject::CSGObject() : self() +CSGObject::CSGObject() : self(), param_obs_list() { init(); set_global_objects(); @@ -160,7 +159,8 @@ CSGObject::CSGObject() : self() } CSGObject::CSGObject(const CSGObject& orig) -: self(), io(orig.io), parallel(orig.parallel), version(orig.version) + : self(), param_obs_list(), io(orig.io), parallel(orig.parallel), + version(orig.version) { init(); set_global_objects(); @@ -178,9 +178,11 @@ CSGObject::~CSGObject() delete m_model_selection_parameters; delete m_gradient_parameters; delete m_refcount; + delete m_subject_params; + delete m_observable_params; + delete m_subscriber_params; } -#ifdef USE_REFERENCE_COUNTING int32_t CSGObject::ref() { int32_t count = m_refcount->ref(); @@ -210,7 +212,6 @@ int32_t CSGObject::unref() return m_refcount->ref_count(); } } -#endif //USE_REFERENCE_COUNTING #ifdef TRACE_MEMORY_ALLOCS #include @@ -502,6 +503,10 @@ void CSGObject::init() m_save_pre_called = false; m_save_post_called = false; m_hash = 0; + + m_subject_params = new SGSubject(); + m_observable_params = new SGObservable(m_subject_params->get_observable()); + m_subscriber_params = new SGSubscriber(m_subject_params->get_subscriber()); } void CSGObject::print_modsel_params() @@ -729,7 +734,7 @@ bool CSGObject::equals(CSGObject* other, float64_t accuracy, bool tolerant) CSGObject* CSGObject::clone() { SG_DEBUG("Constructing an empty instance of %s\n", get_name()); - CSGObject* copy=new_sgserializable(get_name(), this->m_generic); + CSGObject* copy = create(get_name(), this->m_generic); SG_REF(copy); @@ -755,7 +760,7 @@ bool CSGObject::clone_parameters(CSGObject* other) { REQUIRE(other, "Provided instance must be non-empty.\n"); index_t num_parameters = m_parameters->get_num_parameters(); - + REQUIRE(other->m_parameters->get_num_parameters() == num_parameters, "Number of parameters of provided instance (%d) must match this instance (%d).\n", other->m_parameters->get_num_parameters(), num_parameters); @@ -802,3 +807,83 @@ 
bool CSGObject::type_erased_has(const BaseTag& _tag) const { return self->has(_tag); } + +void CSGObject::subscribe_to_parameters(ParameterObserverInterface* obs) +{ + auto sub = rxcpp::make_subscriber( + [obs](TimedObservedValue e) { obs->on_next(e); }, + [obs](std::exception_ptr ep) { obs->on_error(ep); }, + [obs]() { obs->on_complete(); }); + + // Create an observable which emits values only if they are about + // parameters selected by the observable. + auto subscription = m_observable_params + ->filter([obs](ObservedValue v) { + return obs->filter(v.get_name()); + }) + .timestamp() + .subscribe(sub); +} + +void CSGObject::observe(const ObservedValue value) +{ + m_subscriber_params->on_next(value); +} + +class CSGObject::ParameterObserverList +{ +public: + void register_param( + const std::string& name, const SG_OBS_VALUE_TYPE type, + const std::string& description) + { + m_list_obs_params[name] = std::make_pair(type, description); + } + + std::string type_name(SG_OBS_VALUE_TYPE type) + { + std::string value; + switch (type) + { + case TENSORBOARD: + value = std::string("Tensorboard"); + break; + case CROSSVALIDATION: + value = std::string("CrossValidation"); + break; + default: + value = std::string("Unknown"); + break; + } + return value; + } + + ObsParamsList get_list() const + { + return m_list_obs_params; + } + +private: + /** List of observable parameters (name, description) */ + ObsParamsList m_list_obs_params; +}; + +void CSGObject::register_observable_param( + const std::string& name, const SG_OBS_VALUE_TYPE type, + const std::string& description) +{ + param_obs_list->register_param(name, type, description); +} + +void CSGObject::list_observable_parameters() +{ + SG_INFO("List of observable parameters of object %s\n", get_name()); + SG_PRINT("------"); + for (auto const& x : param_obs_list->get_list()) + { + SG_PRINT( + "%s [%s]: %s\n", x.first.c_str(), + param_obs_list->type_name(x.second.first).c_str(), + x.second.second.c_str()); + } +} diff --git 
a/src/shogun/base/SGObject.h b/src/shogun/base/SGObject.h index 5af48151343..08bb98f97a7 100644 --- a/src/shogun/base/SGObject.h +++ b/src/shogun/base/SGObject.h @@ -13,15 +13,19 @@ #ifndef __SGOBJECT_H__ #define __SGOBJECT_H__ -#include -#include -#include -#include #include #include #include -#include +#include +#include +#include #include +#include +#include +#include +#include + +#include /** \namespace shogun * @brief all of classes and functions are contained in the shogun namespace @@ -33,6 +37,7 @@ class SGIO; class Parallel; class Parameter; class CSerializableFile; +class ParameterObserverInterface; template class CMap; @@ -44,15 +49,9 @@ template class SGStringList; * define reference counter macros ******************************************************************************/ -#ifdef USE_REFERENCE_COUNTING #define SG_REF(x) { if (x) (x)->ref(); } #define SG_UNREF(x) { if (x) { if ((x)->unref()==0) (x)=NULL; } } #define SG_UNREF_NO_NULL(x) { if (x) { (x)->unref(); } } -#else -#define SG_REF(x) -#define SG_UNREF(x) -#define SG_UNREF_NO_NULL(x) -#endif /******************************************************************************* * Macros for registering parameters/model selection parameters @@ -125,6 +124,14 @@ enum EGradientAvailability class CSGObject { public: + typedef rxcpp::subjects::subject SGSubject; + typedef rxcpp::observable> + SGObservable; + typedef rxcpp::subscriber< + ObservedValue, rxcpp::observer> + SGSubscriber; + /** default constructor */ CSGObject(); @@ -134,7 +141,6 @@ class CSGObject /** destructor */ virtual ~CSGObject(); -#ifdef USE_REFERENCE_COUNTING /** increase reference counter * * @return reference count @@ -153,7 +159,6 @@ class CSGObject * @return reference count */ int32_t unref(); -#endif //USE_REFERENCE_COUNTING #ifdef TRACE_MEMORY_ALLOCS static void list_memory_allocs(); @@ -402,6 +407,23 @@ class CSGObject return get(tag); } +#ifndef SWIG + /** + * Get parameters observable + * @return RxCpp observable + */ + 
SGObservable* get_parameters_observable() + { + return m_observable_params; + }; +#endif + + /** Subscribe a parameter observer to watch over params */ + void subscribe_to_parameters(ParameterObserverInterface* obs); + + /** Print to stdout a list of observable parameters */ + void list_observable_parameters(); + protected: /** Can (optionally) be overridden to pre-initialize some member * variables which are not PARAMETER::ADD'ed. Make sure that at @@ -553,6 +575,26 @@ class CSGObject class Self; Unique self; + class ParameterObserverList; + Unique param_obs_list; + +protected: + /** + * Observe a parameter value and emit them to observer. + * @param value Observed parameter's value + */ + void observe(const ObservedValue value); + + /** + * Register which params this object can emit. + * @param name the param name + * @param type the param type + * @param description a user oriented description + */ + void register_observable_param( + const std::string& name, const SG_OBS_VALUE_TYPE type, + const std::string& description); + public: /** io */ SGIO* io; @@ -584,6 +626,15 @@ class CSGObject bool m_save_post_called; RefCount* m_refcount; + + /** Subject used to create the params observer */ + SGSubject* m_subject_params; + + /** Parameter Observable */ + SGObservable* m_observable_params; + + /** Subscriber used to call onNext, onComplete etc.*/ + SGSubscriber* m_subscriber_params; }; } #endif // __SGOBJECT_H__ diff --git a/src/shogun/base/Version.cpp b/src/shogun/base/Version.cpp index 7a56f533e52..28a17c7eadb 100644 --- a/src/shogun/base/Version.cpp +++ b/src/shogun/base/Version.cpp @@ -19,7 +19,7 @@ using namespace shogun; namespace shogun { -const int32_t Version::version_revision = VERSION_REVISION; +const int64_t Version::version_revision = VERSION_REVISION; const int32_t Version::version_year = VERSION_YEAR; const int32_t Version::version_month = VERSION_MONTH; const int32_t Version::version_day = VERSION_DAY; @@ -43,7 +43,7 @@ Version::~Version() void 
Version::print_version() { - SG_SPRINT("libshogun (%s/%s%d)\n\n", MACHINE, VERSION_RELEASE, version_revision) + SG_SPRINT("libshogun (%s/%s%" PRId64 ")\n\n", MACHINE, VERSION_RELEASE, version_revision) SG_SPRINT("Copyright (C) 1999-2009 Fraunhofer Institute FIRST\n") SG_SPRINT("Copyright (C) 1999-2011 Max Planck Society\n") SG_SPRINT("Copyright (C) 2009-2011 Berlin Institute of Technology\n") @@ -65,7 +65,7 @@ const char* Version::get_version_release() return version_release; } -int32_t Version::get_version_revision() +int64_t Version::get_version_revision() { return version_revision; } diff --git a/src/shogun/base/Version.h b/src/shogun/base/Version.h index f4595a4cca2..edf3b522819 100644 --- a/src/shogun/base/Version.h +++ b/src/shogun/base/Version.h @@ -43,7 +43,7 @@ class Version static const char* get_version_release(); /** get version revision */ - static int32_t get_version_revision(); + static int64_t get_version_revision(); /** get version year */ static int32_t get_version_year(); @@ -88,7 +88,7 @@ class Version static const char version_extra[128]; /** version revision */ - static const int32_t version_revision; + static const int64_t version_revision; /** version year */ static const int32_t version_year; /** version month */ diff --git a/src/shogun/base/class_list.cpp.py b/src/shogun/base/class_list.cpp.py index 46f9887d1be..8ca0a4d9018 100644 --- a/src/shogun/base/class_list.cpp.py +++ b/src/shogun/base/class_list.cpp.py @@ -1,12 +1,36 @@ #!/usr/bin/env python -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. +# Copyright (c) The Shogun Machine Learning Toolbox +# Copyright (c) 2008-2009 Fraunhofer Institute FIRST and Max-Planck-Society +# Written (w) 2008-2009 Soeren Sonnenburg +# Written (w) 2016 - 2017 Heiko Strathmann +# All rights reserved. 
# -# Written (W) 2008-2009 Soeren Sonnenburg -# Copyright (C) 2008-2009 Fraunhofer Institute FIRST and Max Planck Society +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# The views and conclusions contained in the software and documentation are those +# of the authors and should not be interpreted as representing official policies, +# either expressed or implied, of the Shogun Development Team. 
+ +import os class_str = 'class' types = ["BOOL", "CHAR", "INT8", "UINT8", "INT16", "UINT16", "INT32", "UINT32", @@ -81,24 +105,31 @@ def extract_class_name(lines, line_nr, line, blacklist): return c[1:] -def get_includes(classes, basedir="."): - class_headers = [] - for c, t in classes: - class_headers.append(c+".h") - - import os - result = [] - for root, dirs, files in os.walk(basedir): - for f in files: - if f in class_headers: - result.append(os.path.join(os.path.relpath(root, basedir), f)) - +def get_includes(classes, headers_absolute_fnames): includes = [] - result.sort() - for o in result: - includes.append('#include ' % o.strip().lstrip('./')) - return includes + for c in classes: + for h in headers_absolute_fnames: + class_from_header = os.path.splitext(os.path.basename(h))[0] + + # build relative include path from absolute header filename + if class_from_header in c: + # find *last* occurence of "shogun" dir in header + shogun_dir = "shogun" + assert shogun_dir in h + tails = [] + head, tail = os.path.split(h) + while tail != shogun_dir and len(head)>0: + tails += [tail] + head, tail = os.path.split(head) + + # construct include path from collected tails + tails.reverse() + include = os.path.join(*([shogun_dir] + tails)) + + # thats your include header + includes.append("#include <%s>" % include) + return includes def get_definitions(classes): definitions = [] @@ -287,12 +318,6 @@ def get_blacklist(): blacklist[cfg] = 1 return blacklist - -def get_base_src_dir(headers): - import os.path - return os.path.commonprefix(headers) - - if __name__ == '__main__': import sys TEMPL_FILE = sys.argv[1] @@ -307,11 +332,10 @@ def get_base_src_dir(headers): blacklist = get_blacklist() - base_src_dir = get_base_src_dir(HEADERS) classes = extract_classes(HEADERS, False, blacklist, False) template_classes = extract_classes(HEADERS, True, blacklist, False) complex_template_classes = extract_classes(HEADERS, True, blacklist, True) - includes = 
get_includes(classes+template_classes+complex_template_classes, basedir=base_src_dir) + includes = get_includes(classes+template_classes+complex_template_classes, HEADERS) definitions = get_definitions(classes) template_definitions = get_template_definitions(template_classes, False) complex_template_definitions = get_template_definitions(complex_template_classes, True) diff --git a/src/shogun/base/class_list.cpp.templ b/src/shogun/base/class_list.cpp.templ index 00e076f2604..2afbf2c3d7d 100644 --- a/src/shogun/base/class_list.cpp.templ +++ b/src/shogun/base/class_list.cpp.templ @@ -1,11 +1,33 @@ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. + * Copyright (c) The Shogun Machine Learning Toolbox + * Copyright (c) 2009 Fraunhofer Institute FIRST and Max-Planck-Society + * Written (w) 2009 Soeren Sonnenburg + * Written (w) 2016 - 2017 Heiko Strathmann + * All rights reserved. * - * Written (W) 2009 Soeren Sonnenburg - * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation are those + * of the authors and should not be interpreted as representing official policies, + * either expressed or implied, of the Shogun Development Team. */ #include @@ -22,12 +44,12 @@ REPLACE template_definitions THIS REPLACE complex_template_definitions THIS -typedef CSGObject* (*new_sgserializable_t)(EPrimitiveType generic); +typedef CSGObject* (*create_function)(EPrimitiveType generic); #ifndef DOXYGEN_SHOULD_SKIP_THIS typedef struct { const char* m_class_name; - new_sgserializable_t m_new_sgserializable; + create_function m_create_function; } class_list_entry_t; #endif @@ -36,14 +58,13 @@ REPLACE struct THIS {NULL, NULL} }; -CSGObject* shogun::new_sgserializable(const char* sgserializable_name, - EPrimitiveType generic) +CSGObject* shogun::create(const char* classname, EPrimitiveType generic) { for (class_list_entry_t* i=class_list; i->m_class_name != NULL; i++) { - if (strncmp(i->m_class_name, sgserializable_name, STRING_LEN) == 0) - return i->m_new_sgserializable(generic); + if (strncmp(i->m_class_name, classname, STRING_LEN) == 0) + return i->m_create_function(generic); } return NULL; diff --git a/src/shogun/base/class_list.h b/src/shogun/base/class_list.h index ed1ed6617f1..99b495c3507 100644 --- a/src/shogun/base/class_list.h +++ b/src/shogun/base/class_list.h @@ -18,12 +18,11 @@ namespace shogun { class CSGObject; - /** new shogun serializable 
+ /** new shogun instance * @param sgserializable_name * @param generic */ - CSGObject* new_sgserializable(const char* sgserializable_name, - EPrimitiveType generic); + CSGObject* create(const char* sgserializable_name, EPrimitiveType generic); } #endif /* __SG_CLASS_LIST_H__ */ diff --git a/src/shogun/base/init.cpp b/src/shogun/base/init.cpp index 104636e925d..fe7e46421e4 100644 --- a/src/shogun/base/init.cpp +++ b/src/shogun/base/init.cpp @@ -12,17 +12,23 @@ #include #include +#include +#include +#include +#include +#include + +#include + #include #include #include -#include -#include -#include -#include -#include +#include +#include #include #include +#include #ifdef TRACE_MEMORY_ALLOCS #include shogun::CMap* sg_mallocs=NULL; @@ -39,25 +45,22 @@ namespace shogun Version* sg_version=NULL; CMath* sg_math=NULL; CRandom* sg_rand=NULL; + std::unique_ptr sg_signal(nullptr); std::unique_ptr sg_linalg(nullptr); /// function called to print normal messages - void (*sg_print_message)(FILE* target, const char* str) = NULL; + std::function sg_print_message(nullptr); /// function called to print warning messages - void (*sg_print_warning)(FILE* target, const char* str) = NULL; + std::function sg_print_warning(nullptr); /// function called to print error messages - void (*sg_print_error)(FILE* target, const char* str) = NULL; - - /// function called to cancel things - void (*sg_cancel_computations)(bool &delayed, bool &immediately)=NULL; - + std::function sg_print_error(nullptr); - void init_shogun(void (*print_message)(FILE* target, const char* str), - void (*print_warning)(FILE* target, const char* str), - void (*print_error)(FILE* target, const char* str), - void (*cancel_computations)(bool &delayed, bool &immediately)) + void init_shogun( + const std::function print_message, + const std::function print_warning, + const std::function print_error) { if (!sg_io) sg_io = new shogun::SGIO(); @@ -71,6 +74,8 @@ namespace shogun sg_rand = new shogun::CRandom(); if (!sg_linalg) 
sg_linalg = std::unique_ptr(new shogun::SGLinalg()); + if (!sg_signal) + sg_signal = std::unique_ptr(new shogun::CSignal()); #ifdef TRACE_MEMORY_ALLOCS if (!sg_mallocs) @@ -87,7 +92,9 @@ namespace shogun sg_print_message=print_message; sg_print_warning=print_warning; sg_print_error=print_error; - sg_cancel_computations=cancel_computations; + + // Set up signal handler + std::signal(SIGINT, sg_signal->handler); init_from_env(); } @@ -111,10 +118,6 @@ namespace shogun sg_mallocs=NULL; SG_UNREF(mallocs); #endif - sg_print_message=NULL; - sg_print_warning=NULL; - sg_print_error=NULL; - sg_cancel_computations=NULL; SG_UNREF(sg_rand); SG_UNREF(sg_math); @@ -122,6 +125,10 @@ namespace shogun SG_UNREF(sg_parallel); SG_UNREF(sg_io); + delete CSignal::m_subscriber; + delete CSignal::m_observable; + delete CSignal::m_subject; + #ifdef HAVE_PROTOBUF ::google::protobuf::ShutdownProtobufLibrary(); #endif @@ -191,12 +198,19 @@ namespace shogun SG_REF(sg_rand); return sg_rand; } + + CSignal* get_global_signal() + { + return sg_signal.get(); + } + #ifndef SWIG // SWIG should skip this part SGLinalg* get_global_linalg() { return sg_linalg.get(); } #endif + void init_from_env() { char* env_log_val = NULL; diff --git a/src/shogun/base/init.h b/src/shogun/base/init.h index 7f60097496a..53b8455a6fb 100644 --- a/src/shogun/base/init.h +++ b/src/shogun/base/init.h @@ -13,6 +13,7 @@ #include +#include #include namespace shogun @@ -23,91 +24,95 @@ namespace shogun class Parallel; class CRandom; class SGLinalg; - -/** This function must be called before libshogun is used. Usually shogun does - * not provide any output messages (neither debugging nor error; apart from - * exceptions). 
This function allows one to specify customized output - * callback functions and a callback function to check for exceptions: - * - * @param print_message function pointer to print a message - * @param print_warning function pointer to print a warning message - * @param print_error function pointer to print an error message (this will be - * printed before shogun throws an exception) - * - * @param cancel_computations function pointer to check for exception - * - */ -void init_shogun(void (*print_message)(FILE* target, const char* str) = NULL, - void (*print_warning)(FILE* target, const char* str) = NULL, - void (*print_error)(FILE* target, const char* str) = NULL, - void (*cancel_computations)(bool &delayed, bool &immediately)=NULL); - -/** init shogun with defaults */ -void init_shogun_with_defaults(); - -/** This function must be called when one stops using libshogun. It will - * perform a number of cleanups */ -void exit_shogun(); - -/** set the global io object - * - * @param io io object to use - */ -void set_global_io(SGIO* io); - -/** get the global io object - * - * @return io object - */ -SGIO* get_global_io(); - -/** set the global parallel object - * - * @param parallel parallel object to use - */ -void set_global_parallel(Parallel* parallel); - -/** get the global parallel object - * - * @return parallel object - */ -Parallel* get_global_parallel(); - -/** set the global version object - * - * @param version version object to use - */ -void set_global_version(Version* version); - -/** get the global version object - * - * @return version object - */ -Version* get_global_version(); - -/** set the global math object - * - * @param math math object to use - */ -void set_global_math(CMath* math); - -/** get the global math object - * - * @return math object - */ -CMath* get_global_math(); - -/** set the global random object - * - * @param rand random object to use - */ -void set_global_rand(CRandom* rand); - -/** get the global random object - * - * 
@return random object - */ -CRandom* get_global_rand(); + class CSignal; + + /** This function must be called before libshogun is used. Usually shogun + * does + * not provide any output messages (neither debugging nor error; apart from + * exceptions). This function allows one to specify customized output + * callback functions and a callback function to check for exceptions: + * + * @param print_message function pointer to print a message + * @param print_warning function pointer to print a warning message + * @param print_error function pointer to print an error message (this will + * be + * printed before shogun throws an + * exception) + * + * @param cancel_computations function pointer to check for exception + * + */ + void init_shogun( + const std::function print_message = nullptr, + const std::function print_warning = nullptr, + const std::function print_error = nullptr); + + /** init shogun with defaults */ + void init_shogun_with_defaults(); + + /** This function must be called when one stops using libshogun. 
It will + * perform a number of cleanups */ + void exit_shogun(); + + /** set the global io object + * + * @param io io object to use + */ + void set_global_io(SGIO* io); + + /** get the global io object + * + * @return io object + */ + SGIO* get_global_io(); + + /** set the global parallel object + * + * @param parallel parallel object to use + */ + void set_global_parallel(Parallel* parallel); + + /** get the global parallel object + * + * @return parallel object + */ + Parallel* get_global_parallel(); + + /** set the global version object + * + * @param version version object to use + */ + void set_global_version(Version* version); + + /** get the global version object + * + * @return version object + */ + Version* get_global_version(); + + /** set the global math object + * + * @param math math object to use + */ + void set_global_math(CMath* math); + + /** get the global math object + * + * @return math object + */ + CMath* get_global_math(); + + /** set the global random object + * + * @param rand random object to use + */ + void set_global_rand(CRandom* rand); + + /** get the global random object + * + * @return random object + */ + CRandom* get_global_rand(); #ifndef SWIG // SWIG should skip this part /** get the global linalg library object @@ -117,20 +122,23 @@ CRandom* get_global_rand(); SGLinalg* get_global_linalg(); #endif +/** get the global singnal handler object + * + * @return linalg object + */ +CSignal* get_global_signal(); + /** Checks environment variables and modifies global objects */ void init_from_env(); /// function called to print normal messages -extern void (*sg_print_message)(FILE* target, const char* str); +extern std::function sg_print_message; /// function called to print warning messages -extern void (*sg_print_warning)(FILE* target, const char* str); +extern std::function sg_print_warning; /// function called to print error messages -extern void (*sg_print_error)(FILE* target, const char* str); - -/// function called to cancel 
things -extern void (*sg_cancel_computations)(bool &delayed, bool &immediately); +extern std::function sg_print_error; } #endif //__SG_INIT__ diff --git a/src/shogun/base/progress.h b/src/shogun/base/progress.h new file mode 100644 index 00000000000..1c13095617b --- /dev/null +++ b/src/shogun/base/progress.h @@ -0,0 +1,741 @@ +/* +* BSD 3-Clause License +* +* Copyright (c) 2017, Shogun-Toolbox e.V. +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* +* * Neither the name of the copyright holder nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+* +* Written (W) 2017 Giovanni De Toni +* +*/ + +#ifndef __SG_PROGRESS_H__ +#define __SG_PROGRESS_H__ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifdef WIN32 +#include +#else +#include +#include + +#endif + +namespace shogun +{ + + /** Possible print modes */ + enum SG_PRG_MODE + { + ASCII, + UTF8 + }; + + /** + * @class Printer class that displays the progress bar. + */ + class ProgressPrinter + { + public: + /** + * Creates a @ref ProgressPrinter instance. + * @param io SGIO object which will be used to print the progress bar. + * @param max_value interval maximum value. + * @param min_value interval minimum value. + * @param prefix string which will be printed before the progress bar. + * @param mode char mode (UTF8, ASCII etc.). + */ + ProgressPrinter( + const SGIO& io, float64_t max_value, float64_t min_value, + const std::string& prefix, const SG_PRG_MODE mode) + : m_io(io), m_max_value(max_value), m_min_value(min_value), + m_prefix(prefix), m_mode(mode), m_last_progress(0), + m_last_progress_time(0), + m_progress_start_time(CTime::get_curtime()), + m_current_value(min_value) + { + } + ~ProgressPrinter() + { + } + + /** + * Increment and print the progress bar. + * Everything is locked to prevent race conditions + * or characters overlapping (especially within + * multi threaded environments). 
+ */ + void print_progress() const + { + lock.lock(); + if (m_current_value.load() - m_min_value > + m_max_value - m_min_value) + { + increment(); + lock.unlock(); + return; + } + print_progress_impl(); + if (m_current_value.load() - m_min_value == + m_max_value - m_min_value) + { + print_end(); + increment(); + lock.unlock(); + return; + } + increment(); + lock.unlock(); + } + + void print_progress_absolute( + float64_t current_val, float64_t val, float64_t min_val, + float64_t max_val) + { + lock.lock(); + if (val - m_min_value > m_max_value - m_min_value) + { + lock.unlock(); + return; + } + print_progress_absolute_impl(current_val, val, min_val, max_val); + if (val - m_min_value == m_max_value - m_min_value) + { + print_end(); + lock.unlock(); + return; + } + lock.unlock(); + } + + /** + * Manually increment to max size the current value + * to print a complete progress bar. + */ + void premature_end() + { + if (m_current_value.load() < m_max_value - 1) + m_current_value.store(m_max_value); + } + + /** @return last progress as a percentage. */ + inline float64_t get_current_progress() const + { + return m_current_value.load(); + } + + private: + /** + * Logic implementation of the progress bar. + */ + void print_progress_impl() const + { + + // Check if the progress was enabled + if (!m_io.get_show_progress()) + return; + + if (m_max_value <= m_min_value) + return; + + // Check for terminal dimension. This is for provide + // a minimal resize functionality. + set_screen_size(); + + float64_t difference = m_max_value - m_min_value, v = -1, + estimate = 0, total_estimate = 0; + float64_t size_chunk = -1; + + // Check if we have enough space to show the progress bar + // Use only a fraction of it to account for the size of the + // time displayed (decimals and integer). 
+ int32_t progress_bar_space = + (m_columns_num - 50 - m_prefix.length()) * 0.9; + + // TODO: this guy here brokes testing + // REQUIRE( + // progress_bar_space > 0, + // "Not enough terminal space to show the progress bar!\n") + + char str[1000]; + float64_t runtime = CTime::get_curtime(); + + if (difference > 0.0) + v = 100 * (m_current_value.load() - m_min_value) / + (m_max_value - m_min_value); + + // Set up chunk size + size_chunk = difference / (float64_t)progress_bar_space; + + if (m_last_progress == 0) + { + m_last_progress_time = runtime; + m_last_progress = v; + } + else + { + m_last_progress = v - 1e-6; + + if ((v != 100.0) && (runtime - m_last_progress_time < 0.5)) + return; + + m_last_progress_time = runtime; + estimate = (1 - v / 100) * + (m_last_progress_time - m_progress_start_time) / + (v / 100); + total_estimate = + (m_last_progress_time - m_progress_start_time) / (v / 100); + } + + /** Print the actual progress bar to screen **/ + m_io.message(MSG_MESSAGEONLY, "", "", -1, "%s |", m_prefix.c_str()); + for (index_t i = 1; i < progress_bar_space; i++) + { + if (m_current_value.load() - m_min_value > i * size_chunk) + { + m_io.message( + MSG_MESSAGEONLY, "", "", -1, "%s", + get_pb_char().c_str()); + } + else + { + m_io.message(MSG_MESSAGEONLY, "", "", -1, " "); + } + } + m_io.message(MSG_MESSAGEONLY, "", "", -1, "| %.2f\%", v); + + if (estimate > 120) + { + snprintf( + str, sizeof(str), + " %%1.1f minutes remaining %%1.1f minutes total\r"); + m_io.message( + MSG_MESSAGEONLY, "", "", -1, str, estimate / 60, + total_estimate / 60); + } + else + { + snprintf( + str, sizeof(str), + " %%1.1f seconds remaining %%1.1f seconds total\r"); + m_io.message( + MSG_MESSAGEONLY, "", "", -1, str, estimate, total_estimate); + } + } + + /** + * Logic implementation fo the absolute progress bar. 
+ */ + void print_progress_absolute_impl( + float64_t current_val, float64_t val, float64_t min_value, + float64_t max_value) const + { + // Check if the progress was enabled + if (!m_io.get_show_progress()) + return; + + m_current_value.store(current_val); + + if (max_value <= min_value) + return; + + // Check for terminal dimension. This is for provide + // a minimal resize functionality. + set_screen_size(); + + float64_t difference = max_value - min_value, v = -1, estimate = 0, + total_estimate = 0; + float64_t size_chunk = -1; + + // Check if we have enough space to show the progress bar + // Use only a fraction of it to account for the size of the + // time displayed (decimals and integer). + int32_t progress_bar_space = + (m_columns_num - 50 - m_prefix.length()) * 0.9; + + // TODO: this guy here brokes testing + // REQUIRE( + // progress_bar_space > 0, + // "Not enough terminal space to show the progress bar!\n") + + char str[1000]; + float64_t runtime = CTime::get_curtime(); + + if (difference > 0.0) + v = 100 * (val - min_value) / (max_value - min_value); + + // Set up chunk size + size_chunk = difference / (float64_t)progress_bar_space; + + if (m_last_progress == 0) + { + m_last_progress_time = runtime; + m_last_progress = v; + } + else + { + m_last_progress = v - 1e-6; + + if ((v != 100.0) && (runtime - m_last_progress_time < 0.5)) + return; + + m_last_progress_time = runtime; + estimate = (1 - v / 100) * + (m_last_progress_time - m_progress_start_time) / + (v / 100); + total_estimate = + (m_last_progress_time - m_progress_start_time) / (v / 100); + } + + /** Print the actual progress bar to screen **/ + m_io.message(MSG_MESSAGEONLY, "", "", -1, "%s |", m_prefix.c_str()); + for (index_t i = 1; i < progress_bar_space; i++) + { + if (m_current_value.load() - min_value > i * size_chunk) + { + m_io.message( + MSG_MESSAGEONLY, "", "", -1, "%s", + get_pb_char().c_str()); + } + else + { + m_io.message(MSG_MESSAGEONLY, "", "", -1, " "); + } + } + 
m_io.message(MSG_MESSAGEONLY, "", "", -1, "| %.2f\%", current_val); + + if (estimate > 120) + { + snprintf( + str, sizeof(str), + " %%1.1f minutes remaining %%1.1f minutes total\r"); + m_io.message( + MSG_MESSAGEONLY, "", "", -1, str, estimate / 60, + total_estimate / 60); + } + else + { + snprintf( + str, sizeof(str), + " %%1.1f seconds remaining %%1.1f seconds total\r"); + m_io.message( + MSG_MESSAGEONLY, "", "", -1, str, estimate, total_estimate); + } + } + + /** Print the progress bar end. */ + void print_end() const + { + // Check if the progress was enabled + if (!m_io.get_show_progress()) + return; + + m_io.message(MSG_MESSAGEONLY, "", "", -1, "\n"); + } + + /** + * Return the char which will be used to print the progress. + * @return UTF8/ASCII string + */ + std::string get_pb_char() const + { + switch (m_mode) + { + case ASCII: + return m_ascii_char; + case UTF8: + return m_utf8_char; + default: + return m_ascii_char; + } + } + + /** + * Get the terminal's screen size (Windows and Unix). 
+ */ + void set_screen_size() const + { +#ifdef WIN32 + CONSOLE_SCREEN_BUFFER_INFO csbi; + GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi); + m_columns_num = csbi.srWindow.Right - csbi.srWindow.Left + 1; + m_rows_num = csbi.srWindow.Bottom - csbi.srWindow.Top + 1; +#else + struct winsize wind; + ioctl(STDOUT_FILENO, TIOCGWINSZ, &wind); + m_columns_num = wind.ws_col; + m_rows_num = wind.ws_row; +#endif + } + + /* Increment the current value (atomically) */ + void increment() const + { + m_current_value++; + } + + /** IO object */ + SGIO m_io; + /** Maxmimum value */ + float64_t m_max_value; + /** Minimum value */ + float64_t m_min_value; + /** Prefix which will be printed before the progress bar */ + std::string m_prefix; + /** Progres bar's char mode */ + SG_PRG_MODE m_mode; + /** ASCII char */ + std::string m_ascii_char = "#"; + /** UTF8 char */ + std::string m_utf8_char = "\u2588"; + /* Screen column number*/ + mutable int32_t m_columns_num; + /* Screen row number*/ + mutable int32_t m_rows_num; + /** Last progress */ + mutable float64_t m_last_progress; + /** Last progress time */ + mutable float64_t m_last_progress_time; + /** Progress start time */ + mutable float64_t m_progress_start_time; + /** Current value */ + mutable std::atomic m_current_value; + /** Lock for multithreaded operations **/ + mutable CLock lock; + }; + + /** @class Helper class to show a progress bar given a range. + * + * @code + * for (auto i : PRange(Range(1, 10), io)) { ... } + * @endcode + */ + template + class PRange + { + public: + /** + * Constructor, initialize the progress bar manager. + * + * @param range the range to loop over + * @param io the SGIO object which will be used to print the progress + * bar + * @param prefix the string prefix which will be printed before the + * progress bar + * @param mode the char mode used to print the progress bar (ASCII, UTF8 + * etc.) 
+ * @param condition premature stop condition for the loop + */ + PRange( + Range range, const SGIO& io, const std::string prefix, + const SG_PRG_MODE mode, std::function condition) + : m_range(range), m_condition(condition) + { + set_up_range(); + m_printer = std::make_shared( + io, m_end_range, m_begin_range, prefix, mode); + } + + /** @class Wrapper for Range::Iterator spawned by @ref PRange. */ + class PIterator : public std::iterator + { + public: + /** + * Initialize the PIterator object. + * @param value the @ref Range:Iterator object. + * @param shrd_ptr the @ref ProgressPrinter object. + * @param condition premature stop condition for the loop. + */ + PIterator( + typename Range::Iterator value, + std::shared_ptr shrd_ptr, + std::function condition) + : m_value(value), m_printer(shrd_ptr), m_condition(condition) + { + } + PIterator(const PIterator& other) + : m_value(other.m_value), m_printer(other.m_printer), + m_condition(other.m_condition) + { + } + PIterator(PIterator&& other) + : m_value(other.m_value), m_printer(other.m_printer), + m_condition(other.m_condition) + { + } + PIterator& operator=(const PIterator&) = delete; + PIterator& operator++() + { + // Every time we update the iterator we print + // also the updated progress bar + m_printer->print_progress(); + m_value++; + return *this; + } + PIterator operator++(int) + { + PIterator tmp(*this); + ++*this; + return tmp; + } + T operator*() + { + // Since PIterator is a wrapper we have + // to return the actual value of the + // wrapped iterator + return *m_value; + } + bool operator!=(const PIterator& other) + { + if (!(this->m_value != other.m_value)) + { + m_printer->premature_end(); + m_printer->print_progress(); + return false; + } + bool result = evaluate_condition(); + return (this->m_value != other.m_value) && result; + } + + private: + /** + * Evaluate the premature stop condition. + * @return return value of the condition. 
+ */ + bool evaluate_condition() + { + if (!m_condition()) + { + m_printer->premature_end(); + m_printer->print_progress(); + } + return m_condition(); + } + + /* The wrapped range */ + typename Range::Iterator m_value; + /* The ProgressPrinter object which will be used to show the + * progress bar*/ + std::shared_ptr m_printer; + /* The function which will contain the custom condition + * to premature stop the loop */ + std::function m_condition; + }; + + /** Create the iterator that corresponds to the start of the range. + * Used within the range-based loop version of the progress bar. + * + * @code + * for (auto i: progress(range(0, 10), io, ASCII)) + * { + * //Do stuff + * } + * @endcode + * + * @return @ref PIterator that represents the start of the range + */ + PIterator begin() const + { + return PIterator(m_range.begin(), m_printer, m_condition); + } + + /** Create the iterator that corresponds to the end of the range. + * Used within the range-based loop version of the progress bar. + * + * @code + * for (auto i: progress(range(0, 10), io, ASCII)) + * { + * //Do stuff + * } + * @endcode + * + * @return @ref PIterator that represent the end of the range. + */ + PIterator end() const + { + return PIterator(m_range.end(), m_printer, m_condition); + } + + /** + * Return the current progress bar value. + * Used for testing purposes. + * @return current progress bar value. + */ + inline float64_t get_current_progress() const + { + return m_printer->get_current_progress(); + } + + /** + * Print the progress bar. This method must be called + * each time we want the progress bar to be updated. + * @code + * auto pr = progress(range(0,10), ASCII); + * for (int i=0; i<10; i++) + * { + * // Do stuff + * pr.print_progress(); + * } + * pr.complete(); + * @endcode + */ + void print_progress() const + { + m_printer->print_progress(); + } + + /** + * Print the absolute progress bar. This method must be called + * each time we want the progress bar to be updated. 
+ * + * @param current_val current value + * @param val value + * @param min_val minimum value + * @param max_val maximum value + */ + void print_absolute( + float64_t current_val, float64_t val, float64_t min_value, + float64_t max_value) const + { + m_printer->print_progress_absolute( + current_val, val, min_value, max_value); + } + + /** + * Print the progress bar end. This method must be called + * one time, after the loop. + * @code + * auto pr = progress(range(0,10), ASCII); + * for (int i=0; i<10; i++) + * { + * // Do stuff + * pr.print_progress(); + * } + * pr.complete(); + * @endcode + */ + void complete() const + { + m_printer->premature_end(); + m_printer->print_progress(); + } + + /** + * Print the progress bar end. This method must be called + * one time, after the loop. + * @code + * auto pr = progress(range(0,10), ASCII); + * for (int i=0; i<10; i++) + * { + * // Do stuff + * pr.print_absolute(); + * } + * pr.complete_absolute(); + * @endcode + */ + void complete_absolute() const + { + m_printer->print_progress_absolute(100, 100, 0, 100); + } + + private: + /** + * Set up progress range. + */ + void set_up_range() + { + m_begin_range = *(m_range.begin()); + m_end_range = *(m_range.end()); + } + + /** Range we iterate over */ + Range m_range; + /** Observer that will print the actual progress bar */ + std::shared_ptr m_printer; + /* Start of the range */ + float64_t m_begin_range; + /* End of the range */ + float64_t m_end_range; + /* Function which store the premature stop condition */ + std::function m_condition = []() { return true; }; + }; + + /** Creates @ref PRange given a range. + * + * @code + * for (auto i : progress(range(0, 100), io)) { ... 
} + * @endcode + * + * @param range range used + * @param io SGIO object + * @param mode char printing mode (default: UTF8) + * @param prefix string which will be printed before the progress bar + * (default: PROGRESS: ) + * @param condition premature stopping condition + */ + template + inline PRange progress( + Range range, const SGIO& io, std::string prefix = "PROGRESS: ", + SG_PRG_MODE mode = UTF8, + std::function condition = []() { return true; }) + { + return PRange(range, io, prefix, mode, condition); + } + + /** Creates @ref PRange given a range that uses the global SGIO + * + * @code + * for (auto i : progress( range(0, 100) ) ) { ... } + * @endcode + * + * @param range range used + * @param mode char printing mode (default: UTF8) + * @param prefix string which will be printed before the progress bar + * (default: PROGRESS: ) + * @param condition premature stopping condition + */ + template + inline PRange progress( + Range range, std::string prefix = "PROGRESS: ", + SG_PRG_MODE mode = UTF8, + std::function condition = []() { return true; }) + { + return PRange(range, *sg_io, prefix, mode, condition); + } +}; +#endif /* __SG_PROGRESS_H__ */ diff --git a/src/shogun/base/range.h b/src/shogun/base/range.h index dfb1e213257..27d31d19b43 100644 --- a/src/shogun/base/range.h +++ b/src/shogun/base/range.h @@ -1,128 +1,157 @@ +/* +* BSD 3-Clause License +* +* Copyright (c) 2017, Shogun-Toolbox e.V. +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. 
+* +* * Neither the name of the copyright holder nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* Written (W) 2016 Sergey Lisitsyn +* +*/ + #ifndef __SG_RANGE_H__ #define __SG_RANGE_H__ -#include #include -#ifdef HAVE_CXX11 namespace shogun { - /** @class Helper class to spawn range iterator. - * - * Useful for C++11-style for loops: - * - * @code - * for (auto i : Range(3, 10)) { ... } - * @endcode - */ - template - class Range - { - public: - /** Creates range with specified bounds. - * Assumes rbegin < rend. - * - * @param rbegin lower bound of range - * @param rend upper bound of range (excluding) - */ - Range(T rbegin, T rend) : m_begin(rbegin), m_end(rend) - { - } + /** @class Helper class to spawn range iterator. + * + * Useful for C++11-style for loops: + * + * @code + * for (auto i : Range(3, 10)) { ... } + * @endcode + */ + template + class Range + { + public: + /** Creates range with specified bounds. + * Assumes rbegin < rend. 
+ * + * @param rbegin lower bound of range + * @param rend upper bound of range (excluding) + */ + Range(T rbegin, T rend) : m_begin(rbegin), m_end(rend) + { + } + + /** @class Iterator spawned by @ref Range. */ + class Iterator : public std::iterator + { + public: + Iterator(T value) : m_value(value) + { + } + Iterator(const Iterator& other) : m_value(other.m_value) + { + } + Iterator(Iterator&& other) : m_value(other.m_value) + { + } + Iterator& operator=(const Iterator&) = delete; + Iterator& operator++() + { + m_value++; + return *this; + } + Iterator operator++(int) + { + Iterator tmp(*this); + ++*this; + return tmp; + } + T operator*() + { + return m_value; + } + bool operator!=(const Iterator& other) + { + return this->m_value != other.m_value; + } - /** @class Iterator spawned by @ref Range. */ - class Iterator : public std::iterator - { - public: - Iterator(T value) : m_value(value) - { - } - Iterator(const Iterator& other) : m_value(other.m_value) - { - } - Iterator(Iterator&& other) : m_value(other.m_value) - { - } - Iterator& operator=(const Iterator&) = delete; - Iterator& operator++() - { - m_value++; - return *this; - } - Iterator& operator++(int) - { - Iterator tmp(*this); - tmp++; - return tmp; - } - T operator*() - { - return m_value; - } - bool operator!=(const Iterator& other) - { - return this->m_value != other.m_value; - } - bool operator==(const Iterator& other) - { - return this->m_value == other.m_value; - } - private: - T m_value; - }; - /** Create iterator that corresponds to the start of range. - * - * Usually called through for-loop syntax. - */ - Iterator begin() const - { - return Iterator(m_begin); - } - /** Create iterator that corresponds to the end of range. - * - * Usually called through for-loop syntax. 
- */ - Iterator end() const - { - return Iterator(m_end); - } - private: - /** begin of range */ - T m_begin; - /** end of range */ - T m_end; - }; + private: + T m_value; + }; + /** Create iterator that corresponds to the start of range. + * + * Usually called through for-loop syntax. + */ + Iterator begin() const + { + return Iterator(m_begin); + } + /** Create iterator that corresponds to the end of range. + * + * Usually called through for-loop syntax. + */ + Iterator end() const + { + return Iterator(m_end); + } - /** Creates @ref Range with specified upper bound. - * - * @code - * for (auto i : range(100)) { ... } - * @endcode - * - * @param rend upper bound of range (excluding) - */ - template - inline Range range(T rend) - { - return Range(0, rend); - } + private: + /** begin of range */ + T m_begin; + /** end of range */ + T m_end; + }; - /** Creates @ref Range with specified bounds. - * - * @code - * for (auto i : range(0, 100)) { ... } - * @endcode - * - * @param rbegin lower bound of range - * @param rend upper bound of range (excluding) - */ - template - inline Range range(T rbegin, T rend) - { - return Range(rbegin, rend); - } + /** Creates @ref Range with specified upper bound. + * + * @code + * for (auto i : range(100)) { ... } + * @endcode + * + * @param rend upper bound of range (excluding) + */ + template + inline Range range(T rend) + { + return Range(0, rend); + } + /** Creates @ref Range with specified bounds. + * + * @code + * for (auto i : range(0, 100)) { ... 
} + * @endcode + * + * @param rbegin lower bound of range + * @param rend upper bound of range (excluding) + */ + template + inline Range range(T rbegin, T rend) + { + return Range(rbegin, rend); + } } -#endif /* HAVE_CXX */ #endif /* __SG_RANGE_H__ */ diff --git a/src/shogun/base/some.h b/src/shogun/base/some.h index eb8094e4965..464c60949f5 100644 --- a/src/shogun/base/some.h +++ b/src/shogun/base/some.h @@ -1,148 +1,169 @@ #ifndef __SG_SOME_H__ #define __SG_SOME_H__ -#include - -#ifdef HAVE_CXX11 #include -#include - namespace shogun { - /** @class Shogun synonym for the std::shared_ptr. Employs - * exactly the same strategy for reference counting - * as std::shared_ptr: any operation involving copying increases - * the count and once deleted this wrapper decreases the counter. - * - */ - template - class Some - { - public: - Some(const Some& other); - explicit Some(T* other); - Some& operator=(T* other); - ~Some(); - - static Some from_raw(T* raw); - - /** Casts the underlying object back to raw pointer - * - * Be careful to SG_REF obtained pointer if you start to own it. 
- * - * @return raw pointer (without SG_REF) - */ - operator T*(); - /** Call member function or access member of T - * - * @return raw pointer (without SG_REF) - */ - T* operator->(); - private: - Some(); - void unref(); - void ref(); - private: - T* raw; - }; - - template - Some::Some() - : raw(nullptr) - { - } - template - Some::Some(const Some& other) - : raw(other.raw) - { - ref(); - } - template - Some::Some(T* other) - : raw(other) - { - ref(); - } - template - Some& Some::operator=(T* other) - { - if (raw != other) { - unref(); - raw = other; - ref(); - } - return *this; - } - template - Some::~Some() - { - unref(); - } - template - Some::operator T*() - { - return raw; - } - template - T* Some::operator->() - { - return raw; - } - template - void Some::ref() - { - SG_REF(raw); - } - template - void Some::unref() - { - SG_UNREF(raw); - } - template - Some Some::from_raw(T* raw) - { - Some result(raw); - return result; - } - - /** Creates an instance of any class - * that is wrapped with a shared pointer like - * structure @ref Some - * - * @param args arguments to construct instance of T with (T should - * have compatible constructor) - * - * @return a shared pointer that holds created instance of @ref T - * - */ - template - Some some(Args&&... args) - { - T* ptr = new T(args...); - return Some::from_raw(ptr); - } - - template - inline T wrap(const T& value) - { - return value; - } - - template - inline Some wrap(T* ptr) - { - return Some::from_raw(ptr); - } - - template - inline Some wrap(const Some& other) - { - return other; - } + /** @class Shogun synonym for the std::shared_ptr. Employs + * exactly the same strategy for reference counting + * as std::shared_ptr: any operation involving copying increases + * the count and once deleted this wrapper decreases the counter. 
+ * + */ + template + class Some + { + public: + Some(const Some& other); + template + Some(const Some& other); + explicit Some(T* other); + + Some& operator=(T* other); + ~Some(); + + static Some from_raw(T* raw); + + /** Casts the underlying object back to raw pointer + * + * Be careful to SG_REF obtained pointer if you start to own it. + * + * @return raw pointer (without SG_REF) + */ + operator T*() const; + /** Call member function or access member of T + * + * @return raw pointer (without SG_REF) + */ + T* operator->() const; + + /** + * Get the raw pointer + * + * @return raw pointer (without SG_REF) + */ + T* get() const; + + private: + Some(); + void unref(); + void ref(); + + private: + T* raw = nullptr; + }; + + template + Some::Some() : raw(nullptr) + { + } + template + Some::Some(const Some& other) : raw(other.raw) + { + ref(); + } + template + Some::Some(T* other) : raw(other) + { + ref(); + } + template + template + Some::Some(const Some& other) + { + raw = dynamic_cast(other.get()); + ref(); + } + template + Some& Some::operator=(T* other) + { + if (raw != other) + { + unref(); + raw = other; + ref(); + } + return *this; + } + + template + Some::~Some() + { + unref(); + } + template + Some::operator T*() const + { + return raw; + } + template + T* Some::operator->() const + { + return raw; + } + template + T* Some::get() const + { + return raw; + } + template + void Some::ref() + { + if (raw) + (raw)->ref(); + } + template + void Some::unref() + { + if (raw) + { + if ((raw)->unref() == 0) + (raw) = NULL; + }; + } + template + Some Some::from_raw(T* raw) + { + Some result(raw); + return result; + } + + /** Creates an instance of any class + * that is wrapped with a shared pointer like + * structure @ref Some + * + * @param args arguments to construct instance of T with (T should + * have compatible constructor) + * + * @return a shared pointer that holds created instance of @ref T + * + */ + template + Some some(Args&&... 
args) + { + T* ptr = new T(args...); + return Some::from_raw(ptr); + } + + template + inline T wrap(const T& value) + { + return value; + } + + template + inline Some wrap(T* ptr) + { + return Some::from_raw(ptr); + } + template + inline Some wrap(const Some& other) + { + return other; + } }; -#endif /* HAVE_CXX11 */ #endif /* __SG_SOME_H__ */ diff --git a/src/shogun/classifier/AveragedPerceptron.cpp b/src/shogun/classifier/AveragedPerceptron.cpp index 35d89daa710..cf9ac008294 100644 --- a/src/shogun/classifier/AveragedPerceptron.cpp +++ b/src/shogun/classifier/AveragedPerceptron.cpp @@ -52,6 +52,7 @@ bool CAveragedPerceptron::train_machine(CFeatures* data) ASSERT(num_vec==train_labels.vlen) SGVector w(num_feat); float64_t* tmp_w=SG_MALLOC(float64_t, num_feat); + memset(tmp_w, 0, sizeof(float64_t)*num_feat); float64_t* output=SG_MALLOC(float64_t, num_vec); //start with uniform w, bias=0, tmp_bias=0 @@ -60,15 +61,13 @@ bool CAveragedPerceptron::train_machine(CFeatures* data) for (int32_t i=0; idense_dot(i, w.vector, w.vlen) + bias; diff --git a/src/shogun/classifier/FeatureBlockLogisticRegression.cpp b/src/shogun/classifier/FeatureBlockLogisticRegression.cpp deleted file mode 100644 index dd3aa8cdd22..00000000000 --- a/src/shogun/classifier/FeatureBlockLogisticRegression.cpp +++ /dev/null @@ -1,266 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * Copyright (C) 2012 Sergey Lisitsyn - */ - - -#include -#ifdef USE_GPL_SHOGUN -#include -#include - -#include -#include - -namespace shogun -{ - -CFeatureBlockLogisticRegression::CFeatureBlockLogisticRegression() : - CLinearMachine() -{ - init(); - register_parameters(); -} - -CFeatureBlockLogisticRegression::CFeatureBlockLogisticRegression( - float64_t z, CDotFeatures* train_features, - CBinaryLabels* train_labels, CIndexBlockRelation* feature_relation) : - CLinearMachine() -{ - init(); - set_feature_relation(feature_relation); - set_z(z); - set_features(train_features); - set_labels(train_labels); - register_parameters(); -} - -void CFeatureBlockLogisticRegression::init() -{ - m_feature_relation=NULL; - m_z=0.0; - m_q=2.0; - m_termination=0; - m_regularization=0; - m_tolerance=1e-3; - m_max_iter=1000; -} - -CFeatureBlockLogisticRegression::~CFeatureBlockLogisticRegression() -{ - SG_UNREF(m_feature_relation); -} - -void CFeatureBlockLogisticRegression::register_parameters() -{ - SG_ADD((CSGObject**)&m_feature_relation, "feature_relation", "feature relation", MS_NOT_AVAILABLE); - SG_ADD(&m_z, "z", "regularization coefficient", MS_AVAILABLE); - SG_ADD(&m_q, "q", "q of L1/Lq", MS_AVAILABLE); - SG_ADD(&m_termination, "termination", "termination", MS_NOT_AVAILABLE); - SG_ADD(&m_regularization, "regularization", "regularization", MS_NOT_AVAILABLE); - SG_ADD(&m_tolerance, "tolerance", "tolerance", MS_NOT_AVAILABLE); - SG_ADD(&m_max_iter, "max_iter", "maximum number of iterations", MS_NOT_AVAILABLE); -} - -CIndexBlockRelation* CFeatureBlockLogisticRegression::get_feature_relation() const -{ - SG_REF(m_feature_relation); - return m_feature_relation; -} - -void CFeatureBlockLogisticRegression::set_feature_relation(CIndexBlockRelation* feature_relation) -{ - SG_REF(feature_relation); - SG_UNREF(m_feature_relation); - m_feature_relation = feature_relation; -} - -int32_t CFeatureBlockLogisticRegression::get_max_iter() const -{ - return m_max_iter; -} - -int32_t 
CFeatureBlockLogisticRegression::get_regularization() const -{ - return m_regularization; -} - -int32_t CFeatureBlockLogisticRegression::get_termination() const -{ - return m_termination; -} - -float64_t CFeatureBlockLogisticRegression::get_tolerance() const -{ - return m_tolerance; -} - -float64_t CFeatureBlockLogisticRegression::get_z() const -{ - return m_z; -} - -float64_t CFeatureBlockLogisticRegression::get_q() const -{ - return m_q; -} - -void CFeatureBlockLogisticRegression::set_max_iter(int32_t max_iter) -{ - ASSERT(max_iter>=0) - m_max_iter = max_iter; -} - -void CFeatureBlockLogisticRegression::set_regularization(int32_t regularization) -{ - ASSERT(regularization==0 || regularization==1) - m_regularization = regularization; -} - -void CFeatureBlockLogisticRegression::set_termination(int32_t termination) -{ - ASSERT(termination>=0 && termination<=4) - m_termination = termination; -} - -void CFeatureBlockLogisticRegression::set_tolerance(float64_t tolerance) -{ - ASSERT(tolerance>0.0) - m_tolerance = tolerance; -} - -void CFeatureBlockLogisticRegression::set_z(float64_t z) -{ - m_z = z; -} - -void CFeatureBlockLogisticRegression::set_q(float64_t q) -{ - m_q = q; -} - -bool CFeatureBlockLogisticRegression::train_machine(CFeatures* data) -{ - if (data && (CDotFeatures*)data) - set_features((CDotFeatures*)data); - - ASSERT(features) - ASSERT(m_labels) - - int32_t n_vecs = m_labels->get_num_labels(); - SGVector y(n_vecs); - for (int32_t i=0; iget_label(i); - - slep_options options = slep_options::default_options(); - options.q = m_q; - options.regularization = m_regularization; - options.termination = m_termination; - options.tolerance = m_tolerance; - options.max_iter = m_max_iter; - options.loss = LOGISTIC; - - EIndexBlockRelationType relation_type = m_feature_relation->get_relation_type(); - switch (relation_type) - { - case GROUP: - { - CIndexBlockGroup* feature_group = (CIndexBlockGroup*)m_feature_relation; - SGVector ind = feature_group->get_SLEP_ind(); 
- options.ind = ind.vector; - options.n_feature_blocks = ind.vlen-1; - if (ind[ind.vlen-1] > features->get_dim_feature_space()) - SG_ERROR("Group of features covers more features than available\n") - - options.gWeight = SG_MALLOC(double, options.n_feature_blocks); - for (int32_t i=0; iget_dim_feature_space(); - SGVector new_w(n_feats); - for (int i=0; i ind_t = feature_tree->get_SLEP_ind_t(); - SGVector G; - if (feature_tree->is_general()) - { - G = feature_tree->get_SLEP_G(); - options.general = true; - } - options.ind_t = ind_t.vector; - options.G = G.vector; - options.n_nodes = ind_t.vlen/3; - options.n_feature_blocks = ind_t.vlen/3; - options.mode = FEATURE_TREE; - options.loss = LOGISTIC; - - slep_result_t result = slep_solver(features, y.vector, m_z, options); - - int32_t n_feats = features->get_dim_feature_space(); - SGVector new_w(n_feats); - for (int i=0; i w = get_w(); - return CMath::exp(-(features->dense_dot(vec_idx, w.vector, w.vlen) + bias)); -} - -SGVector CFeatureBlockLogisticRegression::apply_get_outputs(CFeatures* data) -{ - if (data) - { - if (!data->has_property(FP_DOT)) - SG_ERROR("Specified features are not of type CDotFeatures\n") - - set_features((CDotFeatures*) data); - } - - if (!features) - return SGVector(); - - int32_t num=features->get_num_vectors(); - SGVector w = get_w(); - ASSERT(num>0) - ASSERT(w.vlen==features->get_dim_feature_space()) - - float64_t* out=SG_MALLOC(float64_t, num); - features->dense_dot_range(out, 0, num, NULL, w.vector, w.vlen, bias); - for (int32_t i=0; i(out,num); -} - -} -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/classifier/FeatureBlockLogisticRegression.h b/src/shogun/classifier/FeatureBlockLogisticRegression.h deleted file mode 100644 index 89cafbdeb38..00000000000 --- a/src/shogun/classifier/FeatureBlockLogisticRegression.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by 
- * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Copyright (C) 2012 Sergey Lisitsyn - */ - -#ifndef FEATUREBLOCKLOGISTICREGRESSION_H_ -#define FEATUREBLOCKLOGISTICREGRESSION_H_ - -#include -#ifdef USE_GPL_SHOGUN -#include -#include - -namespace shogun -{ -/** @brief class FeatureBlockLogisticRegression, a linear - * binary logistic loss classifier for problems with complex feature relations. - * Currently two feature relations are supported - feature group - * (done via CIndexBlockGroup) and feature tree (done via CIndexTree). - * Handling of feature relations is done via L1/Lq (for groups) and L1/L2 - * (for trees) regularization. - * - * The underlying solver is based on the SLEP library. - * - * @see CIndexBlock - * @see CIndexBlockGroup - * @see CIndexBlockTree - */ -class CFeatureBlockLogisticRegression : public CLinearMachine -{ - - public: - MACHINE_PROBLEM_TYPE(PT_BINARY) - - /** default constructor */ - CFeatureBlockLogisticRegression(); - - /** constructor - * - * @param z regularization coefficient - * @param training_data training features - * @param training_labels training labels - * @param task_relation task relation - */ - CFeatureBlockLogisticRegression( - float64_t z, CDotFeatures* training_data, - CBinaryLabels* training_labels, CIndexBlockRelation* task_relation); - - /** destructor */ - virtual ~CFeatureBlockLogisticRegression(); - - /** get name */ - virtual const char* get_name() const - { - return "FeatureBlockLogisticRegression"; - } - - /** getter for feature relation - * @return feature relation - */ - CIndexBlockRelation* get_feature_relation() const; - - /** setter for feature relation - * @param feature_relation feature relation - */ - void set_feature_relation(CIndexBlockRelation* feature_relation); - - virtual float64_t apply_one(int32_t vec_idx); - - /** get max iter */ - int32_t get_max_iter() const; - /** get q */ - float64_t get_q() const; - /** get 
regularization */ - int32_t get_regularization() const; - /** get termination */ - int32_t get_termination() const; - /** get tolerance */ - float64_t get_tolerance() const; - /** get z */ - float64_t get_z() const; - - /** set max iter */ - void set_max_iter(int32_t max_iter); - /** set q */ - void set_q(float64_t q); - /** set regularization */ - void set_regularization(int32_t regularization); - /** set termination */ - void set_termination(int32_t termination); - /** set tolerance */ - void set_tolerance(float64_t tolerance); - /** set z */ - void set_z(float64_t z); - - protected: - - virtual SGVector apply_get_outputs(CFeatures* data); - - /** train machine */ - virtual bool train_machine(CFeatures* data=NULL); - - private: - - /** register parameters */ - void register_parameters(); - - /** Initializes Parameters to std values */ - void init(); - - protected: - - /** feature tree */ - CIndexBlockRelation* m_feature_relation; - - /** regularization type */ - int32_t m_regularization; - - /** termination criteria */ - int32_t m_termination; - - /** max iteration */ - int32_t m_max_iter; - - /** tolerance */ - float64_t m_tolerance; - - /** q of L1/Lq */ - float64_t m_q; - - /** regularization coefficient */ - float64_t m_z; - -}; -} -#endif //USE_GPL_SHOGUN -#endif diff --git a/src/shogun/classifier/GaussianProcessClassification.cpp b/src/shogun/classifier/GaussianProcessClassification.cpp index 468905f84b6..9ef7cb9f591 100644 --- a/src/shogun/classifier/GaussianProcessClassification.cpp +++ b/src/shogun/classifier/GaussianProcessClassification.cpp @@ -39,7 +39,9 @@ #include #include #include +#ifdef USE_GPL_SHOGUN #include +#endif //USE_GPL_SHOGUN using namespace shogun; @@ -117,10 +119,14 @@ CBinaryLabels* CGaussianProcessClassification::apply_binary( { if (m_method->get_inference_type()== INF_FITC_LAPLACE_SINGLE) { +#ifdef USE_GPL_SHOGUN CSingleFITCLaplaceInferenceMethod* fitc_method= CSingleFITCLaplaceInferenceMethod::obtain_from_generic(m_method); 
data=fitc_method->get_inducing_features(); SG_UNREF(fitc_method); +#else + SG_GPL_ONLY +#endif //USE_GPL_SHOGUN } else data=m_method->get_features(); @@ -149,10 +155,14 @@ bool CGaussianProcessClassification::train_machine(CFeatures* data) // set inducing features for FITC inference method if (m_method->get_inference_type()==INF_FITC_LAPLACE_SINGLE) { +#ifdef USE_GPL_SHOGUN CSingleFITCLaplaceInferenceMethod* fitc_method= CSingleFITCLaplaceInferenceMethod::obtain_from_generic(m_method); fitc_method->set_inducing_features(data); SG_UNREF(fitc_method); +#else + SG_ERROR("Single FITC Laplace inference only supported under GPL.\n") +#endif //USE_GPL_SHOGUN } else m_method->set_features(data); diff --git a/src/shogun/classifier/LDA.cpp b/src/shogun/classifier/LDA.cpp index 6c2969ee1c5..51085187aa5 100644 --- a/src/shogun/classifier/LDA.cpp +++ b/src/shogun/classifier/LDA.cpp @@ -10,45 +10,51 @@ */ #include -#include -#include -#include #include -#include -#include -#include #include +#include +#include +#include +#include +#include using namespace Eigen; using namespace shogun; -CLDA::CLDA(float64_t gamma, ELDAMethod method) - :CLinearMachine() +CLDA::CLDA(float64_t gamma, ELDAMethod method, bool bdc_svd) + : CLinearMachine(false) { init(); m_method=method; m_gamma=gamma; + m_bdc_svd = bdc_svd; } -CLDA::CLDA(float64_t gamma, CDenseFeatures *traindat, - CLabels *trainlab, ELDAMethod method) - :CLinearMachine(), m_gamma(gamma) +CLDA::CLDA( + float64_t gamma, CDenseFeatures* traindat, CLabels* trainlab, + ELDAMethod method, bool bdc_svd) + : CLinearMachine(false), m_gamma(gamma) { init(); set_features(traindat); set_labels(trainlab); m_method=method; m_gamma=gamma; + m_bdc_svd = bdc_svd; } void CLDA::init() { m_method=AUTO_LDA; m_gamma=0; - SG_ADD((machine_int_t*) &m_method, "m_method", - "Method used for LDA calculation", MS_NOT_AVAILABLE); - SG_ADD((machine_int_t*) &m_gamma, "m_gamma", - "Regularization parameter", MS_NOT_AVAILABLE); + m_bdc_svd = true; + SG_ADD( + 
(machine_int_t*)&m_method, "m_method", + "Method used for LDA calculation", MS_NOT_AVAILABLE); + SG_ADD( + (machine_int_t*)&m_gamma, "m_gamma", "Regularization parameter", + MS_NOT_AVAILABLE); + SG_ADD(&m_bdc_svd, "m_bdc_svd", "Use BDC-SVD algorithm", MS_NOT_AVAILABLE); } CLDA::~CLDA() @@ -58,8 +64,10 @@ CLDA::~CLDA() bool CLDA::train_machine(CFeatures *data) { REQUIRE(m_labels, "Labels for the given features are not specified!\n") - REQUIRE(m_labels->get_label_type()==LT_BINARY, "The labels should of type" - " CBinaryLabels! you provided %s \n",m_labels->get_name()) + REQUIRE( + m_labels->get_label_type() == LT_BINARY, + "The labels should of type CBinaryLabels! Provided type is %s \n", + m_labels->get_name()) if(data) { @@ -73,192 +81,117 @@ bool CLDA::train_machine(CFeatures *data) REQUIRE(data, "Features have not been provided.\n") } - SGVectortrain_labels=((CBinaryLabels *)m_labels)->get_int_labels(); - REQUIRE(train_labels.vector,"Provided Labels are empty!\n") + REQUIRE( + data->get_num_vectors() == m_labels->get_num_labels(), + "Number of training examples(%d) should be equal to number of labels " + "(%d)!\n", + data->get_num_vectors(), m_labels->get_num_labels()); - REQUIRE(data->get_num_vectors() == train_labels.vlen,"Number of training examples(%d) should be " - "equal to number of labels (%d)!\n", data->get_num_vectors(), train_labels.vlen); + REQUIRE( + features->get_feature_class() == C_DENSE, + "LDA only works with dense features") if(data->get_feature_type() == F_SHORTREAL) - return CLDA::train_machine_templated(train_labels, data); + return CLDA::train_machine_templated(); else if(data->get_feature_type() == F_DREAL) - return CLDA::train_machine_templated(train_labels, data); + return CLDA::train_machine_templated(); else if(data->get_feature_type() == F_LONGREAL) - return CLDA::train_machine_templated(train_labels, data); + return CLDA::train_machine_templated(); return false; } template -bool CLDA::train_machine_templated(SGVector train_labels, 
CFeatures *data) -{ - SGMatrixfeature_matrix=((CDenseFeatures*)features) - ->get_feature_matrix(); - int32_t num_feat=feature_matrix.num_rows; - int32_t num_vec=feature_matrix.num_cols; - - SGVector classidx_neg(num_vec); - SGVector classidx_pos(num_vec); - - int32_t i=0; - int32_t num_neg=0; - int32_t num_pos=0; - - for(i=0; i w_st(num_feat); - w_st.zero(); - typename SGMatrix::EigenMatrixXt fmatrix=typename SGMatrix::EigenMatrixXtMap(feature_matrix.matrix, num_feat, num_vec); - typename SGVector::EigenVectorXt mean_neg(num_feat); - mean_neg.setZero(); - typename SGVector::EigenVectorXt mean_pos(num_feat); - mean_pos.setZero(); - - //mean neg - for(i=0; iscatter_matrix(num_feat, num_feat); - typename SGMatrix::EigenMatrixXtMap scatter(scatter_matrix.matrix, num_feat, num_feat); - - if (m_method == FLD_LDA || (m_method==AUTO_LDA && num_vec>num_feat)) - { - // covariance matrix. - typename SGMatrix::EigenMatrixXt cov_mat(num_feat, num_feat); - cov_mat=fmatrix*fmatrix.transpose(); - scatter=cov_mat/(num_vec-1); - ST trace=scatter.trace(); - ST s=1.0-((ST) m_gamma); - scatter*=s; - scatter.diagonal()+=Eigen::DenseBase::EigenVectorXt>::Constant(num_feat, trace*((ST)m_gamma)/num_feat); - - // the usual way - // we need to find a Basic Linear Solution of A.x=b for 'x'. - // Instead of crudely Inverting A, we go for solve() using Decompositions. 
- // where: - // MatrixXd A=scatter; - // VectorXd b=mean_pos-mean_neg; - // VectorXd x=w; - typename SGVector::EigenVectorXtMap x(w_st.vector, num_feat); - LLT::EigenMatrixXt> decomposition(scatter); - x=decomposition.solve(mean_pos-mean_neg); +bool CLDA::train_machine_templated() +{ + index_t num_feat = ((CDenseFeatures*)features)->get_num_features(); + index_t num_vec = features->get_num_vectors(); + ; - // get the weights w_neg(for -ve class) and w_pos(for +ve class) - typename SGVector::EigenVectorXt w_neg=decomposition.solve(mean_neg); - typename SGVector::EigenVectorXt w_pos=decomposition.solve(mean_pos); + bool lda_more_efficient = (m_method == AUTO_LDA && num_vec <= num_feat); - // get the bias. - bias=((float64_t)(0.5*(w_neg.dot(mean_neg)-w_pos.dot(mean_pos)))); - } + if (m_method == SVD_LDA || lda_more_efficient) + return solver_svd(); else - { - //for algorithmic detail, please refer to section 16.3.1. of Bayesian - //Reasoning and Machine Learning by David Barber. - - //we will perform SVD here. 
- typename SGMatrix::EigenMatrixXtMap fmatrix1(feature_matrix.matrix, num_feat, num_vec); - - // to hold the centered positive and negative class data - typename SGMatrix::EigenMatrixXt cen_pos(num_feat,num_pos); - typename SGMatrix::EigenMatrixXt cen_neg(num_feat,num_neg); - - for(i=0; i::EigenMatrixXt Sw= num_pos*cen_pos+num_neg*cen_neg; - ST trace=Sw.trace(); - ST s=1.0-((ST)m_gamma); - Sw *=s; - Sw.diagonal()+=Eigen::DenseBase::EigenVectorXt>::Constant(num_feat, trace*((ST)m_gamma)/num_feat); - - //total mean - typename SGVector::EigenVectorXt mean_total=(num_pos*mean_pos+num_neg*mean_neg)/(ST)num_vec; - - //between class matrix - typename SGMatrix::EigenMatrixXt Sb(num_feat,2); - Sb.col(0)=sqrt(num_pos)*(mean_pos-mean_total); - Sb.col(1)=sqrt(num_neg)*(mean_neg-mean_total); + return solver_classic(); +} - JacobiSVD::EigenMatrixXt> svd(fmatrix1, ComputeThinU); +template +bool CLDA::solver_svd() +{ + auto dense_feat = static_cast*>(features); - // basis to represent the solution - typename SGMatrix::EigenMatrixXt Q=svd.matrixU(); - // modified between class scatter - Sb=Q.transpose()*(Sb*(Sb.transpose()))*Q; + // keep just one dimension to do binary classification + const index_t projection_dim = 1; + auto solver = std::unique_ptr>( + new LDACanVarSolver( + dense_feat, + new CMulticlassLabels(static_cast(m_labels)), + projection_dim, m_gamma, m_bdc_svd)); - // modified within class scatter - Sw=Q.transpose()*Sw*Q; + SGVector w_st(solver->get_eigenvectors()); - // to find SVD((inverse(Chol(Sw)))' * Sb * (inverse(Chol(Sw)))) - //1.get Cw=Chol(Sw) - //find the decomposition of Cw'. - HouseholderQR::EigenMatrixXt> decomposition(Sw.llt().matrixU().transpose()); + auto class_mean = solver->get_class_mean(); + ST m_neg = linalg::dot(w_st, class_mean[0]); + ST m_pos = linalg::dot(w_st, class_mean[1]); - //2.get P=inv(Cw')*Sb_new - //MatrixXd P=decomposition.solve(Sb); - //3. 
final value to be put in SVD will be therefore: - // final_ output =(inv(Cw')*(P'))' - JacobiSVD::EigenMatrixXt> svd2(decomposition.solve((decomposition.solve(Sb)) - .transpose()).transpose(), ComputeThinU); + // change the sign of w if needed to get the correct labels + float64_t sign = (m_pos > m_neg) ? 1 : -1; - // Since this is a linear classifier, with only binary classes, - // we need to keep only the 1st eigenvector. - Map::EigenVectorXt> x(w_st.vector, num_feat); - x=Q*(svd2.matrixU().col(0)); - // get the bias - bias=((float64_t)(x.transpose()*mean_total)); - bias=bias*(-1); - } + SGVector w(dense_feat->get_num_features()); + // copy w_st into w + for (index_t i = 0; i < w.size(); ++i) + w[i] = sign * w_st[i]; + set_w(w); - SGVector w(num_feat); - w.zero(); + set_bias(-0.5 * sign * (m_neg + m_pos)); - //copy w_st into w - for(i = 0; i < w.size(); ++i) - w[i] = (float64_t) w_st[i]; + return true; +} +template +bool CLDA::solver_classic() +{ + auto dense_feat = static_cast*>(features); + index_t num_feat = dense_feat->get_num_features(); + + auto solver = std::unique_ptr>( + new LDASolver( + dense_feat, + new CMulticlassLabels(static_cast(m_labels)), + m_gamma)); + + auto class_mean = solver->get_class_mean(); + auto class_count = solver->get_class_count(); + SGMatrix scatter_matrix = solver->get_within_cov(); + + // the usual way + // we need to find a Basic Linear Solution of A.x=b for 'x'. + // Instead of crudely Inverting A, we go for solve() using Decompositions. 
+ // where: + // MatrixXd A=scatter; + // VectorXd b=mean_pos-mean_neg; + // VectorXd x=w; + auto decomposition = linalg::cholesky_factor(scatter_matrix); + SGVector w_st = linalg::cholesky_solver( + decomposition, + linalg::add(class_mean[1], class_mean[0], (ST)1, (ST)-1)); + + // get the weights w_neg(for -ve class) and w_pos(for +ve class) + auto w_neg = linalg::cholesky_solver(decomposition, class_mean[0]); + auto w_pos = linalg::cholesky_solver(decomposition, class_mean[1]); + + SGVector w(num_feat); + // copy w_st into w + for (index_t i = 0; i < w.size(); ++i) + w[i] = (float64_t)w_st[i]; set_w(w); + // get the bias. + set_bias( + (float64_t)( + 0.5 * (linalg::dot(w_neg, class_mean[0]) - + linalg::dot(w_pos, class_mean[1])))); + return true; } diff --git a/src/shogun/classifier/LDA.h b/src/shogun/classifier/LDA.h index 06f43d56af3..debbd08e1b7 100644 --- a/src/shogun/classifier/LDA.h +++ b/src/shogun/classifier/LDA.h @@ -32,7 +32,7 @@ enum ELDAMethod /** Singular Value Decomposition based LDA. */ SVD_LDA = 20, - /** Fisher two class discrimiant based LDA. + /** Fisher two class discriminant based LDA. */ FLD_LDA = 30 }; @@ -55,7 +55,8 @@ template class CDenseFeatures; * is maximized, where * \f[S_b := ({\bf m_{+1}} - {\bf m_{-1}})({\bf m_{+1}} - {\bf m_{-1}})^T \f] * is the between class scatter matrix and - * \f[S_w := \sum_{c\in\{-1,+1\}}\sum_{{\bf x}\in X_{c}}({\bf x} - {\bf m_c})({\bf x} - {\bf m_c})^T \f] + * \f[S_w := \sum_{c\in\{-1,+1\}}\sum_{{\bf x}\in X_{c}}({\bf x} - {\bf + * m_c})({\bf x} - {\bf m_c})^T \f] * is the within class scatter matrix with mean \f${\bf m_c} := * \frac{1}{N}\sum_{j=1}^N {\bf x_j^c}\f$ and \f$X_c:=\{x_1^c, \dots, x_N^c\}\f$ * the set of examples of class c. @@ -66,10 +67,14 @@ template class CDenseFeatures; * * ::SVD_LDA : Singular Valued decomposition method. * The above derivation of Fisher's LDA requires the invertibility of the within - * class matrix. 
However, this condition gets void when there are fewer data-points - * than dimensions. A solution is to require that \f${\bf W}\f$ lies only in the subspace - * spanned by the data. A basis of the data \f${\bf X}\f$ is found using the thin-SVD technique - * which returns an orthonormal non-square basis matrix \f${\bf Q}\f$. We then require the + * class matrix. However, this condition gets void when there are fewer + * data-points + * than dimensions. A solution is to require that \f${\bf W}\f$ lies only in the + * subspace + * spanned by the data. A basis of the data \f${\bf X}\f$ is found using the + * thin-SVD technique + * which returns an orthonormal non-square basis matrix \f${\bf Q}\f$. We then + * require the * solution \f${\bf w}\f$ to be expressed in this basis. * \f[{\bf W} := {\bf Q} {\bf{W^\prime}}\f] * The between class Matrix is replaced with: @@ -80,8 +85,13 @@ template class CDenseFeatures; * been projected down to the basis that spans the data. * see: Bayesian Reasoning and Machine Learning, section 16.3.1. * - * ::AUTO_LDA : This mode automagically chooses one of the above modes for - * the users based on whether N > D (chooses ::FLD_LDA) or N < D(chooses ::SVD_LDA) + * ::AUTO_LDA : This mode automagically chooses one of the above modes + * for + * the users based on whether N > D (chooses ::FLD_LDA) or N < D(chooses + * ::SVD_LDA) + * Note that even if N > D FLD_LDA may fail being the covariance matrix not + * invertible, + * in such case one should use SVD_LDA. 
* \sa CLinearMachine * \sa http://en.wikipedia.org/wiki/Linear_discriminant_analysis */ @@ -94,19 +104,33 @@ class CLDA : public CLinearMachine /** constructor * * @param gamma gamma - * @param method LDA using Fisher's algorithm or Singular Value Decomposition : ::FLD_LDA/::SVD_LDA/::AUTO_LDA[default] + * @param method LDA using Fisher's algorithm or Singular Value + * Decomposition : ::FLD_LDA/::SVD_LDA/::AUTO_LDA[default] + * @param bdc_svd when using SVD solver switch between + * Bidiagonal Divide and Conquer algorithm (BDC) and + * Jacobi's algorithm, for the differences @see linalg::SVDAlgorithm. + * [default = BDC-SVD] */ - CLDA(float64_t gamma=0, ELDAMethod method=AUTO_LDA); + CLDA( + float64_t gamma = 0, ELDAMethod method = AUTO_LDA, + bool bdc_svd = true); /** constructor * * @param gamma gamma * @param traindat training features * @param trainlab labels for training features - * @param method LDA using Fisher's algorithm or Singular Value Decomposition : ::FLD_LDA/::SVD_LDA/::AUTO_LDA[default] - + * @param method LDA using Fisher's algorithm or Singular Value + * Decomposition : ::FLD_LDA/::SVD_LDA/::AUTO_LDA[default] + * @param bdc_svd when using SVD solver switch between + * Bidiagonal Divide and Conquer algorithm (BDC-SVD) and + * Jacobi's algorithm, for the differences @see linalg::SVDAlgorithm. + * [default = BDC-SVD] */ - CLDA(float64_t gamma, CDenseFeatures* traindat, CLabels* trainlab, ELDAMethod method=AUTO_LDA); + CLDA( + float64_t gamma, CDenseFeatures* traindat, + CLabels* trainlab, ELDAMethod method = AUTO_LDA, + bool bdc_svd = true); virtual ~CLDA(); /** set gamma @@ -170,7 +194,24 @@ class CLDA : public CLinearMachine * @see train_machine */ template - bool train_machine_templated(SGVector train_labels, CFeatures * features); + bool train_machine_templated(); + + /** + * Train the machine with the svd-based solver (@see CFisherLDA). 
+ * @param features training data + * @param labels labels for training data + */ + template + bool solver_svd(); + + /** + * Train the machine with the classic method based on the cholesky + * decomposition of the covariance matrix. + * @param features training data + * @param labels labels for training data + */ + template + bool solver_classic(); protected: @@ -180,6 +221,8 @@ class CLDA : public CLinearMachine float64_t m_gamma; /** LDA mode */ ELDAMethod m_method; + /** use bdc-svd algorithm */ + bool m_bdc_svd; }; } #endif//ifndef diff --git a/src/shogun/classifier/LPBoost.cpp b/src/shogun/classifier/LPBoost.cpp index b1b9bd5618c..1765439d6fe 100644 --- a/src/shogun/classifier/LPBoost.cpp +++ b/src/shogun/classifier/LPBoost.cpp @@ -124,9 +124,8 @@ bool CLPBoost::train_machine(CFeatures* data) int32_t num_hypothesis=0; CTime time; - CSignal::clear_cancel(); - while (!(CSignal::cancel_computations())) + while (!(cancel_computation())) { int32_t max_dim=0; float64_t violator=find_max_violator(max_dim); diff --git a/src/shogun/classifier/Perceptron.cpp b/src/shogun/classifier/Perceptron.cpp index 07cc289e5ab..7e1a3866a52 100644 --- a/src/shogun/classifier/Perceptron.cpp +++ b/src/shogun/classifier/Perceptron.cpp @@ -13,6 +13,7 @@ #include #include #include +#include using namespace shogun; @@ -64,21 +65,23 @@ bool CPerceptron::train_machine(CFeatures* data) w.vector[i]=1.0/num_feat; } - CSignal::clear_cancel(); //loop till we either get everything classified right or reach max_iter - while (!(CSignal::cancel_computations()) && (!converged && iterindex_iterator()) { - output[i] = features->dense_dot(i, w.vector, w.vlen) + bias; + const auto predicted_label = features->dense_dot(example_idx, w.vector, w.vlen) + bias; + const auto true_label = train_labels[example_idx]; + output[example_idx] = predicted_label; - if (CMath::sign(output[i]) != train_labels.vector[i]) + if (CMath::sign(predicted_label) != true_label) { - converged=false; - 
bias+=learn_rate*train_labels.vector[i]; - features->add_to_dense_vec(learn_rate*train_labels.vector[i], i, w.vector, w.vlen); + converged = false; + const auto gradient = learn_rate * train_labels[example_idx]; + bias += gradient; + features->add_to_dense_vec(gradient, example_idx, w.vector, w.vlen); } } diff --git a/src/shogun/classifier/mkl/MKL.cpp b/src/shogun/classifier/mkl/MKL.cpp index df758630d63..bbb96b05cd3 100644 --- a/src/shogun/classifier/mkl/MKL.cpp +++ b/src/shogun/classifier/mkl/MKL.cpp @@ -427,7 +427,6 @@ bool CMKL::train_machine(CFeatures* data) #endif mkl_iterations = 0; - CSignal::clear_cancel(); training_time_clock.start(); @@ -449,17 +448,13 @@ bool CMKL::train_machine(CFeatures* data) //but if we don't actually unref() the object we might leak memory... //So as a workaround we only unref when the reference count was >1 //before. -#ifdef USE_REFERENCE_COUNTING int32_t refs=this->ref(); -#endif svm->set_callback_function(this, perform_mkl_step_helper); svm->train(); SG_DONE() svm->set_callback_function(NULL, NULL); -#ifdef USE_REFERENCE_COUNTING if (refs>1) this->unref(); -#endif } else { @@ -482,7 +477,7 @@ bool CMKL::train_machine(CFeatures* data) mkl_iterations++; - if (perform_mkl_step(sumw, suma) || CSignal::cancel_computations()) + if (perform_mkl_step(sumw, suma) || cancel_computation()) break; } diff --git a/src/shogun/classifier/mkl/MKLClassification.cpp b/src/shogun/classifier/mkl/MKLClassification.cpp index 2b7833f9d6b..e80564b5ab4 100644 --- a/src/shogun/classifier/mkl/MKLClassification.cpp +++ b/src/shogun/classifier/mkl/MKLClassification.cpp @@ -38,3 +38,16 @@ void CMKLClassification::init_training() REQUIRE(m_labels->get_num_labels(), "Number of labels is zero.\n"); REQUIRE(m_labels->get_label_type() == LT_BINARY, "Labels must be binary.\n"); } + +CMKLClassification* CMKLClassification::obtain_from_generic(CMachine* machine) +{ + if (machine == NULL) + return NULL; + + if (machine->get_classifier_type() != CT_MKLCLASSIFICATION) + 
SG_SERROR("Provided machine is not of type CMKLClassification!") + + CMKLClassification* casted = dynamic_cast(machine); + SG_REF(casted) + return casted; +} \ No newline at end of file diff --git a/src/shogun/classifier/mkl/MKLClassification.h b/src/shogun/classifier/mkl/MKLClassification.h index af666d5f83a..c868c9cc744 100644 --- a/src/shogun/classifier/mkl/MKLClassification.h +++ b/src/shogun/classifier/mkl/MKLClassification.h @@ -41,6 +41,13 @@ class CMKLClassification : public CMKL */ virtual float64_t compute_sum_alpha(); + /** + * Helper method used to specialize a base class instance. + * @param machine the machine we want to cast + * @return a MKLClassification machine (already SGREF'ed) + */ + static CMKLClassification* obtain_from_generic(CMachine* machine); + /** @return object name */ virtual const char* get_name() const { return "MKLClassification"; } diff --git a/src/shogun/classifier/mkl/MKLMulticlass.cpp b/src/shogun/classifier/mkl/MKLMulticlass.cpp index 8ea604c60a9..d3b00c108e5 100644 --- a/src/shogun/classifier/mkl/MKLMulticlass.cpp +++ b/src/shogun/classifier/mkl/MKLMulticlass.cpp @@ -370,9 +370,8 @@ bool CMKLMulticlass::train_machine(CFeatures* data) int32_t numberofsilpiterations=0; bool final=false; - CSignal::clear_cancel(); - while (!(CSignal::cancel_computations()) && !final) + while (!(cancel_computation()) && !final) { //curweights.clear(); diff --git a/src/shogun/classifier/mkl/MKLMulticlassGLPK.cpp b/src/shogun/classifier/mkl/MKLMulticlassGLPK.cpp index 8b692d7ee79..6bba60e1fd4 100644 --- a/src/shogun/classifier/mkl/MKLMulticlassGLPK.cpp +++ b/src/shogun/classifier/mkl/MKLMulticlassGLPK.cpp @@ -13,6 +13,8 @@ #include #include + +#include #include #ifdef USE_GLPK diff --git a/src/shogun/classifier/svm/GPBTSVM.cpp b/src/shogun/classifier/svm/GPBTSVM.cpp deleted file mode 100644 index b787807786d..00000000000 --- a/src/shogun/classifier/svm/GPBTSVM.cpp +++ /dev/null @@ -1,135 +0,0 @@ -/* - * This program is free software; you can 
redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 1999-2009 Soeren Sonnenburg - * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society - */ - - -#include -#ifdef USE_GPL_SHOGUN -#include -#include -#include -#include - -using namespace shogun; - -CGPBTSVM::CGPBTSVM() -: CSVM(), model(NULL) -{ -} - -CGPBTSVM::CGPBTSVM(float64_t C, CKernel* k, CLabels* lab) -: CSVM(C, k, lab), model(NULL) -{ -} - -CGPBTSVM::~CGPBTSVM() -{ - SG_FREE(model); -} - -bool CGPBTSVM::train_machine(CFeatures* data) -{ - float64_t* solution; /* store the solution found */ - QPproblem prob; /* object containing the solvers */ - - ASSERT(kernel) - ASSERT(m_labels && m_labels->get_num_labels()) - ASSERT(m_labels->get_label_type() == LT_BINARY) - if (data) - { - if (m_labels->get_num_labels() != data->get_num_vectors()) - SG_ERROR("Number of training vectors does not match number of labels\n") - kernel->init(data, data); - } - - SGVector lab=((CBinaryLabels*) m_labels)->get_int_labels(); - prob.KER=new sKernel(kernel, lab.vlen); - prob.y=lab.vector; - prob.ell=lab.vlen; - SG_INFO("%d trainlabels\n", prob.ell) - - // /*** set options defaults ***/ - prob.delta = epsilon; - prob.maxmw = kernel->get_cache_size(); - prob.verbosity = 0; - prob.preprocess_size = -1; - prob.projection_projector = -1; - prob.c_const = get_C1(); - prob.chunk_size = get_qpsize(); - prob.linadd = get_linadd_enabled(); - - if (prob.chunk_size < 2) prob.chunk_size = 2; - if (prob.q <= 0) prob.q = prob.chunk_size / 3; - if (prob.q < 2) prob.q = 2; - if (prob.q > prob.chunk_size) prob.q = prob.chunk_size; - prob.q = prob.q & (~1); - if (prob.maxmw < 5) - prob.maxmw = 5; - - /*** set the problem description for final report ***/ - SG_INFO("\nTRAINING PARAMETERS:\n") - SG_INFO("\tNumber of training documents: %d\n", prob.ell) - 
SG_INFO("\tq: %d\n", prob.chunk_size) - SG_INFO("\tn: %d\n", prob.q) - SG_INFO("\tC: %lf\n", prob.c_const) - SG_INFO("\tkernel type: %d\n", prob.ker_type) - SG_INFO("\tcache size: %dMb\n", prob.maxmw) - SG_INFO("\tStopping tolerance: %lf\n", prob.delta) - - // /*** compute the number of cache rows up to maxmw Mb. ***/ - if (prob.preprocess_size == -1) - prob.preprocess_size = (int32_t) ( (float64_t)prob.chunk_size * 1.5 ); - - if (prob.projection_projector == -1) - { - if (prob.chunk_size <= 20) prob.projection_projector = 0; - else prob.projection_projector = 1; - } - - /*** compute the problem solution *******************************************/ - solution = SG_MALLOC(float64_t, prob.ell); - prob.gpdtsolve(solution); - /****************************************************************************/ - - CSVM::set_objective(prob.objective_value); - - int32_t num_sv=0; - int32_t bsv=0; - int32_t i=0; - int32_t k=0; - - for (i = 0; i < prob.ell; i++) - { - if (solution[i] > prob.DELTAsv) - { - num_sv++; - if (solution[i] > (prob.c_const - prob.DELTAsv)) bsv++; - } - } - - create_new_model(num_sv); - set_bias(prob.bee); - - SG_INFO("SV: %d BSV = %d\n", num_sv, bsv) - - for (i = 0; i < prob.ell; i++) - { - if (solution[i] > prob.DELTAsv) - { - set_support_vector(k, i); - set_alpha(k++, solution[i]*((CBinaryLabels*) m_labels)->get_label(i)); - } - } - - delete prob.KER; - SG_FREE(solution); - - return true; -} -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/classifier/svm/GPBTSVM.h b/src/shogun/classifier/svm/GPBTSVM.h deleted file mode 100644 index 810df77d5fd..00000000000 --- a/src/shogun/classifier/svm/GPBTSVM.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * Written (W) 1999-2009 Soeren Sonnenburg - * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society - */ - -#ifndef _GPBTSVM_H___ -#define _GPBTSVM_H___ - -#include -#ifdef USE_GPL_SHOGUN -#include -#include -#include - - -namespace shogun -{ -/** @brief class GPBTSVM */ -class CGPBTSVM : public CSVM -{ - public: - /** default constructor */ - CGPBTSVM(); - - /** constructor - * - * @param C constant C - * @param k kernel - * @param lab labels - */ - CGPBTSVM(float64_t C, CKernel* k, CLabels* lab); - virtual ~CGPBTSVM(); - - /** get classifier type - * - * @return classifier type GPBT - */ - virtual EMachineType get_classifier_type() { return CT_GPBT; } - - /** @return object name */ - virtual const char* get_name() const { return "GPBTSVM"; } - - protected: - /** train SVM classifier - * - * @param data training data (parameter can be avoided if distance or - * kernel-based classifiers are used and distance/kernels are - * initialized with train data) - * - * @return whether training was successful - */ - virtual bool train_machine(CFeatures* data=NULL); - - protected: - /** SVM model */ - struct svm_model* model; -}; -} -#endif //USE_GPL_SHOGUN -#endif diff --git a/src/shogun/classifier/svm/LibLinear.cpp b/src/shogun/classifier/svm/LibLinear.cpp index d81989f62d6..3940dabba75 100644 --- a/src/shogun/classifier/svm/LibLinear.cpp +++ b/src/shogun/classifier/svm/LibLinear.cpp @@ -10,14 +10,15 @@ */ #include -#include -#include -#include #include +#include #include -#include #include +#include #include +#include +#include +#include using namespace shogun; @@ -77,7 +78,7 @@ CLibLinear::~CLibLinear() bool CLibLinear::train_machine(CFeatures* data) { - CSignal::clear_cancel(); + ASSERT(m_labels) ASSERT(m_labels->get_label_type() == LT_BINARY) @@ -313,9 +314,9 @@ void CLibLinear::solve_l2r_l1l2_svc( index[i] = i; } - + auto pb = progress(range(10)); CTime start_time; - while (iter < max_iterations && !CSignal::cancel_computations()) + while 
(iter < max_iterations && !cancel_computation()) { if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time) break; @@ -392,7 +393,8 @@ void CLibLinear::solve_l2r_l1l2_svc( iter++; float64_t gap=PGmax_new - PGmin_new; - SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6) + pb.print_absolute( + gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps)); if(gap <= eps) { @@ -414,7 +416,7 @@ void CLibLinear::solve_l2r_l1l2_svc( PGmin_old = -CMath::INFTY; } - SG_DONE() + pb.complete_absolute(); SG_INFO("optimization finished, #iter = %d\n",iter) if (iter >= max_iterations) { @@ -522,9 +524,9 @@ void CLibLinear::solve_l1r_l2_svc( } } - + auto pb = progress(range(10)); CTime start_time; - while (iter < max_iterations && !CSignal::cancel_computations()) + while (iter < max_iterations && !cancel_computation()) { if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time) break; @@ -742,8 +744,9 @@ void CLibLinear::solve_l1r_l2_svc( Gmax_init = Gmax_new; iter++; - SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), - -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6); + pb.print_absolute( + Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), + -CMath::log10(eps * Gmax_init)); if(Gmax_new <= eps*Gmax_init) { @@ -760,7 +763,7 @@ void CLibLinear::solve_l1r_l2_svc( Gmax_old = Gmax_new; } - SG_DONE() + pb.complete_absolute(); SG_INFO("optimization finished, #iter = %d\n", iter) if(iter >= max_iterations) SG_WARNING("\nWARNING: reaching max number of iterations\n") @@ -893,8 +896,9 @@ void CLibLinear::solve_l1r_lr( } } + auto pb = progress(range(10)); CTime start_time; - while (iter < max_iterations && !CSignal::cancel_computations()) + while (iter < max_iterations && !cancel_computation()) { if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time) break; @@ -1105,7 +1109,9 @@ void CLibLinear::solve_l1r_lr( if(iter == 0) Gmax_init = Gmax_new; iter++; - SG_SABS_PROGRESS(Gmax_new, 
-CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6) + pb.print_absolute( + Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), + -CMath::log10(eps * Gmax_init)); if(Gmax_new <= eps*Gmax_init) { @@ -1122,7 +1128,7 @@ void CLibLinear::solve_l1r_lr( Gmax_old = Gmax_new; } - SG_DONE() + pb.complete_absolute(); SG_INFO("optimization finished, #iter = %d\n", iter) if(iter >= max_iterations) SG_WARNING("\nWARNING: reaching max number of iterations\n") @@ -1230,6 +1236,7 @@ void CLibLinear::solve_l2r_lr_dual(SGVector& w, const liblinear_probl index[i] = i; } + auto pb = progress(range(10)); while (iter < max_iter) { for (i=0; i& w, const liblinear_probl Gmax_init = Gmax; iter++; - SG_SABS_PROGRESS(Gmax, -CMath::log10(Gmax), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6) + pb.print_absolute( + Gmax, -CMath::log10(Gmax), -CMath::log10(Gmax_init), + -CMath::log10(eps * Gmax_init)); if(Gmax < eps) break; @@ -1314,7 +1323,7 @@ void CLibLinear::solve_l2r_lr_dual(SGVector& w, const liblinear_probl } - SG_DONE() + pb.complete_absolute(); SG_INFO("optimization finished, #iter = %d\n",iter) if (iter >= max_iter) SG_WARNING("reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n") diff --git a/src/shogun/classifier/svm/MPDSVM.cpp b/src/shogun/classifier/svm/MPDSVM.cpp index 33c7a60f9cf..89abf942983 100644 --- a/src/shogun/classifier/svm/MPDSVM.cpp +++ b/src/shogun/classifier/svm/MPDSVM.cpp @@ -79,7 +79,6 @@ bool CMPDSVM::train_machine(CFeatures* data) bool primalcool; bool dualcool; - CSignal::clear_cancel(); //if (nustop) //etas[1] = 1; @@ -96,7 +95,7 @@ bool CMPDSVM::train_machine(CFeatures* data) } // go ... 
- while (niter++ < maxiter && !CSignal::cancel_computations()) + while (niter++ < maxiter && !cancel_computation()) { int32_t maxpidx=-1; float64_t maxpviol = -1; diff --git a/src/shogun/classifier/svm/NewtonSVM.cpp b/src/shogun/classifier/svm/NewtonSVM.cpp index 5431d6e3aa0..4fcfa75c79f 100644 --- a/src/shogun/classifier/svm/NewtonSVM.cpp +++ b/src/shogun/classifier/svm/NewtonSVM.cpp @@ -12,6 +12,7 @@ #ifdef HAVE_LAPACK #include #include +#include #include #include #include @@ -23,19 +24,21 @@ //#define V_NEWTON using namespace shogun; -CNewtonSVM::CNewtonSVM() -: CLinearMachine(), C(1), use_bias(true) +CNewtonSVM::CNewtonSVM() : CLinearMachine(true) { + lambda = 1; + num_iter = 20; + prec = 1e-6; + C = 1; } -CNewtonSVM::CNewtonSVM(float64_t c, CDotFeatures* traindat, CLabels* trainlab, int32_t itr) -: CLinearMachine() +CNewtonSVM::CNewtonSVM( + float64_t c, CDotFeatures* traindat, CLabels* trainlab, int32_t itr) + : CLinearMachine(true) { lambda=1/c; num_iter=itr; prec=1e-6; - num_iter=20; - use_bias=true; C=c; set_features(traindat); set_labels(trainlab); @@ -49,7 +52,7 @@ CNewtonSVM::~CNewtonSVM() bool CNewtonSVM::train_machine(CFeatures* data) { - CSignal::clear_cancel(); + ASSERT(m_labels) ASSERT(m_labels->get_label_type() == LT_BINARY) @@ -80,7 +83,7 @@ bool CNewtonSVM::train_machine(CFeatures* data) float64_t obj, *grad=SG_MALLOC(float64_t, x_d+1); float64_t t; - while(!CSignal::cancel_computations()) + while (!cancel_computation()) { iter++; @@ -232,20 +235,19 @@ void CNewtonSVM::line_search_linear(float64_t* weights, float64_t* d, float64_t* out, float64_t* tx) { SGVector Y=((CBinaryLabels*) m_labels)->get_labels(); - float64_t* outz=SG_MALLOC(float64_t, x_n); - float64_t* temp1=SG_MALLOC(float64_t, x_n); - float64_t* temp1forout=SG_MALLOC(float64_t, x_n); - float64_t* outzsv=SG_MALLOC(float64_t, x_n); - float64_t* Ysv=SG_MALLOC(float64_t, x_n); - float64_t* Xsv=SG_MALLOC(float64_t, x_n); - float64_t* temp2=SG_MALLOC(float64_t, x_d); + SGVector outz(x_n); 
+ SGVector temp1(x_n); + SGVector temp1forout(x_n); + SGVector outzsv(x_n); + SGVector Ysv(x_n); + SGVector Xsv(x_n); float64_t t=0.0; - float64_t* Xd=SG_MALLOC(float64_t, x_n); + SGVector Xd(x_n); for (int32_t i=0; idense_dot(i, d, x_d); - SGVector::add_scalar(d[x_d], Xd, x_n); + linalg::add_scalar(Xd, d[x_d]); #ifdef DEBUG_NEWTON CMath::display_vector(d, x_d+1, "Weight vector"); @@ -259,19 +261,22 @@ void CNewtonSVM::line_search_linear(float64_t* weights, float64_t* d, float64_t* float64_t wd; cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d, lambda, weights, x_d, d, x_d, 0.0, &wd, 1); - float64_t tempg, dd; + float64_t dd; cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d, lambda, d, x_d, d, x_d, 0.0, &dd, 1); float64_t g, h; - int32_t sv_len=0, *sv=SG_MALLOC(int32_t, x_n); + int32_t sv_len=0; + SGVector sv(x_n); + SGVector sg_out(out, x_n, false); do { - SGVector::vector_multiply(temp1, Y.vector, Xd, x_n); - sg_memcpy(temp1forout, temp1, sizeof(float64_t)*x_n); - SGVector::scale_vector(t, temp1forout, x_n); - SGVector::add(outz, 1.0, out, -1.0, temp1forout, x_n); + // FIXME:: port it to linalg:: + SGVector::vector_multiply(temp1.vector, Y.vector, Xd.vector, x_n); + sg_memcpy(temp1forout.vector, temp1.vector, sizeof(float64_t)*x_n); + linalg::scale(temp1forout, temp1forout, t); + linalg::add(sg_out, temp1forout, outz, 1.0, -1.0); // Calculation of sv sv_len=0; @@ -286,19 +291,22 @@ void CNewtonSVM::line_search_linear(float64_t* weights, float64_t* d, float64_t* for (int32_t i=0; i::vector_multiply(temp1, outzsv, Ysv, sv_len); - tempg=CMath::dot(temp1, Xsv, sv_len); + memset(temp1.vector, 0, sizeof(float64_t)*sv_len); + SGVector::vector_multiply(temp1.vector, outzsv.vector, Ysv.vector, sv_len); + // in case sv_len < x_n tempg != dot(temp1, Xsv); + float64_t tempg = 0.0; + for (auto i = 0; i < sv_len; ++i) + tempg += temp1[i]*Xsv[i]; g=wd+(t*dd); g-=tempg; // Calculation of second derivative 'h' cblas_dgemm(CblasColMajor, CblasTrans, 
CblasNoTrans, 1, 1, sv_len, 1.0, - Xsv, sv_len, Xsv, sv_len, 0.0, &h, 1); + Xsv.vector, sv_len, Xsv.vector, sv_len, 0.0, &h, 1); h+=dd; // Calculation of 1D Newton step 'd' @@ -309,19 +317,8 @@ void CNewtonSVM::line_search_linear(float64_t* weights, float64_t* d, float64_t* } while(1); - for (int32_t i=0; i #include #include +#include #include using namespace shogun; COnlineLibLinear::COnlineLibLinear() - : COnlineLinearMachine() + : COnlineLinearMachine() { init(); } COnlineLibLinear::COnlineLibLinear(float64_t C_reg) + : COnlineLinearMachine() { init(); C1=C_reg; @@ -35,6 +37,7 @@ COnlineLibLinear::COnlineLibLinear(float64_t C_reg) COnlineLibLinear::COnlineLibLinear( float64_t C_reg, CStreamingDotFeatures* traindat) + : COnlineLinearMachine() { init(); C1=C_reg; @@ -45,6 +48,7 @@ COnlineLibLinear::COnlineLibLinear( } COnlineLibLinear::COnlineLibLinear(COnlineLibLinear *mch) + : COnlineLinearMachine() { init(); C1 = mch->C1; @@ -52,17 +56,7 @@ COnlineLibLinear::COnlineLibLinear(COnlineLibLinear *mch) use_bias = mch->use_bias; set_features(mch->features); - - w_dim = mch->w_dim; - if (w_dim > 0) - { - w = SG_MALLOC(float32_t, w_dim); - sg_memcpy(w, mch->w, w_dim*sizeof(float32_t)); - } - else - { - w = NULL; - } + m_w = mch->m_w.clone(); bias = mch->bias; } @@ -129,8 +123,7 @@ void COnlineLibLinear::stop_train() SG_INFO("Optimization finished.\n") // calculate objective value - for (int32_t i=0; i ex, float64_t label) QD = diag[y_current + 1]; // Dot product of vector with itself - QD += CMath::dot(ex.vector, ex.vector, ex.vlen); + QD += linalg::dot(ex, ex); // Dot product of vector with learned weights - G = CMath::dot(ex.vector, w, w_dim); + G = linalg::dot(ex, m_w); if (use_bias) G += bias; @@ -193,9 +186,7 @@ void COnlineLibLinear::train_one(SGVector ex, float64_t label) alpha_current = CMath::min(CMath::max(alpha_current - G/QD, 0.0), C); d = (alpha_current - alpha_old) * y_current; - for (int32_t i=0; i < w_dim; ++i) - w[i] += d*ex[i]; - + linalg::add(m_w, 
ex, m_w, 1.0f, (float32_t)d); if (use_bias) bias += d; @@ -220,7 +211,7 @@ void COnlineLibLinear::train_one(SGSparseVector ex, float64_t label) QD += SGSparseVector::sparse_dot(ex, ex); // Dot product of vector with learned weights - G = ex.dense_dot(1.0,w,w_dim,0.0); + G = ex.dense_dot(1.0,m_w.vector,m_w.vlen,0.0); if (use_bias) G += bias; @@ -262,7 +253,7 @@ void COnlineLibLinear::train_one(SGSparseVector ex, float64_t label) d = (alpha_current - alpha_old) * y_current; for (int32_t i=0; i < ex.num_feat_entries; i++) - w[ex.features[i].feat_index] += d*ex.features[i].entry; + m_w[ex.features[i].feat_index] += d*ex.features[i].entry; if (use_bias) @@ -276,9 +267,9 @@ void COnlineLibLinear::train_one(SGSparseVector ex, float64_t label) void COnlineLibLinear::train_example(CStreamingDotFeatures *feature, float64_t label) { - features->expand_if_required(w, w_dim); + feature->expand_if_required(m_w.vector, m_w.vlen); - if (features->get_feature_class() == C_STREAMING_DENSE) { + if (feature->get_feature_class() == C_STREAMING_DENSE) { CStreamingDenseFeatures *feat = dynamic_cast *>(feature); if (feat == NULL) @@ -286,7 +277,7 @@ void COnlineLibLinear::train_example(CStreamingDotFeatures *feature, float64_t l train_one(feat->get_vector(), label); } - else if (features->get_feature_class() == C_STREAMING_SPARSE) { + else if (feature->get_feature_class() == C_STREAMING_SPARSE) { CStreamingSparseFeatures *feat = dynamic_cast *>(feature); if (feat == NULL) diff --git a/src/shogun/classifier/svm/OnlineLibLinear.h b/src/shogun/classifier/svm/OnlineLibLinear.h index 7b34af26132..3f0926f6405 100644 --- a/src/shogun/classifier/svm/OnlineLibLinear.h +++ b/src/shogun/classifier/svm/OnlineLibLinear.h @@ -126,21 +126,22 @@ class COnlineLibLinear : public COnlineLinearMachine */ virtual void train_example(CStreamingDotFeatures *feature, float64_t label); +private: + /** Set up parameters */ + void init(); + /** train on one vector * @param ex the example being trained * @param label 
label of this example */ - virtual void train_one(SGVector ex, float64_t label); + void train_one(SGVector ex, float64_t label); /** train on one *sparse* vector * @param ex the example being trained * @param label label of this example + * */ - virtual void train_one(SGSparseVector ex, float64_t label); - -private: - /** Set up parameters */ - void init(); + void train_one(SGSparseVector ex, float64_t label); private: /// use bias or not diff --git a/src/shogun/classifier/svm/OnlineSVMSGD.cpp b/src/shogun/classifier/svm/OnlineSVMSGD.cpp index 6e7495b97b9..a82884a0f24 100644 --- a/src/shogun/classifier/svm/OnlineSVMSGD.cpp +++ b/src/shogun/classifier/svm/OnlineSVMSGD.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -79,10 +80,7 @@ bool COnlineSVMSGD::train(CFeatures* data) // allocate memory for w and initialize everyting w and bias with 0 ASSERT(features) ASSERT(features->get_has_labels()) - if (w) - SG_FREE(w); - w_dim=1; - w=new float32_t; + m_w = SGVector(1); bias=0; // Shift t in order to have a @@ -100,15 +98,13 @@ bool COnlineSVMSGD::train(CFeatures* data) if (features->is_seekable()) features->reset_stream(); - CSignal::clear_cancel(); - ELossType loss_type = loss->get_loss_type(); bool is_log_loss = false; if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN)) is_log_loss = true; int32_t vec_count; - for(int32_t e=0; eexpand_if_required(w, w_dim); + features->expand_if_required(m_w.vector, m_w.vlen); float64_t eta = 1.0 / (lambda * t); float64_t y = features->get_label(); - float64_t z = y * (features->dense_dot(w, w_dim) + bias); + float64_t z = y * (features->dense_dot(m_w.vector, m_w.vlen) + bias); if (z < 1 || is_log_loss) { float64_t etd = -eta * loss->first_derivative(z,1); - features->add_to_dense_vec(etd * y / wscale, w, w_dim); + features->add_to_dense_vec(etd * y / wscale, m_w.vector, m_w.vlen); if (use_bias) { @@ -140,7 +136,7 @@ bool COnlineSVMSGD::train(CFeatures* data) float32_t r = 1 - eta * lambda * 
skip; if (r < 0.8) r = pow(1 - eta * lambda, skip); - SGVector::scale_vector(r, w, w_dim); + linalg::scale(m_w, m_w, r); count = skip; } t++; @@ -158,7 +154,7 @@ bool COnlineSVMSGD::train(CFeatures* data) } features->end_parser(); - float64_t wnorm = CMath::dot(w,w, w_dim); + float64_t wnorm = linalg::dot(m_w, m_w); SG_INFO("Norm: %.6f, Bias: %.6f\n", wnorm, bias) return true; diff --git a/src/shogun/classifier/svm/QPBSVMLib.cpp b/src/shogun/classifier/svm/QPBSVMLib.cpp deleted file mode 100644 index b91a2f1ec24..00000000000 --- a/src/shogun/classifier/svm/QPBSVMLib.cpp +++ /dev/null @@ -1,687 +0,0 @@ -/*----------------------------------------------------------------------- - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Library for solving QP task required for learning SVM without bias term. - * - * Written (W) 2006-2009 Vojtech Franc, xfrancv@cmp.felk.cvut.cz - * Written (W) 2007 Soeren Sonnenburg - * Copyright (C) 2006-2009 Center for Machine Perception, CTU FEL Prague - * Copyright (C) 2007-2009 Fraunhofer Institute FIRST - * - * - * min 0.5*x'*H*x + f'*x - * - * subject to C >= x(i) >= 0 for all i - * - * H [dim x dim] is symmetric positive semi-definite matrix. - * f [dim x 1] is an arbitrary vector. - * - * The precision of found solution is given by parameters - * tmax, tolabs, tolrel which define the stopping conditions: - * - * t >= tmax -> exit_flag = 0 Number of iterations. - * UB-LB <= tolabs -> exit_flag = 1 Abs. tolerance. - * UB-LB <= UB*tolrel -> exit_flag = 2 Relative tolerance. - * - * UB ... Upper bound on the optimal solution. - * LB ... Lower bound on the optimal solution. - * t ... Number of iterations. - * History ... Value of LB and UB wrt. number of iterations. - * - * 1. 
Generalized Gauss-Seidel methods - * exitflag = qpbsvm_sca( &get_col, diag_H, f, UB, dim, tmax, - * tolabs, tolrel, x, Nabla, &t, &History, verb ) - * - * 2. Greedy variant - Udpate variable yielding the best improvement. - * exitflag = qpbsvm_scas( &get_col, diag_H, f, UB, dim, tmax, - * tolabs, tolrel, x, Nabla, &t, &History, verb ) - * - * 3. Updates variable which most violates the KKT conditions - * exitflag = qpbsvm_scamv( &get_col, diag_H, f, UB, dim, tmax, - * tolabs, tolrel, tolKKT, x, Nabla, &t, &History, verb ) - * --------------------------------------------------------------------- */ - -#include -#ifdef USE_GPL_SHOGUN -#include -#include - -#include -#include -#include - -#include -#include - -using namespace shogun; - -#define HISTORY_BUF 1000000 - -#define INDEX(ROW,COL,DIM) ((COL*DIM)+ROW) - -CQPBSVMLib::CQPBSVMLib() -{ - SG_UNSTABLE("CQPBSVMLib::CQPBSVMLib()", "\n") - - m_H=0; - m_dim = 0; - m_diag_H = NULL; - - m_f = NULL; - m_UB = 0.0; - m_tmax = INT_MAX; - m_tolabs = 0; - m_tolrel = 1e-6; - m_tolKKT = 0; - m_solver = QPB_SOLVER_SCA; -} - -CQPBSVMLib::CQPBSVMLib( - float64_t* H, int32_t n, float64_t* f, int32_t m, float64_t UB) -: CSGObject() -{ - ASSERT(H && n>0) - m_H=H; - m_dim = n; - m_diag_H=NULL; - - m_f=f; - m_UB=UB; - m_tmax = INT_MAX; - m_tolabs = 0; - m_tolrel = 1e-6; - m_tolKKT = 0; - m_solver = QPB_SOLVER_SCA; -} - -CQPBSVMLib::~CQPBSVMLib() -{ - SG_FREE(m_diag_H); -} - -int32_t CQPBSVMLib::solve_qp(float64_t* result, int32_t len) -{ - int32_t status = -1; - ASSERT(len==m_dim) - float64_t* Nabla=SG_MALLOC(float64_t, m_dim); - for (int32_t i=0; i 0 ) { - SG_PRINT("%d: Q_P=%m_f, Q_D=%m_f, Q_P-Q_D=%m_f, (Q_P-Q_D)/|Q_P|=%m_f \n", - t, Q_P, Q_D, Q_P-Q_D,(Q_P-Q_D)/CMath::abs(Q_P)); - } - - exitflag = -1; - while( exitflag == -1 ) - { - t++; - - for(i = 0; i < m_dim; i++ ) { - if( m_diag_H[i] > 0 ) { - /* variable update */ - x_old = x[i]; - x[i] = CMath::min(m_UB,CMath::max(0.0, x[i] - Nabla[i]/m_diag_H[i])); - - /* update Nabla */ - 
delta_x = x[i] - x_old; - if( delta_x != 0 ) { - col_H = (float64_t*)get_col(i); - for(j = 0; j < m_dim; j++ ) { - Nabla[j] += col_H[j]*delta_x; - } - } - - } - } - - /* compute Q_P and Q_D */ - xHx = 0; - xf = 0; - xi_sum = 0; - KKTsatisf = 1; - for(i = 0; i < m_dim; i++ ) { - xHx += x[i]*(Nabla[i] - m_f[i]); - xf += x[i]*m_f[i]; - xi_sum += CMath::max(0.0,-Nabla[i]); - - if((x[i] > 0 && x[i] < m_UB && CMath::abs(Nabla[i]) > m_tolKKT) || - (x[i] == 0 && Nabla[i] < -m_tolKKT) || - (x[i] == m_UB && Nabla[i] > m_tolKKT)) KKTsatisf = 0; - } - - Q_P = 0.5*xHx + xf; - Q_D = -0.5*xHx - m_UB*xi_sum; - - /* stopping conditions */ - if(t >= m_tmax) exitflag = 0; - else if(Q_P-Q_D <= m_tolabs) exitflag = 1; - else if(Q_P-Q_D <= CMath::abs(Q_P)*m_tolrel) exitflag = 2; - else if(KKTsatisf == 1) exitflag = 3; - - if( verb > 0 && (t % verb == 0 || t==1)) { - SG_PRINT("%d: Q_P=%m_f, Q_D=%m_f, Q_P-Q_D=%m_f, (Q_P-Q_D)/|Q_P|=%m_f \n", - t, Q_P, Q_D, Q_P-Q_D,(Q_P-Q_D)/CMath::abs(Q_P)); - } - - /* Store m_UB LB to History buffer */ - if( t < History_size ) { - History[INDEX(0,t,2)] = Q_P; - History[INDEX(1,t,2)] = Q_D; - } - else { - tmp_ptr=SG_MALLOC(float64_t, (History_size+HISTORY_BUF)*2); - memset(tmp_ptr, 0, sizeof(float64_t)*(History_size+HISTORY_BUF)*2); - - for( i = 0; i < History_size; i++ ) { - tmp_ptr[INDEX(0,i,2)] = History[INDEX(0,i,2)]; - tmp_ptr[INDEX(1,i,2)] = History[INDEX(1,i,2)]; - } - tmp_ptr[INDEX(0,t,2)] = Q_P; - tmp_ptr[INDEX(1,t,2)] = Q_D; - - History_size += HISTORY_BUF; - SG_FREE(History); - History = tmp_ptr; - } - } - - (*ptr_t) = t; - (*ptr_History) = History; - - SG_PRINT("QP: %f QD: %f\n", Q_P, Q_D) - - return( exitflag ); -} - - -/* -------------------------------------------------------------- - -Usage: exitflag = qpbsvm_scas(m_UB, m_dim, m_tmax, - m_tolabs, m_tolrel, m_tolKKT, x, Nabla, &t, &History, verb ) - --------------------------------------------------------------- */ -int32_t CQPBSVMLib::qpbsvm_scas(float64_t *x, - float64_t *Nabla, - int32_t 
*ptr_t, - float64_t **ptr_History, - int32_t verb) -{ - float64_t *History; - float64_t *col_H; - float64_t *tmp_ptr; - float64_t x_old; - float64_t x_new; - float64_t delta_x; - float64_t max_x=CMath::INFTY; - float64_t xHx; - float64_t Q_P; - float64_t Q_D; - float64_t xf; - float64_t xi_sum; - float64_t max_update; - float64_t curr_update; - int32_t History_size; - int32_t t; - int32_t i, j; - int32_t max_i=-1; - int32_t exitflag; - int32_t KKTsatisf; - - /* ------------------------------------------------------------ */ - /* Initialization */ - /* ------------------------------------------------------------ */ - - t = 0; - - History_size = (m_tmax < HISTORY_BUF ) ? m_tmax+1 : HISTORY_BUF; - History=SG_MALLOC(float64_t, History_size*2); - memset(History, 0, sizeof(float64_t)*History_size*2); - - /* compute Q_P and Q_D */ - xHx = 0; - xf = 0; - xi_sum = 0; - for(i = 0; i < m_dim; i++ ) { - xHx += x[i]*(Nabla[i] - m_f[i]); - xf += x[i]*m_f[i]; - xi_sum += CMath::max(0.0,-Nabla[i]); - } - - Q_P = 0.5*xHx + xf; - Q_D = -0.5*xHx - m_UB*xi_sum; - History[INDEX(0,t,2)] = Q_P; - History[INDEX(1,t,2)] = Q_D; - - if( verb > 0 ) { - SG_PRINT("%d: Q_P=%m_f, Q_D=%m_f, Q_P-Q_D=%m_f, (Q_P-Q_D)/|Q_P|=%m_f \n", - t, Q_P, Q_D, Q_P-Q_D,(Q_P-Q_D)/CMath::abs(Q_P)); - } - - exitflag = -1; - while( exitflag == -1 ) - { - t++; - - max_update = -CMath::INFTY; - for(i = 0; i < m_dim; i++ ) { - if( m_diag_H[i] > 0 ) { - /* variable update */ - x_old = x[i]; - x_new = CMath::min(m_UB,CMath::max(0.0, x[i] - Nabla[i]/m_diag_H[i])); - - curr_update = -0.5*m_diag_H[i]*(x_new*x_new-x_old*x_old) - - (Nabla[i] - m_diag_H[i]*x_old)*(x_new - x_old); - - if( curr_update > max_update ) { - max_i = i; - max_update = curr_update; - max_x = x_new; - } - } - } - - x_old = x[max_i]; - x[max_i] = max_x; - - /* update Nabla */ - delta_x = max_x - x_old; - if( delta_x != 0 ) { - col_H = (float64_t*)get_col(max_i); - for(j = 0; j < m_dim; j++ ) { - Nabla[j] += col_H[j]*delta_x; - } - } - - /* compute Q_P and 
Q_D */ - xHx = 0; - xf = 0; - xi_sum = 0; - KKTsatisf = 1; - for(i = 0; i < m_dim; i++ ) { - xHx += x[i]*(Nabla[i] - m_f[i]); - xf += x[i]*m_f[i]; - xi_sum += CMath::max(0.0,-Nabla[i]); - - if((x[i] > 0 && x[i] < m_UB && CMath::abs(Nabla[i]) > m_tolKKT) || - (x[i] == 0 && Nabla[i] < -m_tolKKT) || - (x[i] == m_UB && Nabla[i] > m_tolKKT)) KKTsatisf = 0; - } - - Q_P = 0.5*xHx + xf; - Q_D = -0.5*xHx - m_UB*xi_sum; - - /* stopping conditions */ - if(t >= m_tmax) exitflag = 0; - else if(Q_P-Q_D <= m_tolabs) exitflag = 1; - else if(Q_P-Q_D <= CMath::abs(Q_P)*m_tolrel) exitflag = 2; - else if(KKTsatisf == 1) exitflag = 3; - - if( verb > 0 && (t % verb == 0 || t==1)) { - SG_PRINT("%d: Q_P=%m_f, Q_D=%m_f, Q_P-Q_D=%m_f, (Q_P-Q_D)/|Q_P|=%m_f \n", - t, Q_P, Q_D, Q_P-Q_D,(Q_P-Q_D)/CMath::abs(Q_P)); - } - - /* Store m_UB LB to History buffer */ - if( t < History_size ) { - History[INDEX(0,t,2)] = Q_P; - History[INDEX(1,t,2)] = Q_D; - } - else { - tmp_ptr=SG_MALLOC(float64_t, (History_size+HISTORY_BUF)*2); - memset(tmp_ptr, 0, (History_size+HISTORY_BUF)*2*sizeof(float64_t)); - for( i = 0; i < History_size; i++ ) { - tmp_ptr[INDEX(0,i,2)] = History[INDEX(0,i,2)]; - tmp_ptr[INDEX(1,i,2)] = History[INDEX(1,i,2)]; - } - tmp_ptr[INDEX(0,t,2)] = Q_P; - tmp_ptr[INDEX(1,t,2)] = Q_D; - - History_size += HISTORY_BUF; - SG_FREE(History); - History = tmp_ptr; - } - } - - (*ptr_t) = t; - (*ptr_History) = History; - - return( exitflag ); -} - -/* -------------------------------------------------------------- - -Usage: exitflag = qpbsvm_scamv(m_UB, m_dim, m_tmax, - m_tolabs, m_tolrel, m_tolKKT, x, Nabla, &t, &History, verb ) - --------------------------------------------------------------- */ -int32_t CQPBSVMLib::qpbsvm_scamv(float64_t *x, - float64_t *Nabla, - int32_t *ptr_t, - float64_t **ptr_History, - int32_t verb) -{ - float64_t *History; - float64_t *col_H; - float64_t delta_x; - float64_t x_new; - float64_t max_viol; - float64_t fval; - int32_t t; - int32_t i; - int32_t u=-1; - int32_t 
exitflag; - - /* ------------------------------------------------------------ */ - /* Initialization */ - /* ------------------------------------------------------------ */ - - t = 0; - exitflag = -1; - while( exitflag == -1 && t <= m_tmax) - { - t++; - - max_viol = 0; - for(i = 0; i < m_dim; i++ ) - { - if( x[i] == 0 ) - { - if( max_viol < -Nabla[i]) { u = i; max_viol = -Nabla[i]; } - } - else if( x[i] > 0 && x[i] < m_UB ) - { - if( max_viol < CMath::abs(Nabla[i]) ) { u = i; max_viol = CMath::abs(Nabla[i]); } - } - else if( max_viol < Nabla[i]) { u = i; max_viol = Nabla[i]; } - } - -/* SG_PRINT("%d: max_viol=%m_f, u=%d\n", t, max_viol, u)*/ - - if( max_viol <= m_tolKKT ) - { - exitflag = 1; - } - else - { - /* update */ - x_new = CMath::min(m_UB,CMath::max(0.0, x[u] - Nabla[u]/m_diag_H[u])); - - delta_x = x_new - x[u]; - x[u] = x_new; - - col_H = (float64_t*)get_col(u); - for(i = 0; i < m_dim; i++ ) { - Nabla[i] += col_H[i]*delta_x; - } - } - } - - History=SG_MALLOC(float64_t, (t+1)*2); - memset(History, 0, sizeof(float64_t)*(t+1)*2); - - fval = 0; - for(fval = 0, i = 0; i < m_dim; i++ ) { - fval += 0.5*x[i]*(Nabla[i]+m_f[i]); - } - - History[INDEX(0,t,2)] = fval; - History[INDEX(1,t,2)] = 0; - - (*ptr_t) = t; - (*ptr_History) = History; - - - - return( exitflag ); -} - -/* -------------------------------------------------------------- - -Usage: exitflag = qpbsvm_prloqo(m_UB, m_dim, m_tmax, - m_tolabs, m_tolrel, m_tolKKT, x, Nabla, &t, &History, verb ) - --------------------------------------------------------------- */ -int32_t CQPBSVMLib::qpbsvm_prloqo(float64_t *x, - float64_t *Nabla, - int32_t *ptr_t, - float64_t **ptr_History, - int32_t verb) -{ - float64_t* lb=SG_MALLOC(float64_t, m_dim); - float64_t* ub=SG_MALLOC(float64_t, m_dim); - float64_t* primal=SG_MALLOC(float64_t, 3*m_dim); - float64_t* dual=SG_MALLOC(float64_t, 1+2*m_dim); - float64_t* a=SG_MALLOC(float64_t, m_dim); - - for (int32_t i=0; i::display_vector(m_f, m_dim, "m_f"); - int32_t 
result=pr_loqo(m_dim, 1, m_f, m_H, a, &b, lb, ub, primal, dual, - 2, 5, 1, -0.95, 10,0); - - SG_FREE(a); - SG_FREE(lb); - SG_FREE(ub); - SG_FREE(primal); - SG_FREE(dual); - - *ptr_t=0; - *ptr_History=NULL; - return result; -} - -int32_t CQPBSVMLib::qpbsvm_gauss_seidel(float64_t *x, - float64_t *Nabla, - int32_t *ptr_t, - float64_t **ptr_History, - int32_t verb) -{ - for (int32_t i=0; i -#ifdef USE_GPL_SHOGUN -#include -#include -#include -#include - -namespace shogun -{ - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -enum E_QPB_SOLVER -{ - QPB_SOLVER_SCA, // sequential coordinate wise (gaussian seidel based) - QPB_SOLVER_SCAS, // sequential coordinate wise selecting the variable - // gaining 'best' improved - QPB_SOLVER_SCAMV, // sequential coordinate wise selecting variable most violating kkt's - QPB_SOLVER_PRLOQO,// via pr_loqo - QPB_SOLVER_CPLEX, // via cplex - QPB_SOLVER_GS, // gaussian seidel - QPB_SOLVER_GRADDESC // gaussian seidel -}; -#endif - -/** @brief class QPBSVMLib */ -class CQPBSVMLib: public CSGObject -{ - public: - /** default constructor */ - CQPBSVMLib(); - - /** constructor - * - * @param H symmetric matrix of size n x n - * @param n size of H's matrix - * @param f is vector of size m - * @param m size of vector f - * @param UB UB - */ - CQPBSVMLib( - float64_t* H, int32_t n, float64_t* f, int32_t m, float64_t UB=1.0); - - /// result has to be allocated & zeroed - int32_t solve_qp(float64_t* result, int32_t len); - - /** set solver - * - * @param solver new solver - */ - inline void set_solver(E_QPB_SOLVER solver) - { - m_solver=solver; - } - - virtual ~CQPBSVMLib(); - - protected: - /** get col - * - * @param col col to get - * @return col indexed by col - */ - inline float64_t* get_col(int32_t col) - { - return &m_H[m_dim*col]; - } - - /** Usage: exitflag = qpbsvm_sca(UB, dim, tmax, - tolabs, tolrel, tolKKT, x, Nabla, &t, &History, verb ) */ - int32_t qpbsvm_sca( - float64_t *x, float64_t *Nabla, int32_t *ptr_t, - float64_t **ptr_History, int32_t verb); 
- /** Usage: exitflag = qpbsvm_scas(UB, dim, tmax, - tolabs, tolrel, tolKKT, x, Nabla, &t, &History, verb ) */ - int32_t qpbsvm_scas( - float64_t *x, float64_t *Nabla, int32_t *ptr_t, - float64_t **ptr_History, int32_t verb); - /** Usage: exitflag = qpbsvm_scamv(UB, dim, tmax, - tolabs, tolrel, tolKKT, x, Nabla, &t, &History, verb ) */ - int32_t qpbsvm_scamv( - float64_t *x, float64_t *Nabla, int32_t *ptr_t, - float64_t **ptr_History, int32_t verb); - /** Usage: exitflag = qpbsvm_prloqo(UB, dim, tmax, - tolabs, tolrel, tolKKT, x, Nabla, &t, &History, verb ) */ - int32_t qpbsvm_prloqo( - float64_t *x, float64_t *Nabla, int32_t *ptr_t, - float64_t **ptr_History, int32_t verb); - /** Usage: exitflag = qpbsvm_gauss_seidel(UB, dim, tmax, - tolabs, tolrel, tolKKT, x, Nabla, &t, &History, verb ) */ - int32_t qpbsvm_gauss_seidel( - float64_t *x, float64_t *Nabla, int32_t *ptr_t, - float64_t **ptr_History, int32_t verb); - /** Usage: exitflag = qpbsvm_gradient_descent(UB, dim, tmax, - tolabs, tolrel, tolKKT, x, Nabla, &t, &History, verb ) */ - int32_t qpbsvm_gradient_descent( - float64_t *x, float64_t *Nabla, int32_t *ptr_t, - float64_t **ptr_History, int32_t verb); -#ifdef USE_CPLEX - /** Usage: exitflag = qpbsvm_cplex(UB, dim, tmax, - tolabs, tolrel, tolKKT, x, Nabla, &t, &History, verb ) */ - int32_t qpbsvm_cplex( - float64_t *x, float64_t *Nabla, int32_t *ptr_t, - float64_t **ptr_History, int32_t verb); -#endif - - /** @return object name */ - inline const char* get_name() const { return "QPBSVMLib"; } - - protected: - /** matrix H */ - float64_t* m_H; - /** diagonal of H */ - float64_t* m_diag_H; - /** dim */ - int32_t m_dim; - - /** vector f */ - float64_t* m_f; - - /** UB */ - float64_t m_UB; - - /** tmax */ - int32_t m_tmax; - /** tolabs */ - float64_t m_tolabs; - /** tolrel */ - float64_t m_tolrel; - /** tolKKT */ - float64_t m_tolKKT; - /** solver */ - E_QPB_SOLVER m_solver; -}; -} -#endif //USE_GPL_SHOGUN -#endif //QPBSVMLIB_H__ diff --git 
a/src/shogun/classifier/svm/SGDQN.cpp b/src/shogun/classifier/svm/SGDQN.cpp index e005485b7cc..aed685fcfad 100644 --- a/src/shogun/classifier/svm/SGDQN.cpp +++ b/src/shogun/classifier/svm/SGDQN.cpp @@ -137,14 +137,13 @@ bool CSGDQN::train(CFeatures* data) calibrate(); SG_INFO("Training on %d vectors\n", num_vec) - CSignal::clear_cancel(); ELossType loss_type = loss->get_loss_type(); bool is_log_loss = false; if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN)) is_log_loss = true; - for(int32_t e=0; e #include #include -#include #include +#include #include #include @@ -52,6 +53,7 @@ #ifdef HAVE_PTHREAD #include + #endif using namespace shogun; @@ -640,12 +642,16 @@ int32_t CSVMLight::optimize_to_convergence(int32_t* docs, int32_t* label, int32_ CTime start_time; mkl_converged=false; - + auto pb = progress(range(10), *this->io); #ifdef CYGWIN for (;((iteration<100 || (!mkl_converged && callback) ) || (retrain && (!terminate))); iteration++){ #else - CSignal::clear_cancel(); - for (;((!CSignal::cancel_computations()) && ((iteration<3 || (!mkl_converged && callback) ) || (retrain && (!terminate)))); iteration++){ + + for (; ((!cancel_computation()) && + ((iteration < 3 || (!mkl_converged && callback)) || + (retrain && (!terminate)))); + iteration++) + { #endif if(use_kernel_cache) @@ -901,7 +907,9 @@ int32_t CSVMLight::optimize_to_convergence(int32_t* docs, int32_t* label, int32_ if (bestmaxdiff>worstmaxdiff) worstmaxdiff=bestmaxdiff; - SG_ABS_PROGRESS(bestmaxdiff, -CMath::log10(bestmaxdiff), -CMath::log10(worstmaxdiff), -CMath::log10(epsilon), 6) + pb.print_absolute( + bestmaxdiff, -CMath::log10(bestmaxdiff), -CMath::log10(worstmaxdiff), + -CMath::log10(epsilon)); /* Terminate loop */ if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time) { @@ -910,6 +918,7 @@ int32_t CSVMLight::optimize_to_convergence(int32_t* docs, int32_t* label, int32_ } } /* end of loop */ + pb.complete_absolute(); SG_DEBUG("inactive:%d\n", inactivenum) diff --git 
a/src/shogun/classifier/svm/SVMLin.cpp b/src/shogun/classifier/svm/SVMLin.cpp deleted file mode 100644 index 388cf2f2ae6..00000000000 --- a/src/shogun/classifier/svm/SVMLin.cpp +++ /dev/null @@ -1,109 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2006-2009 Soeren Sonnenburg - * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace shogun; - -CSVMLin::CSVMLin() -: CLinearMachine(), C1(1), C2(1), epsilon(1e-5), use_bias(true) -{ -} - -CSVMLin::CSVMLin( - float64_t C, CDotFeatures* traindat, CLabels* trainlab) -: CLinearMachine(), C1(C), C2(C), epsilon(1e-5), use_bias(true) -{ - set_features(traindat); - set_labels(trainlab); -} - - -CSVMLin::~CSVMLin() -{ -} - -bool CSVMLin::train_machine(CFeatures* data) -{ - ASSERT(m_labels) - - if (data) - { - if (!data->has_property(FP_DOT)) - SG_ERROR("Specified features are not of type CDotFeatures\n") - set_features((CDotFeatures*) data); - } - - ASSERT(features) - - SGVector train_labels=((CBinaryLabels*) m_labels)->get_labels(); - int32_t num_feat=features->get_dim_feature_space(); - int32_t num_vec=features->get_num_vectors(); - - ASSERT(num_vec==train_labels.vlen) - - struct options Options; - struct data Data; - struct vector_double Weights; - struct vector_double Outputs; - - Data.l=num_vec; - Data.m=num_vec; - Data.u=0; - Data.n=num_feat+1; - Data.nz=num_feat+1; - Data.Y=train_labels.vector; - Data.features=features; - Data.C = SG_MALLOC(float64_t, Data.l); - - Options.algo = SVM; - Options.lambda=1/(2*get_C1()); - Options.lambda_u=1/(2*get_C1()); - Options.S=10000; - Options.R=0.5; - Options.epsilon = get_epsilon(); - Options.cgitermax=10000; - 
Options.mfnitermax=50; - Options.Cp = get_C2()/get_C1(); - Options.Cn = 1; - - if (use_bias) - Options.bias=1.0; - else - Options.bias=0.0; - - for (int32_t i=0;i0) - Data.C[i]=Options.Cp; - else - Data.C[i]=Options.Cn; - } - ssl_train(&Data, &Options, &Weights, &Outputs); - ASSERT(Weights.vec && Weights.d==num_feat+1) - - float64_t sgn=train_labels.vector[0]; - for (int32_t i=0; i(Weights.vec, num_feat)); - set_bias(Weights.vec[num_feat]); - - SG_FREE(Data.C); - SG_FREE(Outputs.vec); - return true; -} diff --git a/src/shogun/classifier/svm/SVMLin.h b/src/shogun/classifier/svm/SVMLin.h deleted file mode 100644 index 950305f1584..00000000000 --- a/src/shogun/classifier/svm/SVMLin.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2006-2009 Soeren Sonnenburg - * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society - */ - -#ifndef _SVMLIN_H___ -#define _SVMLIN_H___ - -#include -#include -#include -#include - -namespace shogun -{ -/** @brief class SVMLin */ -class CSVMLin : public CLinearMachine -{ - public: - - /** problem type */ - MACHINE_PROBLEM_TYPE(PT_BINARY); - - /** default constructor */ - CSVMLin(); - - /** constructor - * - * @param C constant C - * @param traindat training features - * @param trainlab labels for features - */ - CSVMLin( - float64_t C, CDotFeatures* traindat, - CLabels* trainlab); - virtual ~CSVMLin(); - - /** get classifier type - * - * @return classifier type SVMLIN - */ - virtual EMachineType get_classifier_type() { return CT_SVMLIN; } - - /** set C - * - * @param c_neg new C constant for negatively labeled examples - * @param c_pos new C constant for positively labeled examples - * - */ - inline void set_C(float64_t c_neg, float64_t c_pos) { C1=c_neg; 
C2=c_pos; } - - /** get C1 - * - * @return C1 - */ - inline float64_t get_C1() { return C1; } - - /** get C2 - * - * @return C2 - */ - inline float64_t get_C2() { return C2; } - - /** set if bias shall be enabled - * - * @param enable_bias if bias shall be enabled - */ - inline void set_bias_enabled(bool enable_bias) { use_bias=enable_bias; } - - /** get if bias is enabled - * - * @return if bias is enabled - */ - inline bool get_bias_enabled() { return use_bias; } - - /** set epsilon - * - * @param eps new epsilon - */ - inline void set_epsilon(float64_t eps) { epsilon=eps; } - - /** get epsilon - * - * @return epsilon - */ - inline float64_t get_epsilon() { return epsilon; } - - /** @return object name */ - virtual const char* get_name() const { return "SVMLin"; } - - protected: - /** train SVM classifier - * - * @param data training data (parameter can be avoided if distance or - * kernel-based classifiers are used and distance/kernels are - * initialized with train data) - * - * @return whether training was successful - */ - virtual bool train_machine(CFeatures* data=NULL); - - protected: - /** C1 */ - float64_t C1; - /** C2 */ - float64_t C2; - /** epsilon */ - float64_t epsilon; - - /** if bias is used */ - bool use_bias; -}; -} -#endif diff --git a/src/shogun/classifier/svm/SVMOcas.cpp b/src/shogun/classifier/svm/SVMOcas.cpp deleted file mode 100644 index ee63169fdbd..00000000000 --- a/src/shogun/classifier/svm/SVMOcas.cpp +++ /dev/null @@ -1,368 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * Written (W) 2007-2008 Vojtech Franc - * Written (W) 2007-2009 Soeren Sonnenburg - * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society - */ - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace shogun; - -CSVMOcas::CSVMOcas() -: CLinearMachine() -{ - init(); -} - -CSVMOcas::CSVMOcas(E_SVM_TYPE type) -: CLinearMachine() -{ - init(); - method=type; -} - -CSVMOcas::CSVMOcas( - float64_t C, CDotFeatures* traindat, CLabels* trainlab) -: CLinearMachine() -{ - init(); - C1=C; - C2=C; - - set_features(traindat); - set_labels(trainlab); -} - - -CSVMOcas::~CSVMOcas() -{ -} - -bool CSVMOcas::train_machine(CFeatures* data) -{ - SG_INFO("C=%f, epsilon=%f, bufsize=%d\n", get_C1(), get_epsilon(), bufsize) - SG_DEBUG("use_bias = %i\n", get_bias_enabled()) - - ASSERT(m_labels) - ASSERT(m_labels->get_label_type() == LT_BINARY) - if (data) - { - if (!data->has_property(FP_DOT)) - SG_ERROR("Specified features are not of type CDotFeatures\n") - set_features((CDotFeatures*) data); - } - ASSERT(features) - - int32_t num_vec=features->get_num_vectors(); - lab = SGVector(num_vec); - for (int32_t i=0; iget_label(i); - - current_w = SGVector(features->get_dim_feature_space()); - current_w.zero(); - - if (num_vec!=lab.vlen || num_vec<=0) - SG_ERROR("num_vec=%d num_train_labels=%d\n", num_vec, lab.vlen) - - SG_FREE(old_w); - old_w=SG_CALLOC(float64_t, current_w.vlen); - bias=0; - old_bias=0; - - tmp_a_buf=SG_CALLOC(float64_t, current_w.vlen); - cp_value=SG_CALLOC(float64_t*, bufsize); - cp_index=SG_CALLOC(uint32_t*, bufsize); - cp_nz_dims=SG_CALLOC(uint32_t, bufsize); - cp_bias=SG_CALLOC(float64_t, bufsize); - - float64_t TolAbs=0; - float64_t QPBound=0; - int32_t Method=0; - if (method == SVM_OCAS) - Method = 1; - ocas_return_value_T result = svm_ocas_solver( get_C1(), num_vec, get_epsilon(), - TolAbs, QPBound, get_max_train_time(), bufsize, Method, - 
&CSVMOcas::compute_W, - &CSVMOcas::update_W, - &CSVMOcas::add_new_cut, - &CSVMOcas::compute_output, - &CSVMOcas::sort, - &CSVMOcas::print, - this); - - SG_INFO("Ocas Converged after %d iterations\n" - "==================================\n" - "timing statistics:\n" - "output_time: %f s\n" - "sort_time: %f s\n" - "add_time: %f s\n" - "w_time: %f s\n" - "solver_time %f s\n" - "ocas_time %f s\n\n", result.nIter, result.output_time, result.sort_time, - result.add_time, result.w_time, result.qp_solver_time, result.ocas_time); - - SG_FREE(tmp_a_buf); - - primal_objective = result.Q_P; - - uint32_t num_cut_planes = result.nCutPlanes; - - SG_DEBUG("num_cut_planes=%d\n", num_cut_planes) - for (uint32_t i=0; icurrent_w.vlen; - float64_t* W = o->current_w.vector; - float64_t* oldW=o->old_w; - - for(uint32_t j=0; j bias=o->old_bias*(1-t) + t*o->bias; - sq_norm_W += CMath::sq(o->bias); - - return( sq_norm_W ); -} - -/*---------------------------------------------------------------------------------- - sparse_add_new_cut( new_col_H, new_cut, cut_length, nSel ) does the following: - - new_a = sum(data_X(:,find(new_cut ~=0 )),2); - new_col_H = [sparse_A(:,1:nSel)'*new_a ; new_a'*new_a]; - sparse_A(:,nSel+1) = new_a; - - ---------------------------------------------------------------------------------*/ -int CSVMOcas::add_new_cut( - float64_t *new_col_H, uint32_t *new_cut, uint32_t cut_length, - uint32_t nSel, void* ptr) -{ - CSVMOcas* o = (CSVMOcas*) ptr; - CDotFeatures* f = o->features; - uint32_t nDim=(uint32_t) o->current_w.vlen; - float64_t* y = o->lab.vector; - - float64_t** c_val = o->cp_value; - uint32_t** c_idx = o->cp_index; - uint32_t* c_nzd = o->cp_nz_dims; - float64_t* c_bias = o->cp_bias; - - float64_t sq_norm_a; - uint32_t i, j, nz_dims; - - /* temporary vector */ - float64_t* new_a = o->tmp_a_buf; - memset(new_a, 0, sizeof(float64_t)*nDim); - - for(i=0; i < cut_length; i++) - { - f->add_to_dense_vec(y[new_cut[i]], new_cut[i], new_a, nDim); - - if (o->use_bias) - 
c_bias[nSel]+=y[new_cut[i]]; - } - - /* compute new_a'*new_a and count number of non-zerou dimensions */ - nz_dims = 0; - sq_norm_a = CMath::sq(c_bias[nSel]); - for(j=0; j < nDim; j++ ) { - if(new_a[j] != 0) { - nz_dims++; - sq_norm_a += new_a[j]*new_a[j]; - } - } - - /* sparsify new_a and insert it to the last column of sparse_A */ - c_nzd[nSel] = nz_dims; - c_idx[nSel]=NULL; - c_val[nSel]=NULL; - - if(nz_dims > 0) - { - c_idx[nSel]=SG_MALLOC(uint32_t, nz_dims); - c_val[nSel]=SG_MALLOC(float64_t, nz_dims); - - uint32_t idx=0; - for(j=0; j < nDim; j++ ) - { - if(new_a[j] != 0) - { - c_idx[nSel][idx] = j; - c_val[nSel][idx++] = new_a[j]; - } - } - } - - new_col_H[nSel] = sq_norm_a; - - for(i=0; i < nSel; i++) - { - float64_t tmp = c_bias[nSel]*c_bias[i]; - for(j=0; j < c_nzd[i]; j++) - tmp += new_a[c_idx[i][j]]*c_val[i][j]; - - new_col_H[i] = tmp; - } - //CMath::display_vector(new_col_H, nSel+1, "new_col_H"); - //CMath::display_vector((int32_t*) c_idx[nSel], (int32_t) nz_dims, "c_idx"); - //CMath::display_vector((float64_t*) c_val[nSel], nz_dims, "c_val"); - return 0; -} - -int CSVMOcas::sort(float64_t* vals, float64_t* data, uint32_t size) -{ - CMath::qsort_index(vals, data, size); - return 0; -} - -/*---------------------------------------------------------------------- - sparse_compute_output( output ) does the follwing: - - output = data_X'*W; - ----------------------------------------------------------------------*/ -int CSVMOcas::compute_output(float64_t *output, void* ptr) -{ - CSVMOcas* o = (CSVMOcas*) ptr; - CDotFeatures* f=o->features; - int32_t nData=f->get_num_vectors(); - - float64_t* y = o->lab.vector; - - f->dense_dot_range(output, 0, nData, y, o->current_w.vector, o->current_w.vlen, 0.0); - - for (int32_t i=0; ibias; - return 0; -} - -/*---------------------------------------------------------------------- - sq_norm_W = compute_W( alpha, nSel ) does the following: - - oldW = W; - W = sparse_A(:,1:nSel)'*alpha; - sq_norm_W = W'*W; - dp_WoldW = 
W'*oldW'; - - ----------------------------------------------------------------------*/ -void CSVMOcas::compute_W( - float64_t *sq_norm_W, float64_t *dp_WoldW, float64_t *alpha, uint32_t nSel, - void* ptr ) -{ - CSVMOcas* o = (CSVMOcas*) ptr; - uint32_t nDim= (uint32_t) o->current_w.vlen; - CMath::swap(o->current_w.vector, o->old_w); - float64_t* W=o->current_w.vector; - float64_t* oldW=o->old_w; - memset(W, 0, sizeof(float64_t)*nDim); - float64_t old_bias=o->bias; - float64_t bias=0; - - float64_t** c_val = o->cp_value; - uint32_t** c_idx = o->cp_index; - uint32_t* c_nzd = o->cp_nz_dims; - float64_t* c_bias = o->cp_bias; - - for(uint32_t i=0; i 0 && alpha[i] > 0) - { - for(uint32_t j=0; j < nz_dims; j++) - W[c_idx[i][j]] += alpha[i]*c_val[i][j]; - } - bias += c_bias[i]*alpha[i]; - } - - *sq_norm_W = CMath::dot(W,W, nDim) + CMath::sq(bias); - *dp_WoldW = CMath::dot(W,oldW, nDim) + bias*old_bias; - //SG_PRINT("nSel=%d sq_norm_W=%f dp_WoldW=%f\n", nSel, *sq_norm_W, *dp_WoldW) - - o->bias = bias; - o->old_bias = old_bias; -} - -void CSVMOcas::init() -{ - use_bias=true; - bufsize=3000; - C1=1; - C2=1; - - epsilon=1e-3; - method=SVM_OCAS; - old_w=NULL; - tmp_a_buf=NULL; - cp_value=NULL; - cp_index=NULL; - cp_nz_dims=NULL; - cp_bias=NULL; - - primal_objective = 0.0; - - m_parameters->add(&C1, "C1", "Cost constant 1."); - m_parameters->add(&C2, "C2", "Cost constant 2."); - m_parameters->add(&use_bias, "use_bias", - "Indicates if bias is used."); - m_parameters->add(&epsilon, "epsilon", "Convergence precision."); - m_parameters->add(&bufsize, "bufsize", "Maximum number of cutting planes."); - m_parameters->add((machine_int_t*) &method, "method", - "SVMOcas solver type."); -} - -float64_t CSVMOcas::compute_primal_objective() const -{ - return primal_objective; -} - -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/classifier/svm/SVMOcas.h b/src/shogun/classifier/svm/SVMOcas.h deleted file mode 100644 index 838385a9406..00000000000 --- a/src/shogun/classifier/svm/SVMOcas.h +++ 
/dev/null @@ -1,242 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2007-2009 Vojtech Franc - * Written (W) 2007-2009 Soeren Sonnenburg - * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society - */ - - -#ifndef _SVMOCAS_H___ -#define _SVMOCAS_H___ - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include -#include - -namespace shogun -{ -#ifndef DOXYGEN_SHOULD_SKIP_THIS -enum E_SVM_TYPE -{ - SVM_OCAS = 0, - SVM_BMRM = 1 -}; -#endif - -/** @brief class SVMOcas */ -class CSVMOcas : public CLinearMachine -{ - public: - - /** problem type */ - MACHINE_PROBLEM_TYPE(PT_BINARY); - - /** default constructor */ - CSVMOcas(); - - /** constructor - * - * @param type a E_SVM_TYPE - */ - CSVMOcas(E_SVM_TYPE type); - - /** constructor - * - * @param C constant C - * @param traindat training features - * @param trainlab labels for training features - */ - CSVMOcas( - float64_t C, CDotFeatures* traindat, - CLabels* trainlab); - virtual ~CSVMOcas(); - - /** get classifier type - * - * @return classifier type SVMOCAS - */ - virtual EMachineType get_classifier_type() { return CT_SVMOCAS; } - - /** set C - * - * @param c_neg new C constant for negatively labeled examples - * @param c_pos new C constant for positively labeled examples - * - */ - inline void set_C(float64_t c_neg, float64_t c_pos) { C1=c_neg; C2=c_pos; } - - /** get C1 - * - * @return C1 - */ - inline float64_t get_C1() { return C1; } - - /** get C2 - * - * @return C2 - */ - inline float64_t get_C2() { return C2; } - - /** set epsilon - * - * @param eps new epsilon - */ - inline void set_epsilon(float64_t eps) { epsilon=eps; } - - /** get epsilon - * - * @return epsilon - */ - inline float64_t get_epsilon() { return epsilon; } - - /** set if 
bias shall be enabled - * - * @param enable_bias if bias shall be enabled - */ - inline void set_bias_enabled(bool enable_bias) { use_bias=enable_bias; } - - /** check if bias is enabled - * - * @return if bias is enabled - */ - inline bool get_bias_enabled() { return use_bias; } - - /** set buffer size - * - * @param sz buffer size - */ - inline void set_bufsize(int32_t sz) { bufsize=sz; } - - /** get buffer size - * - * @return buffer size - */ - inline int32_t get_bufsize() { return bufsize; } - - /** compute the primal objective value - * - * @return the primal objective - */ - virtual float64_t compute_primal_objective() const; - - protected: - /** compute W - * - * @param sq_norm_W square normed W - * @param dp_WoldW dp W old W - * @param alpha alpha - * @param nSel nSel - * @param ptr ptr - */ - static void compute_W( - float64_t *sq_norm_W, float64_t *dp_WoldW, float64_t *alpha, - uint32_t nSel, void* ptr); - - /** update W - * - * @param t t - * @param ptr ptr - * @return something floaty - */ - static float64_t update_W(float64_t t, void* ptr ); - - /** add new cut - * - * @param new_col_H new col H - * @param new_cut new cut - * @param cut_length length of cut - * @param nSel nSel - * @param ptr ptr - */ - static int add_new_cut( - float64_t *new_col_H, uint32_t *new_cut, uint32_t cut_length, - uint32_t nSel, void* ptr ); - - /** compute output - * - * @param output output - * @param ptr ptr - */ - static int compute_output( float64_t *output, void* ptr ); - - /** sort - * - * @param vals vals - * @param data data - * @param size size - */ - static int sort( float64_t* vals, float64_t* data, uint32_t size); - - /** print nothing */ - static inline void print(ocas_return_value_T value) - { - return; - } - - protected: - /** train SVM classifier - * - * @param data training data (parameter can be avoided if distance or - * kernel-based classifiers are used and distance/kernels are - * initialized with train data) - * - * @return whether training was 
successful - */ - virtual bool train_machine(CFeatures* data=NULL); - - /** @return object name */ - inline const char* get_name() const { return "SVMOcas"; } - private: - void init(); - - protected: - /** if bias is used */ - bool use_bias; - /** buffer size */ - int32_t bufsize; - /** C1 */ - float64_t C1; - /** C2 */ - float64_t C2; - /** epsilon */ - float64_t epsilon; - /** method */ - E_SVM_TYPE method; - - /** current W */ - SGVector current_w; - /** old W */ - float64_t* old_w; - /** old bias */ - float64_t old_bias; - /** nDim big */ - float64_t* tmp_a_buf; - /** labels */ - SGVector lab; - - /** sparse representation of - * cutting planes */ - float64_t** cp_value; - /** cutting plane index */ - uint32_t** cp_index; - /** cutting plane dimensions */ - uint32_t* cp_nz_dims; - /** bias dimensions */ - float64_t* cp_bias; - - /** primal objective */ - float64_t primal_objective; -}; -} -#endif - -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/classifier/svm/SVMSGD.cpp b/src/shogun/classifier/svm/SVMSGD.cpp index f608ff3eef0..bad90f8b9fc 100644 --- a/src/shogun/classifier/svm/SVMSGD.cpp +++ b/src/shogun/classifier/svm/SVMSGD.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -109,14 +110,13 @@ bool CSVMSGD::train_machine(CFeatures* data) calibrate(); SG_INFO("Training on %d vectors\n", num_vec) - CSignal::clear_cancel(); ELossType loss_type = loss->get_loss_type(); bool is_log_loss = false; if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN)) is_log_loss = true; - for(int32_t e=0; e::scale_vector(r, w.vector, w.vlen); + linalg::scale(w, w, r); count = skip; } t++; } } - float64_t wnorm = CMath::dot(w.vector,w.vector, w.vlen); + float64_t wnorm = linalg::dot(w, w); SG_INFO("Norm: %.6f, Bias: %.6f\n", wnorm, bias) set_w(w); diff --git a/src/shogun/classifier/svm/WDSVMOcas.cpp b/src/shogun/classifier/svm/WDSVMOcas.cpp deleted file mode 100644 index 64b5e70fbbc..00000000000 --- a/src/shogun/classifier/svm/WDSVMOcas.cpp +++ 
/dev/null @@ -1,645 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2007-2008 Vojtech Franc - * Written (W) 2007-2009 Soeren Sonnenburg - * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society - */ - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace shogun; - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -struct wdocas_thread_params_output -{ - float32_t* out; - int32_t* val; - float64_t* output; - CWDSVMOcas* wdocas; - int32_t start; - int32_t end; -}; - -struct wdocas_thread_params_add -{ - CWDSVMOcas* wdocas; - float32_t* new_a; - uint32_t* new_cut; - int32_t start; - int32_t end; - uint32_t cut_length; -}; -#endif // DOXYGEN_SHOULD_SKIP_THIS - -CWDSVMOcas::CWDSVMOcas() -: CMachine(), use_bias(false), bufsize(3000), C1(1), C2(1), - epsilon(1e-3), method(SVM_OCAS) -{ - SG_UNSTABLE("CWDSVMOcas::CWDSVMOcas()", "\n") - - w=NULL; - old_w=NULL; - features=NULL; - degree=6; - from_degree=40; - wd_weights=NULL; - w_offsets=NULL; - normalization_const=1.0; -} - -CWDSVMOcas::CWDSVMOcas(E_SVM_TYPE type) -: CMachine(), use_bias(false), bufsize(3000), C1(1), C2(1), - epsilon(1e-3), method(type) -{ - w=NULL; - old_w=NULL; - features=NULL; - degree=6; - from_degree=40; - wd_weights=NULL; - w_offsets=NULL; - normalization_const=1.0; -} - -CWDSVMOcas::CWDSVMOcas( - float64_t C, int32_t d, int32_t from_d, CStringFeatures* traindat, - CLabels* trainlab) -: CMachine(), use_bias(false), bufsize(3000), C1(C), C2(C), epsilon(1e-3), - degree(d), from_degree(from_d) -{ - w=NULL; - old_w=NULL; - method=SVM_OCAS; - features=traindat; - set_labels(trainlab); - wd_weights=NULL; - w_offsets=NULL; - normalization_const=1.0; -} - - 
-CWDSVMOcas::~CWDSVMOcas() -{ -} - -CBinaryLabels* CWDSVMOcas::apply_binary(CFeatures* data) -{ - SGVector outputs = apply_get_outputs(data); - return new CBinaryLabels(outputs); -} - -CRegressionLabels* CWDSVMOcas::apply_regression(CFeatures* data) -{ - SGVector outputs = apply_get_outputs(data); - return new CRegressionLabels(outputs); -} - -SGVector CWDSVMOcas::apply_get_outputs(CFeatures* data) -{ - if (data) - { - if (data->get_feature_class() != C_STRING || - data->get_feature_type() != F_BYTE) - { - SG_ERROR("Features not of class string type byte\n") - } - - set_features((CStringFeatures*) data); - } - ASSERT(features) - - set_wd_weights(); - set_normalization_const(); - - SGVector outputs; - if (features) - { - int32_t num=features->get_num_vectors(); - ASSERT(num>0) - - outputs = SGVector(num); - - for (int32_t i=0; i0 && degree<=8) - SG_FREE(wd_weights); - wd_weights=SG_MALLOC(float32_t, degree); - SG_FREE(w_offsets); - w_offsets=SG_MALLOC(int32_t, degree); - int32_t w_dim_single_c=0; - - for (int32_t i=0; iget_label_type() == LT_BINARY) - if (data) - { - if (data->get_feature_class() != C_STRING || - data->get_feature_type() != F_BYTE) - { - SG_ERROR("Features not of class string type byte\n") - } - set_features((CStringFeatures*) data); - } - - ASSERT(get_features()) - CAlphabet* alphabet=get_features()->get_alphabet(); - ASSERT(alphabet && alphabet->get_alphabet()==RAWDNA) - - alphabet_size=alphabet->get_num_symbols(); - string_length=features->get_num_vectors(); - SGVector labvec=((CBinaryLabels*) m_labels)->get_labels(); - lab=labvec.vector; - - w_dim_single_char=set_wd_weights(); - //CMath::display_vector(wd_weights, degree, "wd_weights"); - SG_DEBUG("w_dim_single_char=%d\n", w_dim_single_char) - w_dim=string_length*w_dim_single_char; - SG_DEBUG("cutting plane has %d dims\n", w_dim) - num_vec=get_features()->get_max_vector_length(); - - set_normalization_const(); - SG_INFO("num_vec: %d num_lab: %d\n", num_vec, labvec.vlen) - 
ASSERT(num_vec==labvec.vlen) - ASSERT(num_vec>0) - - SG_FREE(w); - w=SG_MALLOC(float32_t, w_dim); - memset(w, 0, w_dim*sizeof(float32_t)); - - SG_FREE(old_w); - old_w=SG_MALLOC(float32_t, w_dim); - memset(old_w, 0, w_dim*sizeof(float32_t)); - bias=0; - old_bias=0; - - cuts=SG_MALLOC(float32_t*, bufsize); - memset(cuts, 0, sizeof(*cuts)*bufsize); - cp_bias=SG_MALLOC(float64_t, bufsize); - memset(cp_bias, 0, sizeof(float64_t)*bufsize); - -/////speed tests///// - /*float64_t* tmp = SG_MALLOC(float64_t, num_vec); - float64_t start=CTime::get_curtime(); - CMath::random_vector(w, w_dim, (float32_t) 0, (float32_t) 1000); - compute_output(tmp, this); - start=CTime::get_curtime()-start; - SG_PRINT("timing:%f\n", start) - SG_FREE(tmp); - exit(1);*/ -/////speed tests///// - float64_t TolAbs=0; - float64_t QPBound=0; - uint8_t Method=0; - if (method == SVM_OCAS) - Method = 1; - ocas_return_value_T result = svm_ocas_solver( get_C1(), num_vec, get_epsilon(), - TolAbs, QPBound, get_max_train_time(), bufsize, Method, - &CWDSVMOcas::compute_W, - &CWDSVMOcas::update_W, - &CWDSVMOcas::add_new_cut, - &CWDSVMOcas::compute_output, - &CWDSVMOcas::sort, - &CWDSVMOcas::print, - this); - - SG_INFO("Ocas Converged after %d iterations\n" - "==================================\n" - "timing statistics:\n" - "output_time: %f s\n" - "sort_time: %f s\n" - "add_time: %f s\n" - "w_time: %f s\n" - "solver_time %f s\n" - "ocas_time %f s\n\n", result.nIter, result.output_time, result.sort_time, - result.add_time, result.w_time, result.qp_solver_time, result.ocas_time); - - for (int32_t i=bufsize-1; i>=0; i--) - SG_FREE(cuts[i]); - SG_FREE(cuts); - - lab=NULL; - SG_UNREF(alphabet); - - return true; -} - -/*---------------------------------------------------------------------------------- - sq_norm_W = sparse_update_W( t ) does the following: - - W = oldW*(1-t) + t*W; - sq_norm_W = W'*W; - - ---------------------------------------------------------------------------------*/ -float64_t 
CWDSVMOcas::update_W( float64_t t, void* ptr ) -{ - float64_t sq_norm_W = 0; - CWDSVMOcas* o = (CWDSVMOcas*) ptr; - uint32_t nDim = (uint32_t) o->w_dim; - float32_t* W=o->w; - float32_t* oldW=o->old_w; - float64_t bias=o->bias; - float64_t old_bias=bias; - - for(uint32_t j=0; j bias=bias; - o->old_bias=old_bias; - - return( sq_norm_W ); -} - -/*---------------------------------------------------------------------------------- - sparse_add_new_cut( new_col_H, new_cut, cut_length, nSel ) does the following: - - new_a = sum(data_X(:,find(new_cut ~=0 )),2); - new_col_H = [sparse_A(:,1:nSel)'*new_a ; new_a'*new_a]; - sparse_A(:,nSel+1) = new_a; - - ---------------------------------------------------------------------------------*/ -void* CWDSVMOcas::add_new_cut_helper( void* ptr) -{ - wdocas_thread_params_add* p = (wdocas_thread_params_add*) ptr; - CWDSVMOcas* o = p->wdocas; - int32_t start = p->start; - int32_t end = p->end; - int32_t string_length = o->string_length; - //uint32_t nDim=(uint32_t) o->w_dim; - uint32_t cut_length=p->cut_length; - uint32_t* new_cut=p->new_cut; - int32_t* w_offsets = o->w_offsets; - float64_t* y = o->lab; - int32_t alphabet_size = o->alphabet_size; - float32_t* wd_weights = o->wd_weights; - int32_t degree = o->degree; - CStringFeatures* f = o->features; - float64_t normalization_const = o->normalization_const; - - // temporary vector - float32_t* new_a = p->new_a; - //float32_t* new_a = SG_MALLOC(float32_t, nDim); - //memset(new_a, 0, sizeof(float32_t)*nDim); - - int32_t* val=SG_MALLOC(int32_t, cut_length); - for (int32_t j=start; jw_dim_single_char*j; - memset(val,0,sizeof(int32_t)*cut_length); - int32_t lim=CMath::min(degree, string_length-j); - int32_t len; - - for (int32_t k=0; kget_feature_vector(j+k, len, free_vec); - float32_t wd = wd_weights[k]/normalization_const; - - for(uint32_t i=0; i < cut_length; i++) - { - val[i]=val[i]*alphabet_size + vec[new_cut[i]]; - new_a[offs+val[i]]+=wd * y[new_cut[i]]; - } - offs+=w_offsets[k]; - 
f->free_feature_vector(vec, j+k, free_vec); - } - } - - //p->new_a=new_a; - SG_FREE(val); - return NULL; -} - -int CWDSVMOcas::add_new_cut( - float64_t *new_col_H, uint32_t *new_cut, uint32_t cut_length, - uint32_t nSel, void* ptr) -{ - CWDSVMOcas* o = (CWDSVMOcas*) ptr; - uint32_t i; - float64_t* c_bias = o->cp_bias; - uint32_t nDim=(uint32_t) o->w_dim; - float32_t** cuts=o->cuts; - float32_t* new_a=SG_MALLOC(float32_t, nDim); - memset(new_a, 0, sizeof(float32_t)*nDim); -#ifdef HAVE_PTHREAD - - wdocas_thread_params_add* params_add=SG_MALLOC(wdocas_thread_params_add, o->parallel->get_num_threads()); - pthread_t* threads=SG_MALLOC(pthread_t, o->parallel->get_num_threads()); - - int32_t string_length = o->string_length; - int32_t t; - int32_t nthreads=o->parallel->get_num_threads()-1; - int32_t step= string_length/o->parallel->get_num_threads(); - - if (step<1) - { - nthreads=string_length-1; - step=1; - } - - for (t=0; tuse_bias) - c_bias[nSel]+=o->lab[new_cut[i]]; - } - - // insert new_a into the last column of sparse_A - for(i=0; i < nSel; i++) - new_col_H[i] = CMath::dot(new_a, cuts[i], nDim) + c_bias[nSel]*c_bias[i]; - new_col_H[nSel] = CMath::dot(new_a, new_a, nDim) + CMath::sq(c_bias[nSel]); - - cuts[nSel]=new_a; - //CMath::display_vector(new_col_H, nSel+1, "new_col_H"); - //CMath::display_vector(cuts[nSel], nDim, "cut[nSel]"); - // - - return 0; -} - -int CWDSVMOcas::sort( float64_t* vals, float64_t* data, uint32_t size) -{ - CMath::qsort_index(vals, data, size); - return 0; -} - -/*---------------------------------------------------------------------- - sparse_compute_output( output ) does the follwing: - - output = data_X'*W; - ----------------------------------------------------------------------*/ -void* CWDSVMOcas::compute_output_helper(void* ptr) -{ - wdocas_thread_params_output* p = (wdocas_thread_params_output*) ptr; - CWDSVMOcas* o = p->wdocas; - int32_t start = p->start; - int32_t end = p->end; - float32_t* out = p->out; - float64_t* output = 
p->output; - int32_t* val = p->val; - - CStringFeatures* f=o->get_features(); - - int32_t degree = o->degree; - int32_t string_length = o->string_length; - int32_t alphabet_size = o->alphabet_size; - int32_t* w_offsets = o->w_offsets; - float32_t* wd_weights = o->wd_weights; - float32_t* w= o->w; - - float64_t* y = o->lab; - float64_t normalization_const = o->normalization_const; - - - for (int32_t j=0; jw_dim_single_char*j; - for (int32_t i=start ; iget_feature_vector(j+k, len, free_vec); - float32_t wd = wd_weights[k]; - - for (int32_t i=start; i>8)&255); - val[ii+2]=val[ii+2]*alphabet_size + ((x>>16)&255); - val[ii+3]=val[ii+3]*alphabet_size + (x>>24); - out[ii]+=wd*w[offs+val[ii]]; - out[ii+1]+=wd*w[offs+val[ii+1]]; - out[ii+2]+=wd*w[offs+val[ii+2]]; - out[ii+3]+=wd*w[offs+val[ii+3]]; - }*/ - - /*for (int32_t i=0; i>3; i++) // fastest on 64bit: 1.5s - { - uint64_t x=((uint64_t*) vec)[i]; - int32_t ii=i<<3; - val[ii]=val[ii]*alphabet_size + (x&255); - val[ii+1]=val[ii+1]*alphabet_size + ((x>>8)&255); - val[ii+2]=val[ii+2]*alphabet_size + ((x>>16)&255); - val[ii+3]=val[ii+3]*alphabet_size + ((x>>24)&255); - val[ii+4]=val[ii+4]*alphabet_size + ((x>>32)&255); - val[ii+5]=val[ii+5]*alphabet_size + ((x>>40)&255); - val[ii+6]=val[ii+6]*alphabet_size + ((x>>48)&255); - val[ii+7]=val[ii+7]*alphabet_size + (x>>56); - out[ii]+=wd*w[offs+val[ii]]; - out[ii+1]+=wd*w[offs+val[ii+1]]; - out[ii+2]+=wd*w[offs+val[ii+2]]; - out[ii+3]+=wd*w[offs+val[ii+3]]; - out[ii+4]+=wd*w[offs+val[ii+4]]; - out[ii+5]+=wd*w[offs+val[ii+5]]; - out[ii+6]+=wd*w[offs+val[ii+6]]; - out[ii+7]+=wd*w[offs+val[ii+7]]; - }*/ - offs+=w_offsets[k]; - f->free_feature_vector(vec, j+k, free_vec); - } - } - - for (int32_t i=start; ibias + out[i]*y[i]/normalization_const; - - //CMath::display_vector(o->w, o->w_dim, "w"); - //CMath::display_vector(output, nData, "out"); - return NULL; -} - -int CWDSVMOcas::compute_output( float64_t *output, void* ptr ) -{ -#ifdef HAVE_PTHREAD - CWDSVMOcas* o = (CWDSVMOcas*) ptr; 
- int32_t nData=o->num_vec; - wdocas_thread_params_output* params_output=SG_MALLOC(wdocas_thread_params_output, o->parallel->get_num_threads()); - pthread_t* threads = SG_MALLOC(pthread_t, o->parallel->get_num_threads()); - - float32_t* out=SG_MALLOC(float32_t, nData); - int32_t* val=SG_MALLOC(int32_t, nData); - memset(out, 0, sizeof(float32_t)*nData); - - int32_t t; - int32_t nthreads=o->parallel->get_num_threads()-1; - int32_t step= nData/o->parallel->get_num_threads(); - - if (step<1) - { - nthreads=nData-1; - step=1; - } - - for (t=0; tw_dim; - CMath::swap(o->w, o->old_w); - float32_t* W=o->w; - float32_t* oldW=o->old_w; - float32_t** cuts=o->cuts; - memset(W, 0, sizeof(float32_t)*nDim); - float64_t* c_bias = o->cp_bias; - float64_t old_bias=o->bias; - float64_t bias=0; - - for (uint32_t i=0; i 0) - SGVector::vec1_plus_scalar_times_vec2(W, (float32_t) alpha[i], cuts[i], nDim); - - bias += c_bias[i]*alpha[i]; - } - - *sq_norm_W = CMath::dot(W,W, nDim) +CMath::sq(bias); - *dp_WoldW = CMath::dot(W,oldW, nDim) + bias*old_bias;; - //SG_PRINT("nSel=%d sq_norm_W=%f dp_WoldW=%f\n", nSel, *sq_norm_W, *dp_WoldW) - - o->bias = bias; - o->old_bias = old_bias; -} - -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/classifier/svm/WDSVMOcas.h b/src/shogun/classifier/svm/WDSVMOcas.h deleted file mode 100644 index 85050faf030..00000000000 --- a/src/shogun/classifier/svm/WDSVMOcas.h +++ /dev/null @@ -1,379 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * Written (W) 2007-2008 Vojtech Franc - * Written (W) 2007-2009 Soeren Sonnenburg - * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society - */ - - -#ifndef _WDSVMOCAS_H___ -#define _WDSVMOCAS_H___ - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include -#include - -namespace shogun -{ -template class CStringFeatures; - -/** @brief class WDSVMOcas */ -class CWDSVMOcas : public CMachine -{ - public: - /** problem type */ - MACHINE_PROBLEM_TYPE(PT_BINARY); - - /** default constructor */ - CWDSVMOcas(); - - /** constructor - * - * @param type type of SVM - */ - CWDSVMOcas(E_SVM_TYPE type); - - /** constructor - * - * @param C constant C - * @param d degree - * @param from_d from degree - * @param traindat training features - * @param trainlab labels for training features - */ - CWDSVMOcas( - float64_t C, int32_t d, int32_t from_d, - CStringFeatures* traindat, CLabels* trainlab); - virtual ~CWDSVMOcas(); - - /** get classifier type - * - * @return classifier type WDSVMOCAS - */ - virtual EMachineType get_classifier_type() { return CT_WDSVMOCAS; } - - /** set C - * - * @param c_neg new C constant for negatively labeled examples - * @param c_pos new C constant for positively labeled examples - * - */ - inline void set_C(float64_t c_neg, float64_t c_pos) { C1=c_neg; C2=c_pos; } - - /** get C1 - * - * @return C1 - */ - inline float64_t get_C1() { return C1; } - - /** get C2 - * - * @return C2 - */ - inline float64_t get_C2() { return C2; } - - /** set epsilon - * - * @param eps new epsilon - */ - inline void set_epsilon(float64_t eps) { epsilon=eps; } - - /** get epsilon - * - * @return epsilon - */ - inline float64_t get_epsilon() { return epsilon; } - - /** set features - * - * @param feat features to set - */ - inline void set_features(CStringFeatures* feat) - { - SG_REF(feat); - SG_UNREF(features); - features=feat; - } - - /** get features - * - * @return features - */ - inline CStringFeatures* get_features() - { - 
SG_REF(features); - return features; - } - - /** set if bias shall be enabled - * - * @param enable_bias if bias shall be enabled - */ - inline void set_bias_enabled(bool enable_bias) { use_bias=enable_bias; } - - /** check if bias is enabled - * - * @return if bias is enabled - */ - inline bool get_bias_enabled() { return use_bias; } - - /** set buffer size - * - * @param sz buffer size - */ - inline void set_bufsize(int32_t sz) { bufsize=sz; } - - /** get buffer size - * - * @return buffer size - */ - inline int32_t get_bufsize() { return bufsize; } - - /** set degree - * - * @param d degree - * @param from_d from degree - */ - inline void set_degree(int32_t d, int32_t from_d) - { - degree=d; - from_degree=from_d; - } - - /** get degree - * - * @return degree - */ - inline int32_t get_degree() { return degree; } - - /** classify objects - * for binary classification problems - * - * @param data (test)data to be classified - * @return classified labels - */ - virtual CBinaryLabels* apply_binary(CFeatures* data=NULL); - - /** classify objects - * for regression problems - * - * @param data (test)data to be classified - * @return classified labels - */ - virtual CRegressionLabels* apply_regression(CFeatures* data=NULL); - - /** classify one example - * - * @param num number of example to classify - * @return classified result - */ - virtual float64_t apply_one(int32_t num) - { - ASSERT(features) - if (!wd_weights) - set_wd_weights(); - - int32_t len=0; - float64_t sum=0; - bool free_vec; - uint8_t* vec=features->get_feature_vector(num, len, free_vec); - //SG_INFO("len %d, string_length %d\n", len, string_length) - ASSERT(len==string_length) - - for (int32_t j=0; jfree_feature_vector(vec, num, free_vec); - return sum/normalization_const; - } - - /** set normalization const */ - inline void set_normalization_const() - { - ASSERT(features) - normalization_const=0; - for (int32_t i=0; i apply_get_outputs(CFeatures* data); - - /** set wd weights - * - * @return 
w_dim_single_c - */ - int32_t set_wd_weights(); - - /** compute W - * - * @param sq_norm_W square normed W - * @param dp_WoldW dp W old W - * @param alpha alpha - * @param nSel nSel - * @param ptr ptr - */ - static void compute_W( - float64_t *sq_norm_W, float64_t *dp_WoldW, float64_t *alpha, - uint32_t nSel, void* ptr ); - - /** update W - * - * @param t t - * @param ptr ptr - * @return something floaty - */ - static float64_t update_W(float64_t t, void* ptr ); - - /** helper function for adding a new cut - * - * @param ptr - * @return ptr - */ - static void* add_new_cut_helper(void* ptr); - - /** add new cut - * - * @param new_col_H new col H - * @param new_cut new cut - * @param cut_length length of cut - * @param nSel nSel - * @param ptr ptr - */ - static int add_new_cut( - float64_t *new_col_H, uint32_t *new_cut, uint32_t cut_length, - uint32_t nSel, void* ptr ); - - /** helper function for computing the output - * - * @param ptr - * @return ptr - */ - static void* compute_output_helper(void* ptr); - - /** compute output - * - * @param output output - * @param ptr ptr - */ - static int compute_output( float64_t *output, void* ptr ); - - /** sort - * - * @param vals vals - * @param data data - * @param size size - */ - static int sort( float64_t* vals, float64_t* data, uint32_t size); - - /** print nothing */ - static inline void print(ocas_return_value_T value) - { - return; - } - - - /** @return object name */ - virtual const char* get_name() const { return "WDSVMOcas"; } - - protected: - /** train classifier - * - * @param data training data (parameter can be avoided if distance or - * kernel-based classifiers are used and distance/kernels are - * initialized with train data) - * - * @return whether training was successful - */ - virtual bool train_machine(CFeatures* data=NULL); - - protected: - /** features */ - CStringFeatures* features; - /** if bias shall be used */ - bool use_bias; - /** buffer size */ - int32_t bufsize; - /** C1 */ - float64_t C1; - 
/** C2 */ - float64_t C2; - /** epsilon */ - float64_t epsilon; - /** method */ - E_SVM_TYPE method; - - /** degree */ - int32_t degree; - /** from degree */ - int32_t from_degree; - /** wd weights */ - float32_t* wd_weights; - /** num vectors */ - int32_t num_vec; - /** length of string in vector */ - int32_t string_length; - /** size of alphabet */ - int32_t alphabet_size; - - /** normalization const */ - float64_t normalization_const; - - /** bias */ - float64_t bias; - /** old_bias */ - float64_t old_bias; - /** w offsets */ - int32_t* w_offsets; - /** w dim */ - int32_t w_dim; - /** w dim of a single char */ - int32_t w_dim_single_char; - /** w */ - float32_t* w; - /** old w*/ - float32_t* old_w; - /** labels */ - float64_t* lab; - - /** cuts */ - float32_t** cuts; - /** bias dimensions */ - float64_t* cp_bias; -}; -} -#endif - -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/classifier/vw/VowpalWabbit.cpp b/src/shogun/classifier/vw/VowpalWabbit.cpp index 6d5928f1762..4b17973dd83 100644 --- a/src/shogun/classifier/vw/VowpalWabbit.cpp +++ b/src/shogun/classifier/vw/VowpalWabbit.cpp @@ -59,10 +59,9 @@ CVowpalWabbit::CVowpalWabbit(CVowpalWabbit *vw) save_predictions = vw->save_predictions; prediction_fd = vw->prediction_fd; - w = reg->weight_vectors[0]; + m_w.vector = reg->weight_vectors[0]; reg->weight_vectors[0] = NULL; - copy(vw->w, vw->w+vw->w_dim, w); - w_dim = vw->w_dim; + copy(vw->m_w.vector, vw->m_w.vector+vw->m_w.vlen, m_w.vector); bias = vw->bias; } @@ -83,7 +82,7 @@ void CVowpalWabbit::reinitialize_weights() } reg->init(env); - w = reg->weight_vectors[0]; + m_w.vector = reg->weight_vectors[0]; reg->weight_vectors[0] = NULL; } @@ -114,9 +113,8 @@ void CVowpalWabbit::set_exact_adaptive_norm(bool exact_adaptive) void CVowpalWabbit::load_regressor(char* file_name) { reg->load_regressor(file_name); - w = reg->weight_vectors[0]; + m_w = SGVector(reg->weight_vectors[0], 1 << env->num_bits); reg->weight_vectors[0] = NULL; - w_dim = 1 << env->num_bits; } void 
CVowpalWabbit::set_regressor_out(char* file_name, bool is_text) @@ -163,9 +161,8 @@ bool CVowpalWabbit::train_machine(CFeatures* feat) "loss", "last", "counter", "weight", "label", "predict", "features"); } - CSignal::clear_cancel(); features->start_parser(); - while (!(CSignal::cancel_computations()) && (env->passes_complete < env->num_passes)) + while (!(cancel_computation()) && (env->passes_complete < env->num_passes)) { while (features->get_next_example()) { @@ -271,9 +268,8 @@ void CVowpalWabbit::init(CStreamingVwFeatures* feat) save_predictions = false; prediction_fd = -1; - w = reg->weight_vectors[0]; + m_w = SGVector(reg->weight_vectors[0], 1 << env->num_bits); reg->weight_vectors[0] = NULL; - w_dim = 1 << env->num_bits; bias = 0.; } diff --git a/src/shogun/clustering/GMM.cpp b/src/shogun/clustering/GMM.cpp index f2877377871..045637695b6 100644 --- a/src/shogun/clustering/GMM.cpp +++ b/src/shogun/clustering/GMM.cpp @@ -11,15 +11,15 @@ #ifdef HAVE_LAPACK +#include #include #include #include -#include +#include #include #include -#include +#include #include - #include using namespace shogun; @@ -32,8 +32,7 @@ CGMM::CGMM() : CDistribution(), m_components(), m_coefficients() CGMM::CGMM(int32_t n, ECovType cov_type) : CDistribution(), m_components(), m_coefficients() { - m_coefficients.vector=SG_MALLOC(float64_t, n); - m_coefficients.vlen=n; + m_coefficients = SGVector(n); m_components = vector(n); for (int32_t i=0; i components, SGVector coefficients, bool m_components[i]->set_cov_type(components[i]->get_cov_type()); SGVector old_mean=components[i]->get_mean(); - SGVector new_mean(old_mean.vlen); - sg_memcpy(new_mean.vector, old_mean.vector, old_mean.vlen*sizeof(float64_t)); + SGVector new_mean = old_mean.clone(); m_components[i]->set_mean(new_mean); SGVector old_d=components[i]->get_d(); - SGVector new_d(old_d.vlen); - sg_memcpy(new_d.vector, old_d.vector, old_d.vlen*sizeof(float64_t)); + SGVector new_d = old_d.clone(); m_components[i]->set_d(new_d); if 
(components[i]->get_cov_type()==FULL) { SGMatrix old_u=components[i]->get_u(); - SGMatrix new_u(old_u.num_rows, old_u.num_cols); - sg_memcpy(new_u.matrix, old_u.matrix, old_u.num_rows*old_u.num_cols*sizeof(float64_t)); + SGMatrix new_u = old_u.clone(); m_components[i]->set_u(new_u); } @@ -154,8 +150,8 @@ float64_t CGMM::train_em(float64_t min_cov, int32_t max_iter, float64_t min_chan int32_t iter=0; float64_t log_likelihood_prev=0; float64_t log_likelihood_cur=0; - float64_t* logPxy=SG_MALLOC(float64_t, num_vectors*m_components.size()); - float64_t* logPx=SG_MALLOC(float64_t, num_vectors); + SGVector logPxy(num_vectors * m_components.size()); + SGVector logPx(num_vectors); //float64_t* logPost=SG_MALLOC(float64_t, num_vectors*m_components.vlen); while (iter v=dotdata->get_computed_dot_feature_vector(i); for (int32_t j=0; jcompute_log_PDF(v)+CMath::log(m_coefficients[j]); - logPx[i]+=CMath::exp(logPxy[i*m_components.size()+j]); + logPxy[index_t(i * m_components.size() + j)] = + m_components[j]->compute_log_PDF(v) + + CMath::log(m_coefficients[j]); + logPx[i] += + CMath::exp(logPxy[index_t(i * m_components.size() + j)]); } logPx[i]=CMath::log(logPx[i]); @@ -179,7 +178,8 @@ float64_t CGMM::train_em(float64_t min_cov, int32_t max_iter, float64_t min_chan for (int32_t j=0; j logPxy(num_vectors * m_components.size()); + SGVector logPx(num_vectors); + SGVector logPost(num_vectors * m_components.size()); + SGVector logPostSum(m_components.size()); + SGVector logPostSum2(m_components.size()); + SGVector logPostSumSum( + m_components.size() * (m_components.size() - 1) / 2); + SGVector split_crit(m_components.size()); + SGVector merge_crit( + m_components.size() * (m_components.size() - 1) / 2); + SGVector split_ind(m_components.size()); + SGVector merge_ind( + m_components.size() * (m_components.size() - 1) / 2); while (iter v=dotdata->get_computed_dot_feature_vector(i); for (int32_t j=0; jcompute_log_PDF(v)+CMath::log(m_coefficients[j]); - 
logPx[i]+=CMath::exp(logPxy[i*m_components.size()+j]); + logPxy[index_t(i * m_components.size() + j)] = + m_components[j]->compute_log_PDF(v) + + CMath::log(m_coefficients[j]); + logPx[i] += + CMath::exp(logPxy[index_t(i * m_components.size() + j)]); } logPx[i]=CMath::log(logPx[i]); for (int32_t j=0; jget_num_vectors(); - float64_t* init_logPxy=SG_MALLOC(float64_t, num_vectors*m_components.size()); - float64_t* init_logPx=SG_MALLOC(float64_t, num_vectors); - float64_t* init_logPx_fix=SG_MALLOC(float64_t, num_vectors); - float64_t* post_add=SG_MALLOC(float64_t, num_vectors); + SGVector init_logPxy(num_vectors * m_components.size()); + SGVector init_logPx(num_vectors); + SGVector init_logPx_fix(num_vectors); + SGVector post_add(num_vectors); for (int32_t i=0; i v=dotdata->get_computed_dot_feature_vector(i); for (int32_t j=0; jcompute_log_PDF(v)+CMath::log(m_coefficients[j]); - init_logPx[i]+=CMath::exp(init_logPxy[i*m_components.size()+j]); + init_logPxy[index_t(i * m_components.size() + j)] = + m_components[j]->compute_log_PDF(v) + + CMath::log(m_coefficients[j]); + init_logPx[i] += + CMath::exp(init_logPxy[index_t(i * m_components.size() + j)]); if (j!=comp1 && j!=comp2 && j!=comp3) { - init_logPx_fix[i]+=CMath::exp(init_logPxy[i*m_components.size()+j]); + init_logPx_fix[i] += CMath::exp( + init_logPxy[index_t(i * m_components.size() + j)]); } } init_logPx[i]=CMath::log(init_logPx[i]); - post_add[i]=CMath::log(CMath::exp(init_logPxy[i*m_components.size()+comp1]-init_logPx[i])+ - CMath::exp(init_logPxy[i*m_components.size()+comp2]-init_logPx[i])+ - CMath::exp(init_logPxy[i*m_components.size()+comp3]-init_logPx[i])); + post_add[i] = CMath::log( + CMath::exp( + init_logPxy[index_t(i * m_components.size() + comp1)] - + init_logPx[i]) + + CMath::exp( + init_logPxy[index_t(i * m_components.size() + comp2)] - + init_logPx[i]) + + CMath::exp( + init_logPxy[index_t(i * m_components.size() + comp3)] - + init_logPx[i])); } vector components(3); @@ -385,8 +401,10 @@ void 
CGMM::partial_em(int32_t comp1, int32_t comp2, int32_t comp3, float64_t min float64_t noise_mag=SGVector::twonorm(components[0]->get_mean().vector, dim_n)*0.1/ CMath::sqrt((float64_t)dim_n); - SGVector::add(components[1]->get_mean().vector, alpha1, components[1]->get_mean().vector, alpha2, - components[2]->get_mean().vector, dim_n); + auto temp_mean = components[2]->get_mean(); + auto temp_mean_result = components[1]->get_mean(); + linalg::add(temp_mean_result, temp_mean, temp_mean_result, alpha1, alpha2); + components[1]->set_mean(temp_mean_result); for (int32_t i=0; i c1=components[1]->get_cov(); SGMatrix c2=components[2]->get_cov(); - SGVector::add(c1.matrix, alpha1, c1.matrix, alpha2, c2.matrix, dim_n*dim_n); + linalg::add(c1, c2, c1, alpha1, alpha2); components[1]->set_d(SGVector(SGMatrix::compute_eigenvectors(c1.matrix, dim_n, dim_n), dim_n)); components[1]->set_u(c1); @@ -434,8 +452,10 @@ void CGMM::partial_em(int32_t comp1, int32_t comp2, int32_t comp3, float64_t min } else if(components[0]->get_cov_type()==DIAG) { - SGVector::add(components[1]->get_d().vector, alpha1, components[1]->get_d().vector, - alpha2, components[2]->get_d().vector, dim_n); + auto result_d = components[1]->get_d(); + auto temp_d = components[2]->get_d(); + linalg::add(result_d, temp_d, result_d, alpha1, alpha2); + components[1]->set_d(result_d); float64_t new_d=0; for (int32_t i=0; i alpha(num_vectors, 3); - float64_t* logPxy=SG_MALLOC(float64_t, num_vectors*3); - float64_t* logPx=SG_MALLOC(float64_t, num_vectors); + SGVector logPxy(num_vectors * 3); + SGVector logPx(num_vectors); //float64_t* logPost=SG_MALLOC(float64_t, num_vectors*m_components.vlen); while (iter alpha, float64_t min_cov) @@ -523,113 +537,121 @@ void CGMM::max_likelihood(SGMatrix alpha, float64_t min_cov) float64_t alpha_sum; float64_t alpha_sum_sum=0; - float64_t* mean_sum; - float64_t* cov_sum=NULL; for (int32_t i=0; i mean_sum(num_dim); + linalg::zero(mean_sum); for (int32_t j=0; j 
v=dotdata->get_computed_dot_feature_vector(j); - SGVector::add(mean_sum, alpha.matrix[j*alpha.num_cols+i], v.vector, 1, mean_sum, v.vlen); + linalg::add( + v, mean_sum, mean_sum, alpha.matrix[j * alpha.num_cols + i], + 1.0); } - for (int32_t j=0; jset_mean(SGVector(mean_sum, num_dim)); + m_components[i]->set_mean(mean_sum); - ECovType cov_type=m_components[i]->get_cov_type(); + SGMatrix cov_sum; + ECovType cov_type = m_components[i]->get_cov_type(); if (cov_type==FULL) { - cov_sum=SG_MALLOC(float64_t, num_dim*num_dim); - memset(cov_sum, 0, num_dim*num_dim*sizeof(float64_t)); + cov_sum = SGMatrix(num_dim, num_dim); + linalg::zero(cov_sum); } else if(cov_type==DIAG) { - cov_sum=SG_MALLOC(float64_t, num_dim); - memset(cov_sum, 0, num_dim*sizeof(float64_t)); + cov_sum = SGMatrix(1, num_dim); + linalg::zero(cov_sum); } else if(cov_type==SPHERICAL) { - cov_sum=SG_MALLOC(float64_t, 1); - cov_sum[0]=0; + cov_sum = SGMatrix(1, 1); + linalg::zero(cov_sum); } for (int32_t j=0; j v=dotdata->get_computed_dot_feature_vector(j); - SGVector::add(v.vector, 1, v.vector, -1, mean_sum, v.vlen); + linalg::add(v, mean_sum, v, 1.0, -1.0); switch (cov_type) { case FULL: - cblas_dger(CblasRowMajor, num_dim, num_dim, alpha.matrix[j*alpha.num_cols+i], v.vector, 1, v.vector, - 1, (double*) cov_sum, num_dim); - - break; - case DIAG: - for (int32_t k=0; k(v.vector, 1, v.vlen); + auto temp_result = linalg::matrix_prod( + temp_matrix, temp_matrix, true, false); + cov_sum = temp_result.get_diagonal_vector().clone(); + linalg::scale( + cov_sum, cov_sum, alpha.matrix[j * alpha.num_cols + i]); + } + + break; + case SPHERICAL: + float64_t temp = 0; + + temp = linalg::dot(v, v); + + cov_sum(0, 0) += + temp * alpha.matrix[j * alpha.num_cols + i]; + break; } } switch (cov_type) { case FULL: - for (int32_t j=0; j::compute_eigenvectors(cov_sum, num_dim, num_dim); - for (int32_t j=0; j d0 = + SGMatrix::compute_eigenvectors(cov_sum); + for (int32_t j = 0; j < num_dim; j++) + d0[j] = CMath::max(min_cov, 
d0[j]); - m_components[i]->set_d(SGVector(d0, num_dim)); - m_components[i]->set_u(SGMatrix(cov_sum, num_dim, num_dim)); + m_components[i]->set_d(d0); + m_components[i]->set_u(cov_sum); - break; - case DIAG: - for (int32_t j=0; jset_d(SGVector(cov_sum, num_dim)); + m_components[i]->set_d(cov_sum.get_row_vector(0)); - break; - case SPHERICAL: - cov_sum[0]/=alpha_sum*num_dim; - cov_sum[0]=CMath::max(min_cov, cov_sum[0]); + break; + case SPHERICAL: + cov_sum[0] /= alpha_sum * num_dim; + cov_sum[0] = CMath::max(min_cov, cov_sum[0]); - m_components[i]->set_d(SGVector(cov_sum, 1)); + m_components[i]->set_d(cov_sum.get_row_vector(0)); - break; + break; } m_coefficients.vector[i]=alpha_sum; alpha_sum_sum+=alpha_sum; } - for (int32_t i=0; i CGMM::alpha_init(SGMatrix init_means) SGVector label_num(init_means.num_cols); for (int32_t i=0; itrain(new CDenseFeatures(init_means)); CMulticlassLabels* init_labels=(CMulticlassLabels*) knn->apply(features); SGMatrix alpha(num_vectors, int32_t(m_components.size())); - memset(alpha.matrix, 0, num_vectors*m_components.size()*sizeof(float64_t)); + linalg::zero(alpha); for (int32_t i=0; iget_int_label(i)]=1; + alpha[i * m_components.size() + init_labels->get_int_label(i)] = 1; SG_UNREF(init_labels); @@ -766,7 +788,7 @@ SGVector CGMM::sample() { REQUIRE(m_components.size()>0, "Number of mixture components is %d but " "must be positive\n", m_components.size()); - float64_t rand_num=CMath::random(float64_t(0), float64_t(1)); + float64_t rand_num = CMath::random(0.0, 1.0); float64_t cum_sum=0; for (int32_t i=0; i +#include #include #include -#include #include +#include #include -#include using namespace shogun; @@ -83,21 +84,21 @@ bool CHierarchical::train_machine(CFeatures* data) float64_t* distances=SG_MALLOC(float64_t, num_pairs); int32_t offs=0; - for (int32_t i=0; iio)) { for (int32_t j=i+1; jdistance(i,j); - index[offs].idx1=i; - index[offs].idx2=j; - offs++; //offs=i*(i+1)/2+j + distances[offs] = distance->distance(i, j); + 
index[offs].idx1 = i; + index[offs].idx2 = j; + offs++; // offs=i*(i+1)/2+j } - SG_PROGRESS(i, 0, num-1) } CMath::qsort_index(distances, index, (num-1)*num/2); //CMath::display_vector(distances, (num-1)*num/2, "dists"); + auto pb = progress(range(0, num_pairs - 1), *this->io); int32_t k=-1; int32_t l=0; for (; l=merges && k X = ((CDenseFeatures*)features)->get_feature_matrix(); + REQUIRE(X.data(), "Features have not been provided.\n"); int n = X.num_rows; int p = X.num_cols; diff --git a/src/shogun/distance/CustomMahalanobisDistance.cpp b/src/shogun/distance/CustomMahalanobisDistance.cpp index c4f8f84cc9e..734353390c1 100644 --- a/src/shogun/distance/CustomMahalanobisDistance.cpp +++ b/src/shogun/distance/CustomMahalanobisDistance.cpp @@ -10,8 +10,7 @@ #include - -#include +#include using namespace shogun; using namespace Eigen; diff --git a/src/shogun/distance/Distance.cpp b/src/shogun/distance/Distance.cpp index 950f159d427..342936f9130 100644 --- a/src/shogun/distance/Distance.cpp +++ b/src/shogun/distance/Distance.cpp @@ -9,14 +9,15 @@ * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society */ -#include -#include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -28,6 +29,7 @@ #ifdef HAVE_OPENMP #include + #endif using namespace shogun; @@ -241,15 +243,11 @@ void CDistance::do_precompute_matrix() SG_FREE(precomputed_matrix); precomputed_matrix=SG_MALLOC(float32_t, num*(num+1)/2); - for (int32_t i=0; iio)) { - SG_PROGRESS(i*i,0,num*num) for (int32_t j=0; j<=i; j++) precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ; } - - SG_PROGRESS(num*num,0,num*num) - SG_DONE() } void CDistance::init() @@ -279,8 +277,6 @@ SGMatrix CDistance::get_distance_matrix() int64_t total_num = int64_t(m)*n; int64_t total=0; - int64_t total_start=0; - int64_t total_end=total_num; // if lhs == rhs and sizes match assume k(i,j)=k(j,i) bool symmetric= (lhs && lhs==rhs && 
m==n); @@ -289,6 +285,8 @@ SGMatrix CDistance::get_distance_matrix() result=SG_MALLOC(T, total_num); + PRange pb = PRange( + range(total_num), *this->io, "PROGRESS: ", UTF8, []() { return true; }); int32_t num_threads; int64_t step; #pragma omp parallel shared(num_threads, step) @@ -333,17 +331,16 @@ SGMatrix CDistance::get_distance_matrix() if (symmetric && i!=j) total++; - if (total%100 == 0) - SG_OBJ_PROGRESS(this, total, total_start, total_end) + pb.print_progress(); - if (CSignal::cancel_computations()) - break; + // TODO: replace with new signal + // if (CSignal::cancel_computations()) + // break; } } } } - - SG_DONE() + pb.complete(); return SGMatrix(result,m,n,true); } diff --git a/src/shogun/distance/MahalanobisDistance.cpp b/src/shogun/distance/MahalanobisDistance.cpp index f05a4248c70..d343d0763a3 100644 --- a/src/shogun/distance/MahalanobisDistance.cpp +++ b/src/shogun/distance/MahalanobisDistance.cpp @@ -50,7 +50,8 @@ bool CMahalanobisDistance::init(CFeatures* l, CFeatures* r) } else { - mean = ((CDenseFeatures*) l)->get_mean((CDotFeatures*) lhs, (CDotFeatures*) rhs); + mean = ((CDenseFeatures*)l) + ->compute_mean((CDotFeatures*)lhs, (CDotFeatures*)rhs); icov = CDotFeatures::compute_cov((CDotFeatures*) lhs, (CDotFeatures*) rhs); } diff --git a/src/shogun/distributions/Gaussian.cpp b/src/shogun/distributions/Gaussian.cpp index 3520649568e..d5c36c6e9e4 100644 --- a/src/shogun/distributions/Gaussian.cpp +++ b/src/shogun/distributions/Gaussian.cpp @@ -10,22 +10,24 @@ */ #include -#ifdef HAVE_LAPACK - +#include #include #include -#include +#include #include +#include using namespace shogun; +using namespace linalg; CGaussian::CGaussian() : CDistribution(), m_constant(0), m_d(), m_u(), m_mean(), m_cov_type(FULL) { register_params(); } -CGaussian::CGaussian(const SGVector mean, SGMatrix cov, ECovType cov_type) - : CDistribution() +CGaussian::CGaussian( + const SGVector mean, SGMatrix cov, ECovType cov_type) + : CDistribution() { 
ASSERT(mean.vlen==cov.num_rows) ASSERT(cov.num_rows==cov.num_cols) @@ -118,102 +120,115 @@ float64_t CGaussian::get_log_likelihood_example(int32_t num_example) float64_t CGaussian::update_params_em(float64_t* alpha_k, int32_t len) { CDotFeatures* dotdata=dynamic_cast(features); - REQUIRE(dotdata,"dynamic cast from CFeatures to CDotFeatures returned NULL\n") + REQUIRE( + dotdata, "dynamic cast from CFeatures to CDotFeatures returned NULL\n"); int32_t num_dim=dotdata->get_dim_feature_space(); // compute mean float64_t alpha_k_sum=0; SGVector mean(num_dim); - mean.fill_vector(mean.vector,mean.vlen,0); - for (int32_t i=0;i v=dotdata->get_computed_dot_feature_vector(i); - SGVector::add(mean.vector, alpha_k[i], v.vector, 1, mean.vector, v.vlen); + linalg::add(v, mean, mean, alpha_k[i], 1.0); } - for (int32_t i=0; i cov_sum; ECovType cov_type=get_cov_type(); if (cov_type==FULL) { - cov_sum=SG_MALLOC(float64_t, num_dim*num_dim); - memset(cov_sum, 0, num_dim*num_dim*sizeof(float64_t)); + cov_sum = SGMatrix(num_dim, num_dim); + cov_sum.zero(); } else if(cov_type==DIAG) { - cov_sum=SG_MALLOC(float64_t,num_dim); - memset(cov_sum, 0, num_dim*sizeof(float64_t)); + cov_sum = SGMatrix(1, num_dim); + cov_sum.zero(); } else if(cov_type==SPHERICAL) { - cov_sum=SG_MALLOC(float64_t,1); - cov_sum[0]=0; + cov_sum = SGMatrix(1, 1); + cov_sum.zero(); } for (int32_t j=0; j v=dotdata->get_computed_dot_feature_vector(j); - SGVector::add(v.vector, 1, v.vector, -1, mean.vector, v.vlen); + linalg::add(v, mean, v, -1.0, 1.0); switch (cov_type) { - case FULL: - cblas_dger(CblasRowMajor, num_dim, num_dim, alpha_k[j], v.vector, 1, v.vector, - 1, (double*) cov_sum, num_dim); - - break; - case DIAG: - for (int32_t k=0; k(alpha_k[j], v, v, cov_sum); +#endif + break; + case DIAG: + for (int32_t k = 0; k < num_dim; k++) + cov_sum(1, k) += v.vector[k] * v.vector[k] * alpha_k[j]; - break; - case SPHERICAL: - float64_t temp=0; + break; + case SPHERICAL: + float64_t temp = 0; - for (int32_t k=0; 
k::compute_eigenvectors(cov_sum, num_dim, num_dim); - - set_d(SGVector(d0, num_dim)); - set_u(SGMatrix(cov_sum, num_dim, num_dim)); - - break; + case FULL: + { + linalg::scale(cov_sum, cov_sum, 1 / alpha_k_sum); - case DIAG: - for (int32_t j=0; j d0(num_dim); +#ifdef HAVE_LAPACK + d0.vector = SGMatrix::compute_eigenvectors( + cov_sum.matrix, num_dim, num_dim); +#else + // FIXME use eigenvectors computeation warpper by micmn + typename SGMatrix::EigenMatrixXtMap eig = cov_sum; + typename SGVector::EigenVectorXtMap eigenvalues_eig = d0; + + Eigen::EigenSolver::EigenMatrixXt> solver( + eig); + eigenvalues_eig = solver.eigenvalues().real(); +#endif + + set_d(d0); + set_u(cov_sum); + + break; + } + case DIAG: + linalg::scale(cov_sum, cov_sum, 1 / alpha_k_sum); - set_d(SGVector(cov_sum,num_dim)); + set_d(cov_sum.get_row_vector(0)); - break; + break; - case SPHERICAL: - cov_sum[0]/=alpha_k_sum*num_dim; + case SPHERICAL: + cov_sum[0] /= alpha_k_sum * num_dim; - set_d(SGVector(cov_sum,1)); + set_d(cov_sum.get_row_vector(0)); - break; + break; } return alpha_k_sum; @@ -223,24 +238,26 @@ float64_t CGaussian::compute_log_PDF(SGVector point) { ASSERT(m_mean.vector && m_d.vector) ASSERT(point.vlen == m_mean.vlen) - float64_t* difference=SG_MALLOC(float64_t, m_mean.vlen); - sg_memcpy(difference, point.vector, sizeof(float64_t)*m_mean.vlen); + SGVector difference = point.clone(); - for (int32_t i = 0; i < m_mean.vlen; i++) - difference[i] -= m_mean.vector[i]; + linalg::add(difference, m_mean, difference, -1.0, 1.0); float64_t answer=m_constant; if (m_cov_type==FULL) { - float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen); - cblas_dgemv(CblasRowMajor, CblasNoTrans, m_d.vlen, m_d.vlen, - 1, m_u.matrix, m_d.vlen, difference, 1, 0, temp_holder, 1); + SGVector temp_holder(m_d.vlen); + temp_holder.zero(); +#ifdef HAVE_LAPACK + cblas_dgemv( + CblasRowMajor, CblasNoTrans, m_d.vlen, m_d.vlen, 1, m_u.matrix, + m_d.vlen, difference, 1, 0, temp_holder, 1); +#else + linalg::dgemv(1, m_u, 
false, difference, 0, temp_holder); +#endif for (int32_t i=0; i point) else { for (int32_t i=0; i CGaussian::get_mean() @@ -287,41 +302,45 @@ void CGaussian::set_d(const SGVector d) SGMatrix CGaussian::get_cov() { - float64_t* cov=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen); - memset(cov, 0, sizeof(float64_t)*m_mean.vlen*m_mean.vlen); + SGMatrix cov(m_mean.vlen, m_mean.vlen); + cov.zero(); if (m_cov_type==FULL) { if (!m_u.matrix) SG_ERROR("Unitary matrix not set\n") - float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen); - float64_t* diag_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen); - memset(diag_holder, 0, sizeof(float64_t)*m_d.vlen*m_d.vlen); - for(int32_t i=0; i temp_holder(m_mean.vlen, m_mean.vlen); + SGMatrix diag_holder(m_mean.vlen, m_mean.vlen); + diag_holder.zero(); + for (int32_t i = 0; i < m_d.vlen; i++) + diag_holder(i, i) = m_d.vector[i]; +#ifdef HAVE_LAPACK + cblas_dgemm( + CblasRowMajor, CblasTrans, CblasNoTrans, m_d.vlen, m_d.vlen, + m_d.vlen, 1, m_u.matrix, m_d.vlen, diag_holder.matrix, m_d.vlen, 0, + temp_holder.matrix, m_d.vlen); + cblas_dgemm( + CblasRowMajor, CblasNoTrans, CblasNoTrans, m_d.vlen, m_d.vlen, + m_d.vlen, 1, temp_holder.matrix, m_d.vlen, m_u.matrix, m_d.vlen, 0, + cov.matrix, m_d.vlen); +#else + linalg::dgemm( + 1, m_u, diag_holder, true, false, 0, temp_holder); + linalg::dgemm(1, temp_holder, m_u, false, false, 0, cov); +#endif } - else if (m_cov_type==DIAG) + else if (m_cov_type == DIAG) { - for (int32_t i=0; i(cov, m_mean.vlen, m_mean.vlen, false);//fix needed + return cov; } void CGaussian::register_params() @@ -337,77 +356,94 @@ void CGaussian::decompose_cov(SGMatrix cov) { switch (m_cov_type) { - case FULL: - m_u=SGMatrix(cov.num_rows,cov.num_rows); - sg_memcpy(m_u.matrix, cov.matrix, sizeof(float64_t)*cov.num_rows*cov.num_rows); - - m_d.vector=SGMatrix::compute_eigenvectors(m_u.matrix, cov.num_rows, cov.num_rows); - m_d.vlen=cov.num_rows; - m_u.num_rows=cov.num_rows; - m_u.num_cols=cov.num_rows; - break; - case 
DIAG: - m_d=SGVector(cov.num_rows); - for (int32_t i=0; i(1); - m_d.vector[0]=cov.matrix[0]; - break; + case FULL: + { + m_u = SGMatrix(cov.num_rows, cov.num_rows); + m_u = cov.clone(); + m_d = SGVector(cov.num_rows); +#ifdef HAVE_LAPACK + m_d.vector = SGMatrix::compute_eigenvectors( + m_u.matrix, cov.num_rows, cov.num_rows); +#else + // FIXME use eigenvectors computeation warpper by micmn + typename SGMatrix::EigenMatrixXtMap eig = m_u; + typename SGVector::EigenVectorXtMap eigenvalues_eig = m_d; + + Eigen::EigenSolver::EigenMatrixXt> solver( + eig); + eigenvalues_eig = solver.eigenvalues().real(); +#endif + break; + } + case DIAG: + m_d = SGVector(cov.num_rows); + for (int32_t i = 0; i < cov.num_rows; i++) + m_d[i] = cov.matrix[i * cov.num_rows + i]; + + break; + case SPHERICAL: + m_d = SGVector(1); + m_d.vector[0] = cov.matrix[0]; + break; } } SGVector CGaussian::sample() { SG_DEBUG("Entering\n"); - float64_t* r_matrix=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen); - memset(r_matrix, 0, m_mean.vlen*m_mean.vlen*sizeof(float64_t)); + SGMatrix r_matrix(m_mean.vlen, m_mean.vlen); + r_matrix.zero(); switch (m_cov_type) { - case FULL: - case DIAG: - for (int32_t i=0; i random_vec(m_mean.vlen); - for (int32_t i=0; i temp_matrix(m_d.vlen, m_d.vlen); + temp_matrix.zero(); +#ifdef HAVE_LAPACK + cblas_dgemm( + CblasRowMajor, CblasNoTrans, CblasNoTrans, m_d.vlen, m_d.vlen, + m_d.vlen, 1, m_u.matrix, m_d.vlen, r_matrix.matrix, m_d.vlen, 0, + temp_matrix.matrix, m_d.vlen); +#else + linalg::dgemm( + 1, m_u, r_matrix, false, false, 0, temp_matrix); +#endif + r_matrix = temp_matrix; } - float64_t* samp=SG_MALLOC(float64_t, m_mean.vlen); - - cblas_dgemv(CblasRowMajor, CblasNoTrans, m_mean.vlen, m_mean.vlen, - 1, r_matrix, m_mean.vlen, random_vec, 1, 0, samp, 1); - - for (int32_t i=0; i samp(m_mean.vlen); - SG_FREE(random_vec); - SG_FREE(r_matrix); +#ifdef HAVE_LAPACK + cblas_dgemv( + CblasRowMajor, CblasNoTrans, m_mean.vlen, m_mean.vlen, 1, + r_matrix.matrix, m_mean.vlen, 
random_vec.vector, 1, 0, samp.vector, 1); +#else + linalg::dgemv(1.0, r_matrix, false, random_vec, 0.0, samp); +#endif + for (int32_t i = 0; i < m_mean.vlen; i++) + samp.vector[i] += m_mean.vector[i]; SG_DEBUG("Leaving\n"); - return SGVector(samp, m_mean.vlen, false);//fix needed + return samp; } CGaussian* CGaussian::obtain_from_generic(CDistribution* distribution) @@ -423,5 +459,3 @@ CGaussian* CGaussian::obtain_from_generic(CDistribution* distribution) SG_REF(casted); return casted; } - -#endif // HAVE_LAPACK diff --git a/src/shogun/distributions/Gaussian.h b/src/shogun/distributions/Gaussian.h index e5e21839324..202c7f31cff 100644 --- a/src/shogun/distributions/Gaussian.h +++ b/src/shogun/distributions/Gaussian.h @@ -14,8 +14,6 @@ #include -#ifdef HAVE_LAPACK - #include #include #include @@ -245,5 +243,4 @@ class CGaussian : public CDistribution ECovType m_cov_type; }; } -#endif //HAVE_LAPACK #endif //_GAUSSIAN_H__ diff --git a/src/shogun/distributions/HMM.cpp b/src/shogun/distributions/HMM.cpp index 3bc2dd5c556..e5cbec82702 100644 --- a/src/shogun/distributions/HMM.cpp +++ b/src/shogun/distributions/HMM.cpp @@ -5586,7 +5586,9 @@ bool CHMM::baum_welch_viterbi_train(BaumWelchViterbiType type) float64_t prob_train=CMath::ALMOST_NEG_INFTY; iteration_count=iterations; - while (!converged(prob, prob_train) && (!CSignal::cancel_computations())) + // TODO: replace with the new signal + // while (!converged(prob, prob_train) && (!CSignal::cancel_computations())) + while (!converged(prob, prob_train)) { CMath::swap(working, estimate); prob=prob_train; diff --git a/src/shogun/evaluation/CrossValidation.cpp b/src/shogun/evaluation/CrossValidation.cpp index cc77dbadf7c..8c0336cc092 100644 --- a/src/shogun/evaluation/CrossValidation.cpp +++ b/src/shogun/evaluation/CrossValidation.cpp @@ -8,14 +8,15 @@ * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ +#include +#include #include -#include +#include #include #include -#include -#include -#include 
#include +#include +#include using namespace shogun; @@ -24,61 +25,44 @@ CCrossValidation::CCrossValidation() : CMachineEvaluation() init(); } -CCrossValidation::CCrossValidation(CMachine* machine, CFeatures* features, - CLabels* labels, CSplittingStrategy* splitting_strategy, - CEvaluation* evaluation_criterion, bool autolock) : - CMachineEvaluation(machine, features, labels, splitting_strategy, - evaluation_criterion, autolock) +CCrossValidation::CCrossValidation( + CMachine* machine, CFeatures* features, CLabels* labels, + CSplittingStrategy* splitting_strategy, CEvaluation* evaluation_criterion, + bool autolock) + : CMachineEvaluation( + machine, features, labels, splitting_strategy, evaluation_criterion, + autolock) { init(); } -CCrossValidation::CCrossValidation(CMachine* machine, CLabels* labels, - CSplittingStrategy* splitting_strategy, - CEvaluation* evaluation_criterion, bool autolock) : - CMachineEvaluation(machine, labels, splitting_strategy, evaluation_criterion, - autolock) +CCrossValidation::CCrossValidation( + CMachine* machine, CLabels* labels, CSplittingStrategy* splitting_strategy, + CEvaluation* evaluation_criterion, bool autolock) + : CMachineEvaluation( + machine, labels, splitting_strategy, evaluation_criterion, autolock) { init(); } CCrossValidation::~CCrossValidation() { - SG_UNREF(m_xval_outputs); } void CCrossValidation::init() { - m_num_runs=1; + m_num_runs = 1; - /* do reference counting for output objects */ - m_xval_outputs=new CList(true); - - SG_ADD(&m_num_runs, "num_runs", "Number of repetitions", - MS_NOT_AVAILABLE); - SG_ADD((CSGObject**)&m_xval_outputs, "m_xval_outputs", "List of output " - "classes for intermediade cross-validation results", - MS_NOT_AVAILABLE); + SG_ADD(&m_num_runs, "num_runs", "Number of repetitions", MS_NOT_AVAILABLE); } -CEvaluationResult* CCrossValidation::evaluate() +CEvaluationResult* CCrossValidation::evaluate_impl() { - SG_DEBUG("entering %s::evaluate()\n", get_name()) - - REQUIRE(m_machine, 
"%s::evaluate() is only possible if a machine is " - "attached\n", get_name()); - - REQUIRE(m_features, "%s::evaluate() is only possible if features are " - "attached\n", get_name()); - - REQUIRE(m_labels, "%s::evaluate() is only possible if labels are " - "attached\n", get_name()); - /* if for some reason the do_unlock_frag is set, unlock */ if (m_do_unlock) { m_machine->data_unlock(); - m_do_unlock=false; + m_do_unlock = false; } /* set labels in any case (no locking needs this) */ @@ -93,86 +77,78 @@ CEvaluationResult* CCrossValidation::evaluate() if (!m_machine->is_data_locked()) { m_machine->data_lock(m_labels, m_features); - m_do_unlock=true; + m_do_unlock = true; } } else { - SG_WARNING("%s does not support locking. Autolocking is skipped. " - "Set autolock flag to false to get rid of warning.\n", - m_machine->get_name()); + SG_WARNING( + "%s does not support locking. Autolocking is skipped. " + "Set autolock flag to false to get rid of warning.\n", + m_machine->get_name()); } } SGVector results(m_num_runs); - /* evtl. 
update xvalidation output class */ - CCrossValidationOutput* current=(CCrossValidationOutput*) - m_xval_outputs->get_first_element(); - while (current) - { - current->init_num_runs(m_num_runs); - current->init_num_folds(m_splitting_strategy->get_num_subsets()); - current->init_expose_labels(m_labels); - current->post_init(); - SG_UNREF(current); - current=(CCrossValidationOutput*) - m_xval_outputs->get_next_element(); - } - /* perform all the x-val runs */ SG_DEBUG("starting %d runs of cross-validation\n", m_num_runs) - for (index_t i=0; i get_first_element(); - while (current) - { - current->update_run_index(i); - SG_UNREF(current); - current=(CCrossValidationOutput*) - m_xval_outputs->get_next_element(); - } + SG_DEBUG("Creating CrossValidationStorage.\n") + CrossValidationStorage* storage = new CrossValidationStorage(); + SG_REF(storage) + storage->set_num_runs(m_num_runs); + storage->set_num_folds(m_splitting_strategy->get_num_subsets()); + storage->set_expose_labels(m_labels); + storage->post_init(); + SG_DEBUG("Ending CrossValidationStorage initilization.\n") SG_DEBUG("entering cross-validation run %d \n", i) - results[i]=evaluate_one_run(); + results[i] = evaluate_one_run(i, storage); SG_DEBUG("result of cross-validation run %d is %f\n", i, results[i]) + + /* Emit the value*/ + std::string obs_value_name{"cross_validation_run"}; + ObservedValue cv_data{i, obs_value_name, erase_type(storage), + CROSSVALIDATION}; + observe(cv_data); + SG_UNREF(storage) } /* construct evaluation result */ CCrossValidationResult* result = new CCrossValidationResult(); - result->mean=CStatistics::mean(results); - if (m_num_runs>1) - result->std_dev=CStatistics::std_deviation(results); + result->set_mean(CStatistics::mean(results)); + if (m_num_runs > 1) + result->set_std_dev(CStatistics::std_deviation(results)); else - result->std_dev=0; + result->set_std_dev(0); /* unlock machine if it was locked in this method */ if (m_machine->is_data_locked() && m_do_unlock) { 
m_machine->data_unlock(); - m_do_unlock=false; + m_do_unlock = false; } - SG_DEBUG("leaving %s::evaluate()\n", get_name()) - SG_REF(result); return result; } void CCrossValidation::set_num_runs(int32_t num_runs) { - if (num_runs <1) + if (num_runs < 1) SG_ERROR("%d is an illegal number of repetitions\n", num_runs) - m_num_runs=num_runs; + m_num_runs = num_runs; } -float64_t CCrossValidation::evaluate_one_run() +float64_t CCrossValidation::evaluate_one_run( + int64_t index, CrossValidationStorage* storage) { SG_DEBUG("entering %s::evaluate_one_run()\n", get_name()) - index_t num_subsets=m_splitting_strategy->get_num_subsets(); + index_t num_subsets = m_splitting_strategy->get_num_subsets(); SG_DEBUG("building index sets for %d-fold cross-validation\n", num_subsets) @@ -185,45 +161,38 @@ float64_t CCrossValidation::evaluate_one_run() /* different behavior whether data is locked or not */ if (m_machine->is_data_locked()) { + m_machine->set_store_model_features(true); SG_DEBUG("starting locked evaluation\n", get_name()) /* do actual cross-validation */ - for (index_t i=0; i get_first_element(); - while (current) - { - current->update_fold_index(i); - SG_UNREF(current); - current=(CCrossValidationOutput*) - m_xval_outputs->get_next_element(); - } + CrossValidationFoldStorage* fold = new CrossValidationFoldStorage(); + SG_REF(fold) + fold->set_run_index(index); + fold->set_fold_index(i); /* index subset for training, will be freed below */ SGVector inverse_subset_indices = - m_splitting_strategy->generate_subset_inverse(i); + m_splitting_strategy->generate_subset_inverse(i); /* train machine on training features */ m_machine->train_locked(inverse_subset_indices); /* feature subset for testing */ SGVector subset_indices = - m_splitting_strategy->generate_subset_indices(i); + m_splitting_strategy->generate_subset_indices(i); /* evtl. 
update xvalidation output class */ - current=(CCrossValidationOutput*)m_xval_outputs->get_first_element(); - while (current) - { - current->update_train_indices(inverse_subset_indices, "\t"); - current->update_trained_machine(m_machine, "\t"); - SG_UNREF(current); - current=(CCrossValidationOutput*) - m_xval_outputs->get_next_element(); - } + fold->set_train_indices(inverse_subset_indices); + auto fold_machine = (CMachine*)m_machine->clone(); + fold->set_trained_machine(fold_machine); + SG_UNREF(fold_machine) /* produce output for desired indices */ - CLabels* result_labels=m_machine->apply_locked(subset_indices); + CLabels* result_labels = m_machine->apply_locked(subset_indices); SG_REF(result_labels); /* set subset for testing labels */ @@ -231,27 +200,27 @@ float64_t CCrossValidation::evaluate_one_run() /* evaluate against own labels */ m_evaluation_criterion->set_indices(subset_indices); - results[i]=m_evaluation_criterion->evaluate(result_labels, m_labels); + results[i] = + m_evaluation_criterion->evaluate(result_labels, m_labels); /* evtl. 
update xvalidation output class */ - current=(CCrossValidationOutput*)m_xval_outputs->get_first_element(); - while (current) - { - current->update_test_indices(subset_indices, "\t"); - current->update_test_result(result_labels, "\t"); - current->update_test_true_result(m_labels, "\t"); - current->post_update_results(); - current->update_evaluation_result(results[i], "\t"); - SG_UNREF(current); - current=(CCrossValidationOutput*) - m_xval_outputs->get_next_element(); - } + fold->set_test_indices(subset_indices); + fold->set_test_result(result_labels); + CLabels* true_labels = (CLabels*)m_labels->clone(); + fold->set_test_true_result(true_labels); + SG_UNREF(true_labels) + fold->post_update_results(); + fold->set_evaluation_result(results[i]); /* remove subset to prevent side effects */ m_labels->remove_subset(); + /* Save fold into storage */ + storage->append_fold_result(fold); + /* clean up */ SG_UNREF(result_labels); + SG_UNREF(fold); SG_DEBUG("done locked evaluation\n", get_name()) } @@ -265,61 +234,57 @@ float64_t CCrossValidation::evaluate_one_run() /* do actual cross-validation */ - //TODO parallel xvalidation needs some serious fixing, see #3743 + // TODO parallel xvalidation needs some serious fixing, see #3743 //#pragma omp parallel for - for (index_t i=0; i get_num_threads()==1) + if (get_global_parallel()->get_num_threads() == 1) { - machine=m_machine; - features=m_features; - evaluation_criterion=m_evaluation_criterion; + machine = m_machine; + features = m_features; + evaluation_criterion = m_evaluation_criterion; } else { - machine=(CMachine*)m_machine->clone(); - features=(CFeatures*)m_features->clone(); - evaluation_criterion=(CEvaluation*)m_evaluation_criterion->clone(); + machine = (CMachine*)m_machine->clone(); + features = (CFeatures*)m_features->clone(); + evaluation_criterion = + (CEvaluation*)m_evaluation_criterion->clone(); } /* evtl. 
update xvalidation output class */ - CCrossValidationOutput* current; - #pragma omp critical - { - current=(CCrossValidationOutput*) - m_xval_outputs->get_first_element(); - while (current) - { - current->update_fold_index(i); - SG_UNREF(current); - current=(CCrossValidationOutput*) - m_xval_outputs->get_next_element(); - } - } + fold->set_run_index(index); + fold->set_fold_index(i); /* set feature subset for training */ - SGVector inverse_subset_indices= - m_splitting_strategy->generate_subset_inverse(i); + SGVector inverse_subset_indices = + m_splitting_strategy->generate_subset_inverse(i); features->add_subset(inverse_subset_indices); /* set label subset for training */ - if (get_global_parallel()->get_num_threads()==1) - labels=m_labels; + if (get_global_parallel()->get_num_threads() == 1) + labels = m_labels; else - labels=machine->get_labels(); + labels = machine->get_labels(); labels->add_subset(inverse_subset_indices); SG_DEBUG("training set %d:\n", i) - if (io->get_loglevel()==MSG_DEBUG) + if (io->get_loglevel() == MSG_DEBUG) { - SGVector::display_vector(inverse_subset_indices.vector, - inverse_subset_indices.vlen, "training indices"); + SGVector::display_vector( + inverse_subset_indices.vector, inverse_subset_indices.vlen, + "training indices"); } /* train machine on training features and remove subset */ @@ -328,69 +293,56 @@ float64_t CCrossValidation::evaluate_one_run() SG_DEBUG("finished training\n") /* evtl. 
update xvalidation output class */ - #pragma omp critical - { - current=(CCrossValidationOutput*)m_xval_outputs->get_first_element(); - while (current) - { - current->update_train_indices(inverse_subset_indices, "\t"); - current->update_trained_machine(machine, "\t"); - SG_UNREF(current); - current=(CCrossValidationOutput*) - m_xval_outputs->get_next_element(); - } - } + fold->set_train_indices(inverse_subset_indices); + auto fold_machine = (CMachine*)machine->clone(); + fold->set_trained_machine(fold_machine); + SG_UNREF(fold_machine) features->remove_subset(); labels->remove_subset(); - /* set feature subset for testing (subset method that stores pointer) */ + /* set feature subset for testing (subset method that stores + * pointer) */ SGVector subset_indices = - m_splitting_strategy->generate_subset_indices(i); + m_splitting_strategy->generate_subset_indices(i); features->add_subset(subset_indices); /* set label subset for testing */ labels->add_subset(subset_indices); SG_DEBUG("test set %d:\n", i) - if (io->get_loglevel()==MSG_DEBUG) + if (io->get_loglevel() == MSG_DEBUG) { - SGVector::display_vector(subset_indices.vector, - subset_indices.vlen, "test indices"); + SGVector::display_vector( + subset_indices.vector, subset_indices.vlen, "test indices"); } /* apply machine to test features and remove subset */ SG_DEBUG("starting evaluation\n") SG_DEBUG("%p\n", features) - CLabels* result_labels=machine->apply(features); + CLabels* result_labels = machine->apply(features); SG_DEBUG("finished evaluation\n") features->remove_subset(); SG_REF(result_labels); /* evaluate */ - results[i]=evaluation_criterion->evaluate(result_labels, labels); + results[i] = evaluation_criterion->evaluate(result_labels, labels); SG_DEBUG("result on fold %d is %f\n", i, results[i]) /* evtl. 
update xvalidation output class */ - #pragma omp critical - { - current=(CCrossValidationOutput*)m_xval_outputs->get_first_element(); - while (current) - { - current->update_test_indices(subset_indices, "\t"); - current->update_test_result(result_labels, "\t"); - current->update_test_true_result(labels, "\t"); - current->post_update_results(); - current->update_evaluation_result(results[i], "\t"); - SG_UNREF(current); - current=(CCrossValidationOutput*) - m_xval_outputs->get_next_element(); - } - } + fold->set_test_indices(subset_indices); + fold->set_test_result(result_labels); + CLabels* true_labels = (CLabels*)labels->clone(); + fold->set_test_true_result(true_labels); + SG_UNREF(true_labels) + fold->post_update_results(); + fold->set_evaluation_result(results[i]); + + storage->append_fold_result(fold); /* clean up, remove subsets */ labels->remove_subset(); - if (get_global_parallel()->get_num_threads()!=1) + if (get_global_parallel()->get_num_threads() != 1) { SG_UNREF(machine); SG_UNREF(features); @@ -398,20 +350,15 @@ float64_t CCrossValidation::evaluate_one_run() SG_UNREF(evaluation_criterion); } SG_UNREF(result_labels); + SG_UNREF(fold) } SG_DEBUG("done unlocked evaluation\n", get_name()) } /* build arithmetic mean of results */ - float64_t mean=CStatistics::mean(results); + float64_t mean = CStatistics::mean(results); SG_DEBUG("leaving %s::evaluate_one_run()\n", get_name()) return mean; } - -void CCrossValidation::add_cross_validation_output( - CCrossValidationOutput* cross_validation_output) -{ - m_xval_outputs->append_element(cross_validation_output); -} diff --git a/src/shogun/evaluation/CrossValidation.h b/src/shogun/evaluation/CrossValidation.h index 076c9590d5f..b0b07ef7323 100644 --- a/src/shogun/evaluation/CrossValidation.h +++ b/src/shogun/evaluation/CrossValidation.h @@ -19,20 +19,23 @@ namespace shogun { -class CMachineEvaluation; -class CCrossValidationOutput; -class CList; + class CMachineEvaluation; + class CCrossValidationOutput; + class 
CrossValidationStorage; + class CList; -/** @brief type to encapsulate the results of an evaluation run. - */ -class CCrossValidationResult : public CEvaluationResult -{ + /** @brief type to encapsulate the results of an evaluation run. + */ + class CCrossValidationResult : public CEvaluationResult + { public: CCrossValidationResult() { SG_ADD(&mean, "mean", "Mean of results", MS_NOT_AVAILABLE); - SG_ADD(&std_dev, "std_dev", - "Standard deviation of cross-validation folds", MS_NOT_AVAILABLE); + SG_ADD( + &std_dev, "std_dev", + "Standard deviation of cross-validation folds", + MS_NOT_AVAILABLE); mean = 0; std_dev = 0; @@ -52,24 +55,28 @@ class CCrossValidationResult : public CEvaluationResult * * @return name of the SGSerializable */ - virtual const char* get_name() const { return "CrossValidationResult"; } + virtual const char* get_name() const + { + return "CrossValidationResult"; + } /** helper method used to specialize a base class instance * * @param eval_result its dynamic type must be CCrossValidationResult */ - static CCrossValidationResult* obtain_from_generic( - CEvaluationResult* eval_result) + static CCrossValidationResult* + obtain_from_generic(CEvaluationResult* eval_result) { if (!eval_result) return NULL; - REQUIRE(eval_result->get_result_type()==CROSSVALIDATION_RESULT, - "CrossValidationResult::obtain_from_generic(): argument is" - "of wrong type!\n"); + REQUIRE( + eval_result->get_result_type() == CROSSVALIDATION_RESULT, + "CrossValidationResult::obtain_from_generic(): argument is" + "of wrong type!\n"); SG_REF(eval_result); - return (CCrossValidationResult*) eval_result; + return (CCrossValidationResult*)eval_result; } /** print result */ @@ -78,115 +85,155 @@ class CCrossValidationResult : public CEvaluationResult SG_SPRINT("%f+-%f\n", mean, std_dev); } - public: + /** + * Get the evaluations mean. + * @return mean + */ + float64_t get_mean() const + { + return mean; + } + + /** + * Get the standard deviation. 
+ * @return standard deviation + */ + float64_t get_std_dev() const + { + return std_dev; + } + + /** + * Set the evaluations mean. + * @param mean the mean + */ + void set_mean(float64_t ev_mean) + { + this->mean = ev_mean; + } + + /** + * Set the standard deviation + * @param std_dev the standard deviation + */ + void set_std_dev(float64_t ev_std_dev) + { + this->std_dev = ev_std_dev; + } + + private: /** mean */ float64_t mean; /** Standard deviation of cross-validation folds */ float64_t std_dev; -}; - -/** @brief base class for cross-validation evaluation. - * Given a learning machine, a splitting strategy, an evaluation criterion, - * features and corresponding labels, this provides an interface for - * cross-validation. Results may be retrieved using the evaluate method. A - * number of repetitions may be specified for obtaining more accurate results. - * The arithmetic mean and standard deviation of different runs is returned. - * Default number of runs is one. - * - * This class calculates an evaluation criterion of every fold and then - * calculates the arithmetic mean of all folds. This is for example suitable - * for the AUC or for Accuracy. However, for example F1-measure may not be - * merged this way (result will be biased). To solve this, different sub-classes - * may average results of each cross validation fold differently by overwriting - * the evaluate_one_run method. - * - * See [Forman, G. and Scholz, M. (2009). Apples-to-apples in cross-validation - * studies: Pitfalls in classifier performance measurement. Technical report, - * HP Laboratories.] for details on this subject. - * - * Cross validation tries to lock underlying machines if that is possible to - * speed up computations. Can be turned off by the set_autolock() method. - * Locking in general may speed up things (eg for kernel machines the kernel - * matrix is precomputed), however, it is not always supported. 
- * - * Crossvalidation runs with current number of threads - * (Parallel::set_num_threads) for unlocked case, and currently duplicates all - * objects (might be changed later). - * - */ -class CCrossValidation: public CMachineEvaluation -{ -public: - /** constructor */ - CCrossValidation(); - - /** constructor - * @param machine learning machine to use - * @param features features to use for cross-validation - * @param labels labels that correspond to the features - * @param splitting_strategy splitting strategy to use - * @param evaluation_criterion evaluation criterion to use - * @param autolock whether machine should be auto-locked before evaluation - */ - CCrossValidation(CMachine* machine, CFeatures* features, CLabels* labels, - CSplittingStrategy* splitting_strategy, - CEvaluation* evaluation_criterion, bool autolock=true); - - /** constructor, for use with custom kernels (no features) - * @param machine learning machine to use - * @param labels labels that correspond to the features - * @param splitting_strategy splitting strategy to use - * @param evaluation_criterion evaluation criterion to use - * @param autolock autolock + }; + + /** @brief base class for cross-validation evaluation. + * Given a learning machine, a splitting strategy, an evaluation criterion, + * features and corresponding labels, this provides an interface for + * cross-validation. Results may be retrieved using the evaluate method. A + * number of repetitions may be specified for obtaining more accurate + * results. + * The arithmetic mean and standard deviation of different runs is returned. + * Default number of runs is one. + * + * This class calculates an evaluation criterion of every fold and then + * calculates the arithmetic mean of all folds. This is for example suitable + * for the AUC or for Accuracy. However, for example F1-measure may not be + * merged this way (result will be biased). 
To solve this, different + * sub-classes + * may average results of each cross validation fold differently by + * overwriting + * the evaluate_one_run method. + * + * See [Forman, G. and Scholz, M. (2009). Apples-to-apples in + * cross-validation + * studies: Pitfalls in classifier performance measurement. Technical + * report, + * HP Laboratories.] for details on this subject. + * + * Cross validation tries to lock underlying machines if that is possible to + * speed up computations. Can be turned off by the set_autolock() method. + * Locking in general may speed up things (eg for kernel machines the kernel + * matrix is precomputed), however, it is not always supported. + * + * Crossvalidation runs with current number of threads + * (Parallel::set_num_threads) for unlocked case, and currently duplicates + * all + * objects (might be changed later). + * */ - CCrossValidation(CMachine* machine, CLabels* labels, - CSplittingStrategy* splitting_strategy, - CEvaluation* evaluation_criterion, bool autolock=true); - - /** destructor */ - virtual ~CCrossValidation(); - - /** setter for the number of runs to use for evaluation */ - void set_num_runs(int32_t num_runs); + class CCrossValidation : public CMachineEvaluation + { + public: + /** constructor */ + CCrossValidation(); + + /** constructor + * @param machine learning machine to use + * @param features features to use for cross-validation + * @param labels labels that correspond to the features + * @param splitting_strategy splitting strategy to use + * @param evaluation_criterion evaluation criterion to use + * @param autolock whether machine should be auto-locked before + * evaluation + */ + CCrossValidation( + CMachine* machine, CFeatures* features, CLabels* labels, + CSplittingStrategy* splitting_strategy, + CEvaluation* evaluation_criterion, bool autolock = true); + + /** constructor, for use with custom kernels (no features) + * @param machine learning machine to use + * @param labels labels that correspond to 
the features + * @param splitting_strategy splitting strategy to use + * @param evaluation_criterion evaluation criterion to use + * @param autolock autolock + */ + CCrossValidation( + CMachine* machine, CLabels* labels, + CSplittingStrategy* splitting_strategy, + CEvaluation* evaluation_criterion, bool autolock = true); - /** evaluate */ - virtual CEvaluationResult* evaluate(); + /** destructor */ + virtual ~CCrossValidation(); - /** appends given cross validation output instance - * to the list of listeners - * - * @param cross_validation_output given cross validation output - */ - void add_cross_validation_output( - CCrossValidationOutput* cross_validation_output); + /** setter for the number of runs to use for evaluation */ + void set_num_runs(int32_t num_runs); - /** @return name of the SGSerializable */ - virtual const char* get_name() const - { - return "CrossValidation"; - } - -private: - void init(); - -protected: - /** Evaluates one single cross-validation run. - * Current implementation evaluates each fold separately and then calculates - * arithmetic mean. Suitable for accuracy and AUC for example. NOT for - * F1-measure. Has to be overridden by sub-classes if results have to be - * merged differently - * - * @return evaluation result of one cross-validation run - */ - virtual float64_t evaluate_one_run(); + /** @return name of the SGSerializable */ + virtual const char* get_name() const + { + return "CrossValidation"; + } - /** number of evaluation runs for one fold */ - int32_t m_num_runs; + private: + void init(); - /** xval output listeners */ - CList* m_xval_outputs; -}; + protected: + /** + * Does the actual evaluation. + * @return the cross-validation result + */ + virtual CEvaluationResult* evaluate_impl(); + + protected: + protected: + /** Evaluates one single cross-validation run. + * Current implementation evaluates each fold separately and then + * calculates + * arithmetic mean. Suitable for accuracy and AUC for example. 
NOT for + * F1-measure. Has to be overridden by sub-classes if results have to be + * merged differently + * + * @return evaluation result of one cross-validation run + */ + virtual float64_t + evaluate_one_run(int64_t index, CrossValidationStorage* storage); + /** number of evaluation runs for one fold */ + int32_t m_num_runs; + }; } #endif /* __CROSSVALIDATION_H_ */ diff --git a/src/shogun/evaluation/CrossValidationMKLStorage.cpp b/src/shogun/evaluation/CrossValidationMKLStorage.cpp deleted file mode 100644 index 17ac9a4400f..00000000000 --- a/src/shogun/evaluation/CrossValidationMKLStorage.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2012 Sergey Lisitsyn - * Written (W) 2012 Heiko Strathmann - */ - -#include -#include -#include -#include - -using namespace shogun; - -void CCrossValidationMKLStorage::update_trained_machine( - CMachine* machine, const char* prefix) -{ - REQUIRE(machine, "%s::update_trained_machine(): Provided Machine is NULL!\n", - get_name()); - - CMKL* mkl=dynamic_cast(machine); - CMKLMulticlass* mkl_multiclass=dynamic_cast(machine); - REQUIRE(mkl || mkl_multiclass, "%s::update_trained_machine(): This method is only usable " - "with CMKL derived machines. This one is \"%s\"\n", get_name(), - machine->get_name()); - - CKernel* kernel = NULL; - if (mkl) - kernel = mkl->get_kernel(); - else - kernel = mkl_multiclass->get_kernel(); - - REQUIRE(kernel, "%s::update_trained_machine(): No kernel assigned to " - "machine of type \"%s\"\n", get_name(), machine->get_name()); - - CCombinedKernel* combined_kernel=dynamic_cast(kernel); - REQUIRE(combined_kernel, "%s::update_trained_machine(): This method is only" - " usable with CCombinedKernel on machines. 
This one is \"s\"\n", - get_name(), kernel->get_name()); - - SGVector w=combined_kernel->get_subkernel_weights(); - - /* evtl re-allocate memory (different number of runs from evaluation before) */ - if (m_mkl_weights.num_rows!=w.vlen || - m_mkl_weights.num_cols!=m_num_folds*m_num_runs) - { - if (m_mkl_weights.matrix) - { - SG_DEBUG("deleting memory for mkl weight matrix\n") - m_mkl_weights=SGMatrix(); - } - } - - /* evtl allocate memory (first call) */ - if (!m_mkl_weights.matrix) - { - SG_DEBUG("allocating memory for mkl weight matrix\n") - m_mkl_weights=SGMatrix(w.vlen,m_num_folds*m_num_runs); - } - - /* put current mkl weights into matrix, copy memory vector wise to make - * things fast. Compute index of address to where vector goes */ - - /* number of runs is w.vlen*m_num_folds shift */ - index_t run_shift=m_current_run_index*w.vlen*m_num_folds; - - /* fold shift is m_current_fold_index*w-vlen */ - index_t fold_shift=m_current_fold_index*w.vlen; - - /* add both index shifts */ - index_t first_idx=run_shift+fold_shift; - SG_DEBUG("run %d, fold %d, matrix index %d\n",m_current_run_index, - m_current_fold_index, first_idx); - - /* copy memory */ - sg_memcpy(&m_mkl_weights.matrix[first_idx], w.vector, - w.vlen*sizeof(float64_t)); - - SG_UNREF(kernel); -} diff --git a/src/shogun/evaluation/CrossValidationMKLStorage.h b/src/shogun/evaluation/CrossValidationMKLStorage.h deleted file mode 100644 index 8ad062650c9..00000000000 --- a/src/shogun/evaluation/CrossValidationMKLStorage.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * Written (W) 2012 Heiko Strathmann - * - */ - -#ifndef __CROSSVALIDATIONMKLSTORAGE_H_ -#define __CROSSVALIDATIONMKLSTORAGE_H_ - -#include - -#include -#include - -namespace shogun -{ - -class CMachine; -class CLabels; -class CEvaluation; - -/** @brief Class for storing MKL weights in every fold of cross-validation */ -class CCrossValidationMKLStorage: public CCrossValidationOutput -{ -public: - - /** constructor */ - CCrossValidationMKLStorage() : CCrossValidationOutput() {} - - /** destructor */ - virtual ~CCrossValidationMKLStorage() {}; - - /** @return name of SG_SERIALIZABLE */ - virtual const char* get_name() const { return "CrossValidationMKLStorage"; } - - /** update trained machine. Here, stores MKL weights in local matrix - * - * @param machine trained machine instance - * @param prefix prefix for output - */ - virtual void update_trained_machine(CMachine* machine, - const char* prefix=""); - - /** @return mkl weights matrix, one set of weights per column, - * num_folds*num_runs columns, one fold after another */ - virtual SGMatrix get_mkl_weights() { return m_mkl_weights; } - -protected: - /** storage for MKL weights, one set per column - * num_kernel times num_folds*num_runs matrix where all folds of a runs - * are added one after another */ - SGMatrix m_mkl_weights; -}; - -} - -#endif /* __CROSSVALIDATIONMKLSTORAGE_H_ */ diff --git a/src/shogun/evaluation/CrossValidationMulticlassStorage.cpp b/src/shogun/evaluation/CrossValidationMulticlassStorage.cpp deleted file mode 100644 index 38716bd74f3..00000000000 --- a/src/shogun/evaluation/CrossValidationMulticlassStorage.cpp +++ /dev/null @@ -1,153 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * Copyright (C) 2012 Sergey Lisitsyn, Heiko Strathmann - */ - -#include -#include -#include -#include - -using namespace shogun; - -CCrossValidationMulticlassStorage::CCrossValidationMulticlassStorage(bool compute_ROC, bool compute_PRC, bool compute_conf_matrices) : - CCrossValidationOutput() -{ - m_initialized = false; - m_compute_ROC = compute_ROC; - m_compute_PRC = compute_PRC; - m_compute_conf_matrices = compute_conf_matrices; - m_pred_labels = NULL; - m_true_labels = NULL; - m_num_classes = 0; - m_binary_evaluations = new CDynamicObjectArray(); - - m_fold_ROC_graphs=NULL; - m_conf_matrices=NULL; -} - - -CCrossValidationMulticlassStorage::~CCrossValidationMulticlassStorage() -{ - if (m_compute_ROC && m_fold_ROC_graphs) - { - SG_FREE(m_fold_ROC_graphs); - } - - if (m_compute_PRC && m_fold_PRC_graphs) - { - SG_FREE(m_fold_PRC_graphs); - } - - if (m_compute_conf_matrices && m_conf_matrices) - { - SG_FREE(m_conf_matrices); - } - - if (m_binary_evaluations) - { - SG_UNREF(m_binary_evaluations); - } -}; - - -void CCrossValidationMulticlassStorage::post_init() -{ - if (m_initialized) - SG_ERROR("CrossValidationMulticlassStorage was already initialized once\n") - - if (m_compute_ROC) - { - SG_DEBUG("Allocating %d ROC graphs\n", m_num_folds*m_num_runs*m_num_classes) - m_fold_ROC_graphs = SG_MALLOC(SGMatrix, m_num_folds*m_num_runs*m_num_classes); - for (int32_t i=0; i(); - } - - if (m_compute_PRC) - { - SG_DEBUG("Allocating %d PRC graphs\n", m_num_folds*m_num_runs*m_num_classes) - m_fold_PRC_graphs = SG_MALLOC(SGMatrix, m_num_folds*m_num_runs*m_num_classes); - for (int32_t i=0; i(); - } - - if (m_binary_evaluations->get_num_elements()) - m_evaluations_results = SGVector(m_num_folds*m_num_runs*m_num_classes*m_binary_evaluations->get_num_elements()); - - m_accuracies = SGVector(m_num_folds*m_num_runs); - - if (m_compute_conf_matrices) - { - m_conf_matrices = SG_MALLOC(SGMatrix, m_num_folds*m_num_runs); - for (int32_t i=0; i(); - } - - m_initialized = true; -} - -void 
CCrossValidationMulticlassStorage::init_expose_labels(CLabels* labels) -{ - ASSERT((CMulticlassLabels*)labels) - m_num_classes = ((CMulticlassLabels*)labels)->get_num_classes(); -} - -void CCrossValidationMulticlassStorage::post_update_results() -{ - CROCEvaluation eval_ROC; - CPRCEvaluation eval_PRC; - int32_t n_evals = m_binary_evaluations->get_num_elements(); - for (int32_t c=0; cget_binary_for_class(c); - CBinaryLabels* true_labels_binary = m_true_labels->get_binary_for_class(c); - if (m_compute_ROC) - { - eval_ROC.evaluate(pred_labels_binary, true_labels_binary); - m_fold_ROC_graphs[m_current_run_index*m_num_folds*m_num_classes+m_current_fold_index*m_num_classes+c] = - eval_ROC.get_ROC(); - } - if (m_compute_PRC) - { - eval_PRC.evaluate(pred_labels_binary, true_labels_binary); - m_fold_PRC_graphs[m_current_run_index*m_num_folds*m_num_classes+m_current_fold_index*m_num_classes+c] = - eval_PRC.get_PRC(); - } - - for (int32_t i=0; iget_element_safe(i); - m_evaluations_results[m_current_run_index*m_num_folds*m_num_classes*n_evals+m_current_fold_index*m_num_classes*n_evals+c*n_evals+i] = - evaluator->evaluate(pred_labels_binary, true_labels_binary); - SG_UNREF(evaluator); - } - - SG_UNREF(pred_labels_binary); - SG_UNREF(true_labels_binary); - } - CMulticlassAccuracy accuracy; - - m_accuracies[m_current_run_index*m_num_folds+m_current_fold_index] = accuracy.evaluate(m_pred_labels, m_true_labels); - - if (m_compute_conf_matrices) - { - m_conf_matrices[m_current_run_index*m_num_folds+m_current_fold_index] = CMulticlassAccuracy::get_confusion_matrix(m_pred_labels, m_true_labels); - } -} - -void CCrossValidationMulticlassStorage::update_test_result(CLabels* results, const char* prefix) -{ - m_pred_labels = (CMulticlassLabels*)results; -} - -void CCrossValidationMulticlassStorage::update_test_true_result(CLabels* results, const char* prefix) -{ - m_true_labels = (CMulticlassLabels*)results; -} - diff --git a/src/shogun/evaluation/CrossValidationMulticlassStorage.h 
b/src/shogun/evaluation/CrossValidationMulticlassStorage.h deleted file mode 100644 index 04b09f403dc..00000000000 --- a/src/shogun/evaluation/CrossValidationMulticlassStorage.h +++ /dev/null @@ -1,226 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2012 Heiko Strathmann, Sergey Lisitsyn - * - */ - -#ifndef CROSSVALIDATIONMULTICLASSSTORAGE_H_ -#define CROSSVALIDATIONMULTICLASSSTORAGE_H_ - -#include - -#include -#include -#include -#include -#include - -namespace shogun -{ - -class CMachine; -class CLabels; -class CEvaluation; - -/** @brief Class for storing multiclass evaluation information in every fold of cross-validation. - * - * Be careful - can be very expensive memory-wise. - */ -class CCrossValidationMulticlassStorage: public CCrossValidationOutput -{ -public: - - /** constructor - * @param compute_ROC whether to compute ROCs - * @param compute_PRC whether to compute PRCs - * @param compute_conf_matrices whether to compute confusion matrices - */ - CCrossValidationMulticlassStorage(bool compute_ROC=true, bool compute_PRC=false, bool compute_conf_matrices=false); - - /** destructor */ - virtual ~CCrossValidationMulticlassStorage(); - - /** returns ROC of 1-v-R in given fold and run - * - * @param run run - * @param fold fold - * @param c class - * @return ROC of 'run' run, 'fold' fold and 'c' class - */ - SGMatrix get_fold_ROC(int32_t run, int32_t fold, int32_t c) - { - ASSERT(0<=run) - ASSERT(run get_fold_PRC(int32_t run, int32_t fold, int32_t c) - { - ASSERT(0<=run) - ASSERT(runpush_back(evaluation); - } - - /** returns binary evalution appended before - * - * @param idx - */ - CBinaryClassEvaluation* get_binary_evaluation(int32_t idx) - { - return (CBinaryClassEvaluation*)m_binary_evaluations->get_element_safe(idx); - } 
- - /** returns evaluation result of 1-v-R in given fold and run - * - * @param run run - * @param fold fold - * @param c class - * @param e evaluation number - */ - float64_t get_fold_evaluation_result(int32_t run, int32_t fold, int32_t c, int32_t e) - { - ASSERT(0<=run) - ASSERT(runget_num_elements(); - ASSERT(e get_fold_conf_matrix(int32_t run, int32_t fold) - { - ASSERT(0<=run) - ASSERT(run m_evaluations_results; - - /** accuracies */ - SGVector m_accuracies; - - /** whether compute ROCs */ - bool m_compute_ROC; - - /** fold ROC graphs */ - SGMatrix* m_fold_ROC_graphs; - - /** whether compute PRCs */ - bool m_compute_PRC; - - /** fold PRC graphs */ - SGMatrix* m_fold_PRC_graphs; - - /** whether compute confusion matrices */ - bool m_compute_conf_matrices; - - /** confusion matrices */ - SGMatrix* m_conf_matrices; - - /** predicted results */ - CMulticlassLabels* m_pred_labels; - - /** true labels */ - CMulticlassLabels* m_true_labels; - - /** number of classes */ - int32_t m_num_classes; - -}; - -} - -#endif /* CROSSVALIDATIONMULTICLASSSTORAGE_H_ */ diff --git a/src/shogun/evaluation/CrossValidationOutput.h b/src/shogun/evaluation/CrossValidationOutput.h deleted file mode 100644 index d14b107a8f5..00000000000 --- a/src/shogun/evaluation/CrossValidationOutput.h +++ /dev/null @@ -1,179 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2012 Sergey Lisitsyn - * Written (W) 2012 Heiko Strathmann - * - */ - -#ifndef __CROSSVALIDATIONOUTPUT_H_ -#define __CROSSVALIDATIONOUTPUT_H_ - -#include - -#include -#include - -namespace shogun -{ - -class CMachine; -class CLabels; -class CEvaluation; - -/** @brief Class for managing individual folds in cross-validation. 
- * - * It is often desired to print/save informations that occur during individual - * folds in cross-validation, such as indices, parameters of underlying - * machine etc. This abstract base class might be called from the - * CCrossValidation class after each fold in order to collect these things. - * Different implementations then could output the informations, or even store - * them to make them accessible later. Since is different for every underlying - * machine, individual sub-classes have to handle this separately. - * When writing new subclasses, try to make the design as inheritance based - * as possible, such that future sub-sub-classes can use yours. - * Note that subclasses only need to implement methods that they need since all - * methods in this class are already implemented empty, except for num/fold - * stuff. These number of runs/folds and their current indices are stored since - * these might be used from many subclasses. - */ -class CCrossValidationOutput: public CSGObject -{ -public: - - /** constructor */ - CCrossValidationOutput() : CSGObject() - { - m_current_run_index=0; - m_current_fold_index=0; - m_num_runs=0; - m_num_folds=0; - } - - /** destructor */ - virtual ~CCrossValidationOutput() {} - - /** @return name of SG_SERIALIZABLE */ - virtual const char* get_name() const=0; - - /** init number of runs (called once). saves them to local variable - * - * @param num_runs number of runs that will be performed - * @param prefix prefix for output - */ - virtual void init_num_runs(index_t num_runs, const char* prefix="") - { - m_num_runs=num_runs; - } - - /** init number of folds (called once). 
saves them to local variable - * @param num_folds number of folds that will be performed - * @param prefix prefix for output - */ - virtual void init_num_folds(index_t num_folds, const char* prefix="") - { - m_num_folds=num_folds; - } - - /** initially expose labels before usage - * @param labels labels to expose to CV output - */ - virtual void init_expose_labels(CLabels* labels) { } - - /** post init action (called once) */ - virtual void post_init() { } - - /** update run index (called every iteration). saves to local variable - * - * @param run_index index of current run - * @param prefix prefix for output - */ - virtual void update_run_index(index_t run_index, - const char* prefix="") - { - m_current_run_index=run_index; - } - - /** update fold index (called every iteration). saves to local variable - * - * @param fold_index index of current run - * @param prefix prefix for output - */ - virtual void update_fold_index(index_t fold_index, - const char* prefix="") - { - m_current_fold_index=fold_index; - } - - /** update train indices - * - * @param indices indices used for training - * @param prefix prefix for output - */ - virtual void update_train_indices(SGVector indices, - const char* prefix="") {} - - /** update test indices - * - * @param indices indices used for testing/validation - * @param prefix prefix for output - */ - virtual void update_test_indices(SGVector indices, - const char* prefix="") {} - - /** update trained machine - * - * @param machine trained machine instance - * @param prefix prefix for output - */ - virtual void update_trained_machine(CMachine* machine, - const char* prefix="") {} - - /** update test result - * - * @param results result labels for test/validation run - * @param prefix prefix for output - */ - virtual void update_test_result(CLabels* results, - const char* prefix="") {} - - /** update test true result - * - * @param results ground truth labels for test/validation run - * @param prefix prefix for output - */ - virtual 
void update_test_true_result(CLabels* results, - const char* prefix="") {} - - /** post update test and true results - */ - virtual void post_update_results() {} - - /** update evaluate result - * - * @param result evaluation result - * @param prefix prefix for output - */ - virtual void update_evaluation_result(float64_t result, - const char* prefix="") {} - -protected: - /** current run index is written here */ - index_t m_current_run_index; - - /** current fold index is written here */ - index_t m_current_fold_index; - - /** number of runs is initialised here */ - index_t m_num_runs; - - /** number of folds is initialised here */ - index_t m_num_folds; -}; - -} - -#endif /* __CROSSVALIDATIONOUTPUT_H_ */ diff --git a/src/shogun/evaluation/CrossValidationPrintOutput.cpp b/src/shogun/evaluation/CrossValidationPrintOutput.cpp deleted file mode 100644 index 1002ced9e3f..00000000000 --- a/src/shogun/evaluation/CrossValidationPrintOutput.cpp +++ /dev/null @@ -1,144 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * Written (W) 2012 Sergey Lisitsyn - * Written (W) 2012 Heiko Strathmann - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace shogun; - -void CCrossValidationPrintOutput::init_num_runs(index_t num_runs, - const char* prefix) -{ - SG_PRINT("%scross validation number of runs %d\n", prefix, num_runs) -} - -/** init number of folds */ -void CCrossValidationPrintOutput::init_num_folds(index_t num_folds, - const char* prefix) -{ - SG_PRINT("%scross validation number of folds %d\n", prefix, num_folds) -} - -void CCrossValidationPrintOutput::update_run_index(index_t run_index, - const char* prefix) -{ - SG_PRINT("%scross validation run %d\n", prefix, run_index) -} - -void CCrossValidationPrintOutput::update_fold_index(index_t fold_index, - const char* prefix) -{ - SG_PRINT("%sfold %d\n", prefix, fold_index) -} - -void CCrossValidationPrintOutput::update_train_indices( - SGVector indices, const char* prefix) -{ - indices.display_vector("train_indices", prefix); -} - -void CCrossValidationPrintOutput::update_test_indices( - SGVector indices, const char* prefix) -{ - indices.display_vector("test_indices", prefix); -} - -void CCrossValidationPrintOutput::update_trained_machine( - CMachine* machine, const char* prefix) -{ - if (dynamic_cast(machine)) - { - CLinearMachine* linear_machine=(CLinearMachine*)machine; - linear_machine->get_w().display_vector("learned_w", prefix); - SG_PRINT("%slearned_bias=%f\n", prefix, linear_machine->get_bias()) - } - - if (dynamic_cast(machine)) - { - CKernelMachine* kernel_machine=(CKernelMachine*)machine; - kernel_machine->get_alphas().display_vector("learned_alphas", prefix); - SG_PRINT("%slearned_bias=%f\n", prefix, kernel_machine->get_bias()) - } - - if (dynamic_cast(machine) - || dynamic_cast(machine)) - { - /* append one tab to prefix */ - char* new_prefix=append_tab_to_string(prefix); - - CMulticlassMachine* mc_machine=(CMulticlassMachine*)machine; - for (int i=0; 
iget_num_machines(); i++) - { - CMachine* sub_machine=mc_machine->get_machine(i); - //SG_PRINT("%smulti-class machine %d:\n", i, sub_machine) - this->update_trained_machine(sub_machine, new_prefix); - SG_UNREF(sub_machine); - } - - /* clean up */ - SG_FREE(new_prefix); - } - - if (dynamic_cast(machine)) - { - CMKL* mkl=(CMKL*)machine; - CCombinedKernel* kernel=dynamic_cast( - mkl->get_kernel()); - kernel->get_subkernel_weights().display_vector("MKL sub-kernel weights", - prefix); - SG_UNREF(kernel); - } - - if (dynamic_cast(machine)) - { - CMKLMulticlass* mkl=(CMKLMulticlass*)machine; - CCombinedKernel* kernel=dynamic_cast( - mkl->get_kernel()); - kernel->get_subkernel_weights().display_vector("MKL sub-kernel weights", - prefix); - SG_UNREF(kernel); - } -} - -void CCrossValidationPrintOutput::update_test_result(CLabels* results, - const char* prefix) -{ - results->get_values().display_vector("test_labels", prefix); -} - -void CCrossValidationPrintOutput::update_test_true_result(CLabels* results, - const char* prefix) -{ - results->get_values().display_vector("true_labels", prefix); -} - -void CCrossValidationPrintOutput::update_evaluation_result(float64_t result, - const char* prefix) -{ - SG_PRINT("%sevaluation result=%f\n", prefix, result) -} - -char* CCrossValidationPrintOutput::append_tab_to_string(const char* string) -{ - /* allocate memory, concatenate and add termination character */ - index_t len=strlen(string); - char* new_prefix=SG_MALLOC(char, len+2); - sg_memcpy(new_prefix, string, sizeof(char)*len); - new_prefix[len]='\t'; - new_prefix[len+1]='\0'; - - return new_prefix; -} diff --git a/src/shogun/evaluation/CrossValidationPrintOutput.h b/src/shogun/evaluation/CrossValidationPrintOutput.h deleted file mode 100644 index 58f798514dd..00000000000 --- a/src/shogun/evaluation/CrossValidationPrintOutput.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General 
Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2012 Sergey Lisitsyn - * Written (W) 2012 Heiko Strathmann - * - */ - -#ifndef __CROSSVALIDATIONPRINTOUTPUT_H_ -#define __CROSSVALIDATIONPRINTOUTPUT_H_ - -#include - -#include - -namespace shogun -{ - -class CMachine; -class CLabels; -class CEvaluation; - -/** @brief Class for outputting cross-validation intermediate results to the - * standard output. Simply prints all messages it gets */ -class CCrossValidationPrintOutput: public CCrossValidationOutput -{ -public: - - /** constructor */ - CCrossValidationPrintOutput() {}; - - /** destructor */ - virtual ~CCrossValidationPrintOutput() {}; - - /** @return name of SG_SERIALIZABLE */ - virtual const char* get_name() const { return "CrossValidationPrintOutput"; } - - /** init number of runs (called once) - * - * @param num_runs number of runs that will be performed - * @param prefix prefix for output - */ - virtual void init_num_runs(index_t num_runs, const char* prefix=""); - - /** init number of folds - * @param num_folds number of folds that will be performed - * @param prefix prefix for output - */ - virtual void init_num_folds(index_t num_folds, const char* prefix=""); - - /** update run index - * - * @param run_index index of current run - * @param prefix prefix for output - */ - virtual void update_run_index(index_t run_index, - const char* prefix=""); - - /** update fold index - * - * @param fold_index index of current run - * @param prefix prefix for output - */ - virtual void update_fold_index(index_t fold_index, - const char* prefix=""); - - /** update train indices - * - * @param indices indices used for training - * @param prefix prefix for output - */ - virtual void update_train_indices(SGVector indices, - const char* prefix=""); - - /** update test indices - * - * @param indices indices used for testing/validation - * @param prefix prefix for 
output - */ - virtual void update_test_indices(SGVector indices, - const char* prefix=""); - - /** update trained machine - * - * @param machine trained machine instance - * @param prefix prefix for output - */ - virtual void update_trained_machine(CMachine* machine, - const char* prefix=""); - - /** update test result - * - * @param results result labels for test/validation run - * @param prefix prefix for output - */ - virtual void update_test_result(CLabels* results, - const char* prefix=""); - - /** update test true result - * - * @param results ground truth labels for test/validation run - * @param prefix prefix for output - */ - virtual void update_test_true_result(CLabels* results, - const char* prefix=""); - - /** update evaluate result - * - * @param result evaluation result - * @param prefix prefix for output - */ - virtual void update_evaluation_result(float64_t result, - const char* prefix=""); - -protected: - /** returns a string which is the provided one plus a tab character - * - * @param string null-terminated string to append tab to - * @return null-terminated string with tab appended - */ - char* append_tab_to_string(const char* string); -}; - -} - -#endif /* __CROSSVALIDATIONPRINTOUTPUT_H_ */ diff --git a/src/shogun/evaluation/CrossValidationStorage.cpp b/src/shogun/evaluation/CrossValidationStorage.cpp new file mode 100644 index 00000000000..57cefc0fd9d --- /dev/null +++ b/src/shogun/evaluation/CrossValidationStorage.cpp @@ -0,0 +1,274 @@ +/* +* BSD 3-Clause License +* +* Copyright (c) 2017, Shogun-Toolbox e.V. +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. 
+* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* +* * Neither the name of the copyright holder nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+* +* Written (W) 2017 Giovanni De Toni +* +*/ + +#include "CrossValidationStorage.h" +#include +#include + +using namespace shogun; + +CrossValidationFoldStorage::CrossValidationFoldStorage() : CSGObject() +{ + m_current_run_index = 0; + m_current_fold_index = 0; + m_trained_machine = NULL; + m_test_result = NULL; + m_test_true_result = NULL; + + SG_ADD( + &m_current_run_index, "m_current_run_index", + "The current run index of this fold", MS_AVAILABLE) + SG_ADD( + &m_current_fold_index, "m_current_fold_index", "The current fold index", + MS_AVAILABLE) + SG_ADD( + (CSGObject**)&m_trained_machine, "m_trained_machine", + "The machine trained by this fold", MS_AVAILABLE) + SG_ADD( + (CSGObject**)&m_test_result, "m_test_result", + "The test result of this fold", MS_AVAILABLE) + SG_ADD( + (CSGObject**)&m_test_true_result, "m_test_true_result", + "The true test result for this fold", MS_AVAILABLE) +} + +CrossValidationFoldStorage::~CrossValidationFoldStorage() +{ + SG_UNREF(m_test_result); + SG_UNREF(m_test_true_result); + SG_UNREF(m_trained_machine); +} + +void CrossValidationFoldStorage::set_run_index(index_t run_index) +{ + m_current_run_index = run_index; +} + +void CrossValidationFoldStorage::set_fold_index(index_t fold_index) +{ + m_current_fold_index = fold_index; +} + +void CrossValidationFoldStorage::set_train_indices(SGVector indices) +{ + m_train_indices = indices; +} + +void CrossValidationFoldStorage::set_test_indices(SGVector indices) +{ + m_test_indices = indices; +} + +void CrossValidationFoldStorage::set_trained_machine(CMachine* machine) +{ + SG_REF(machine) + SG_UNREF(m_trained_machine) + m_trained_machine = machine; +} + +void CrossValidationFoldStorage::set_test_result(CLabels* results) +{ + SG_REF(results) + SG_UNREF(m_test_result) + m_test_result = results; +} + +void CrossValidationFoldStorage::set_test_true_result(CLabels* results) +{ + SG_REF(results) + SG_UNREF(m_test_true_result) + m_test_true_result = results; +} + +void 
CrossValidationFoldStorage::post_update_results() +{ +} + +void CrossValidationFoldStorage::set_evaluation_result(float64_t result) +{ + m_evaluation_result = result; +} + +index_t CrossValidationFoldStorage::get_current_run_index() const +{ + return m_current_run_index; +} + +index_t CrossValidationFoldStorage::get_current_fold_index() const +{ + return m_current_fold_index; +} + +const SGVector& CrossValidationFoldStorage::get_train_indices() const +{ + return m_train_indices; +} + +const SGVector& CrossValidationFoldStorage::get_test_indices() const +{ + return m_test_indices; +} + +CMachine* CrossValidationFoldStorage::get_trained_machine() const +{ + return m_trained_machine; +} + +CLabels* CrossValidationFoldStorage::get_test_result() const +{ + return m_test_result; +} + +CLabels* CrossValidationFoldStorage::get_test_true_result() const +{ + return m_test_true_result; +} + +float64_t CrossValidationFoldStorage::get_evaluation_result() const +{ + return m_evaluation_result; +} + +CrossValidationFoldStorage* CrossValidationStorage::get_fold(int fold) const +{ + REQUIRE( + fold < get_num_folds(), "The fold number must be less than %i", + get_num_folds()) + + CrossValidationFoldStorage* fld = m_folds_results[fold]; + SG_REF(fld); + return fld; +} + +bool CrossValidationFoldStorage:: +operator==(const CrossValidationFoldStorage& rhs) const +{ + return m_current_run_index == rhs.m_current_run_index && + m_current_fold_index == rhs.m_current_fold_index && + // m_train_indices.equals(rhs.m_train_indices) && + // m_test_indices.equals(rhs.m_test_indices) && + m_trained_machine->equals(rhs.m_trained_machine) && + m_test_result->equals(rhs.m_test_result) && + m_test_true_result->equals(rhs.m_test_true_result) && + m_evaluation_result == rhs.m_evaluation_result; +} + +/** CrossValidationStorage **/ + +CrossValidationStorage::CrossValidationStorage() : CSGObject() +{ + m_num_runs = 0; + m_num_folds = 0; + m_expose_labels = NULL; + + SG_ADD( + &m_num_runs, "m_num_runs", 
"The total number of cross-validation runs", + MS_AVAILABLE) + SG_ADD( + &m_num_folds, "m_num_folds", + "The total number of cross-validation folds", MS_AVAILABLE) + SG_ADD( + (CSGObject**)&m_expose_labels, "m_expose_labels", + "The labels used for this cross-validation", MS_AVAILABLE) +} + +CrossValidationStorage::~CrossValidationStorage() +{ + SG_UNREF(m_expose_labels); + for (auto i : m_folds_results) + SG_UNREF(i) +} + +void CrossValidationStorage::set_num_runs(index_t num_runs) +{ + m_num_runs = num_runs; +} + +void CrossValidationStorage::set_num_folds(index_t num_folds) +{ + m_num_folds = num_folds; +} + +void CrossValidationStorage::set_expose_labels(CLabels* labels) +{ + SG_REF(labels) + SG_UNREF(m_expose_labels) + m_expose_labels = labels; +} + +void CrossValidationStorage::post_init() +{ +} + +index_t CrossValidationStorage::get_num_runs() const +{ + return m_num_runs; +} + +index_t CrossValidationStorage::get_num_folds() const +{ + return m_num_folds; +} + +CLabels* CrossValidationStorage::get_expose_labels() const +{ + return m_expose_labels; +} + +void CrossValidationStorage::append_fold_result( + CrossValidationFoldStorage* result) +{ + SG_REF(result); + m_folds_results.push_back(result); +} + +bool CrossValidationStorage::operator==(const CrossValidationStorage& rhs) const +{ + auto member_vars = m_num_runs == rhs.m_num_runs && + m_num_folds == rhs.m_num_folds && + m_expose_labels->equals(rhs.m_expose_labels); + + if (!member_vars) + return member_vars; + + if (rhs.m_folds_results.size() != m_folds_results.size()) + return false; + for (index_t i = 0; i < m_folds_results.size(); i++) + { + if (!(m_folds_results[i] == rhs.m_folds_results[i])) + return false; + } + return member_vars; +} \ No newline at end of file diff --git a/src/shogun/evaluation/CrossValidationStorage.h b/src/shogun/evaluation/CrossValidationStorage.h new file mode 100644 index 00000000000..f06b21eba2d --- /dev/null +++ b/src/shogun/evaluation/CrossValidationStorage.h @@ -0,0 
+1,293 @@ +/* +* BSD 3-Clause License +* +* Copyright (c) 2017, Shogun-Toolbox e.V. +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* +* * Neither the name of the copyright holder nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* Written (W) 2017 Giovanni De Toni +* +*/ + +#ifndef SHOGUN_CROSSVALIDATIONSTORAGE_H +#define SHOGUN_CROSSVALIDATIONSTORAGE_H + +#include +#include +#include + +namespace shogun +{ + + class CMachine; + class CLabels; + class CEvaluation; + + /** + * Store information about a single fold run. 
+ */ + class CrossValidationFoldStorage : public CSGObject + { + public: + CrossValidationFoldStorage(); + virtual ~CrossValidationFoldStorage(); + + /** Set run index. + * + * @param run_index index of current run + */ + virtual void set_run_index(index_t run_index); + + /** Set fold index. + * + * @param fold_index index of current run + */ + virtual void set_fold_index(index_t fold_index); + + /** Set train indices + * + * @param indices indices used for training + */ + virtual void set_train_indices(SGVector indices); + + /** Set test indices + * + * @param indices indices used for testing/validation + */ + virtual void set_test_indices(SGVector indices); + + /** Set trained machine + * + * @param machine trained machine instance + */ + virtual void set_trained_machine(CMachine* machine); + + /** Set test result + * + * @param results result labels for test/validation run + */ + virtual void set_test_result(CLabels* results); + + /** Set test true result + * + * @param results ground truth labels for test/validation run + */ + virtual void set_test_true_result(CLabels* results); + + /** post update test and true results + */ + virtual void post_update_results(); + + /** Set evaluate result + * + * @param result evaluation result + */ + virtual void set_evaluation_result(float64_t result); + + /** + * Get current run index + * @return index of the current run + */ + index_t get_current_run_index() const; + + /** + * Get current fold index + * @return index of the current fold + */ + index_t get_current_fold_index() const; + + /** + * Get train indices. + * @return train indices + */ + const SGVector& get_train_indices() const; + + /** + * Get test indices. 
+ * @return test indices + */ + const SGVector& get_test_indices() const; + + /** + * Get trained machine on this fold + * @return trained machine + */ + CMachine* get_trained_machine() const; + + /** + * Get test result + * @return test result + */ + CLabels* get_test_result() const; + + /** + * Get ground truth (correct labels for this fold) + * @return ground truth + */ + CLabels* get_test_true_result() const; + + /** + * Get the evaluation result of this fold + * @return evaluation result + */ + float64_t get_evaluation_result() const; + + /** + * Operator == needed for Any comparison + * @param rhs other CrossValidationFoldStorage + * @return true if the objects are the same, false otherwise. + */ + bool operator==(const CrossValidationFoldStorage& rhs) const; + + /** + * Class name (used for serialization) + * @return class name + */ + virtual const char* get_name() const + { + return "CrossValidationFoldStorage"; + }; + + protected: + /** Current run index is written here */ + index_t m_current_run_index; + + /** Current fold index is written here */ + index_t m_current_fold_index; + + /** Train indices */ + SGVector m_train_indices; + + /** Test indices */ + SGVector m_test_indices; + + /** Trained machine */ + CMachine* m_trained_machine; + + /** Test results */ + CLabels* m_test_result; + + /** Ground truth */ + CLabels* m_test_true_result; + + /** Evaluation result for this fold */ + float64_t m_evaluation_result; + }; + + /** + * This class store some information about CrossValidation runs. + */ + class CrossValidationStorage : public CSGObject + { + public: + /** Constructor */ + CrossValidationStorage(); + + /** Destructor */ + virtual ~CrossValidationStorage(); + + /** + * Class name (used for serialization) + * @return class name + */ + virtual const char* get_name() const + { + return "CrossValidationStorage"; + }; + + /** Set number of runs. 
+ * @param num_runs number of runs that will be performed + */ + virtual void set_num_runs(index_t num_runs); + + /** Set number of folds. + * @param num_folds number of folds that will be performed + */ + virtual void set_num_folds(index_t num_folds); + + /** Set labels before usage. + * @param labels labels to expose to CV output + */ + virtual void set_expose_labels(CLabels* labels); + + /** Post init action. */ + virtual void post_init(); + + /** + * Append a fold result to this storage + * @param result the result of a fold + */ + virtual void append_fold_result(CrossValidationFoldStorage* result); + + /** + * Get number of Cross Validation runs. + * @return Cross Validation's runs + */ + index_t get_num_runs() const; + + /** + * Get number of folds. + * @return + */ + index_t get_num_folds() const; + + /** + * Get original labels. + * @return labels + */ + CLabels* get_expose_labels() const; + + /** + * Get a specific fold result + * @param fold the fold index + * @return the CrossValidationFoldStorage object + */ + CrossValidationFoldStorage* get_fold(int fold) const; + + /** + * Operator == needed for Any comparison. + * @param rhs other CrossValidationStorage + * @return true if the objects are the same, false otherwise. 
+ */ + bool operator==(const CrossValidationStorage& rhs) const; + + protected: + /** number of runs is initialised here */ + index_t m_num_runs; + + /** number of folds is initialised here */ + index_t m_num_folds; + + /** Original labels */ + CLabels* m_expose_labels; + + /** Vector with all the folds results */ + std::vector m_folds_results; + }; +} + +#endif // SHOGUN_CROSSVALIDATIONSTORAGE_H diff --git a/src/shogun/evaluation/GradientEvaluation.cpp b/src/shogun/evaluation/GradientEvaluation.cpp index ee87f556a69..09d18b5476c 100644 --- a/src/shogun/evaluation/GradientEvaluation.cpp +++ b/src/shogun/evaluation/GradientEvaluation.cpp @@ -49,7 +49,7 @@ void CGradientEvaluation::update_parameter_dictionary() SG_REF(m_parameter_dictionary); } -CEvaluationResult* CGradientEvaluation::evaluate() +CEvaluationResult* CGradientEvaluation::evaluate_impl() { if (parameter_hash_changed()) update_parameter_dictionary(); diff --git a/src/shogun/evaluation/GradientEvaluation.h b/src/shogun/evaluation/GradientEvaluation.h index 4a587c9141d..753555a6c9b 100644 --- a/src/shogun/evaluation/GradientEvaluation.h +++ b/src/shogun/evaluation/GradientEvaluation.h @@ -48,12 +48,6 @@ class CGradientEvaluation: public CMachineEvaluation */ virtual const char* get_name() const { return "GradientEvaluation"; } - /** evaluates differentiable function for value and derivative. - * - * @return GradientResult containing value and gradient - */ - virtual CEvaluationResult* evaluate(); - /** set differentiable function * * @param diff differentiable function @@ -79,6 +73,12 @@ class CGradientEvaluation: public CMachineEvaluation /** initialses and registers parameters */ void init(); + /** evaluates differentiable function for value and derivative. 
+ * + * @return GradientResult containing value and gradient + */ + virtual CEvaluationResult* evaluate_impl(); + /** updates parameter dictionary of differentiable function */ void update_parameter_dictionary(); diff --git a/src/shogun/evaluation/MachineEvaluation.cpp b/src/shogun/evaluation/MachineEvaluation.cpp index 5711ea3d77a..0f73183f1e9 100644 --- a/src/shogun/evaluation/MachineEvaluation.cpp +++ b/src/shogun/evaluation/MachineEvaluation.cpp @@ -10,14 +10,18 @@ * Heiko Strathmann */ -#include "MachineEvaluation.h" +#include #include -#include #include +#include #include -#include +#include #include +#include +#include +#include + using namespace shogun; CMachineEvaluation::CMachineEvaluation() @@ -81,6 +85,8 @@ void CMachineEvaluation::init() m_evaluation_criterion = NULL; m_do_unlock = false; m_autolock = true; + m_cancel_computation = false; + m_pause_computation_flag = false; SG_ADD((CSGObject**)&m_machine, "machine", "Used learning machine", MS_NOT_AVAILABLE); @@ -101,6 +107,34 @@ void CMachineEvaluation::init() } +CEvaluationResult* CMachineEvaluation::evaluate() +{ + SG_DEBUG("entering %s::evaluate()\n", get_name()) + + REQUIRE( + m_machine, "%s::evaluate() is only possible if a machine is " + "attached\n", + get_name()); + + REQUIRE( + m_features, "%s::evaluate() is only possible if features are " + "attached\n", + get_name()); + + REQUIRE( + m_labels, "%s::evaluate() is only possible if labels are " + "attached\n", + get_name()); + + auto sub = connect_to_signal_handler(); + CEvaluationResult* result = evaluate_impl(); + sub.unsubscribe(); + reset_computation_variables(); + + SG_DEBUG("leaving %s::evaluate()\n", get_name()) + return result; +}; + CMachine* CMachineEvaluation::get_machine() const { SG_REF(m_machine); @@ -111,3 +145,17 @@ EEvaluationDirection CMachineEvaluation::get_evaluation_direction() { return m_evaluation_criterion->get_evaluation_direction(); } + +rxcpp::subscription CMachineEvaluation::connect_to_signal_handler() +{ + // 
Subscribe this algorithm to the signal handler + auto subscriber = rxcpp::make_subscriber( + [this](int i) { + if (i == SG_PAUSE_COMP) + this->on_pause(); + else + this->on_next(); + }, + [this]() { this->on_complete(); }); + return get_global_signal()->get_observable()->subscribe(subscriber); +} \ No newline at end of file diff --git a/src/shogun/evaluation/MachineEvaluation.h b/src/shogun/evaluation/MachineEvaluation.h index e1a38647da4..825645833fb 100644 --- a/src/shogun/evaluation/MachineEvaluation.h +++ b/src/shogun/evaluation/MachineEvaluation.h @@ -20,100 +20,193 @@ #include #include -namespace shogun -{ +#include +#include -class CMachine; -class CFeatures; -class CLabels; -class CSplittingStrategy; -class CEvaluation; - -/** @brief Machine Evaluation is an abstract class - * that evaluates a machine according to some criterion. - * - */ -class CMachineEvaluation: public CSGObject +namespace shogun { -public: +#define EVALUATION_CONTROLLERS \ + if (cancel_evaluation()) \ + continue; \ + pause_evaluation(); - CMachineEvaluation(); + class CMachine; + class CFeatures; + class CLabels; + class CSplittingStrategy; + class CEvaluation; - /** constructor - * @param machine learning machine to use - * @param features features to use for cross-validation - * @param labels labels that correspond to the features - * @param splitting_strategy splitting strategy to use - * @param evaluation_criterion evaluation criterion to use - * @param autolock whether machine should be auto-locked before evaluation - */ - CMachineEvaluation(CMachine* machine, CFeatures* features, CLabels* labels, - CSplittingStrategy* splitting_strategy, - CEvaluation* evaluation_criterion, bool autolock = true); - - /** constructor, for use with custom kernels (no features) - * @param machine learning machine to use - * @param labels labels that correspond to the features - * @param splitting_strategy splitting strategy to use - * @param evaluation_criterion evaluation criterion to use - * @param 
autolock autolock - */ - CMachineEvaluation(CMachine* machine, CLabels* labels, - CSplittingStrategy* splitting_strategy, - CEvaluation* evaluation_criterion, bool autolock = true); - - - virtual ~CMachineEvaluation(); - - /** @return in which direction is the best evaluation value? */ - EEvaluationDirection get_evaluation_direction(); - - /** method for evaluation. Performs cross-validation. - * Is repeated m_num_runs. If this number is larger than one, a confidence - * interval is calculated if m_conf_int_alpha is (0 lck(m_mutex); + while (m_pause_computation_flag.load()) + m_pause_computation.wait(lck); + } + } +#endif + +#ifndef SWIG + /** Resume current evaluation (sets the flag) */ + SG_FORCED_INLINE void resume_evaluation() + { + std::unique_lock lck(m_mutex); + m_pause_computation_flag = false; + m_pause_computation.notify_all(); + } +#endif + + protected: + /** Initialize Object */ + virtual void init(); + + /** + * Implementation of the evaluation procedure. Called + * by evaluate() method. This method has to SG_REF its result + * before returning it. 
+ * @return the evaluation result + */ + virtual CEvaluationResult* evaluate_impl() = 0; + + /** connect the machine instance to the signal handler */ + rxcpp::subscription connect_to_signal_handler(); + + /** reset the computation variables */ + void reset_computation_variables() + { + m_cancel_computation = false; + m_pause_computation_flag = false; + } + + /** The action which will be done when the user decides to + * premature stop the CMachineEvaluation execution */ + virtual void on_next() + { + m_cancel_computation.store(true); + } + + /** The action which will be done when the user decides to + * pause the CMachineEvaluation execution */ + virtual void on_pause() + { + m_pause_computation_flag.store(true); + /* Here there should be the actual code*/ + resume_evaluation(); + } + + /** The action which will be done when the user decides to + * return to prompt and terminate the program execution */ + virtual void on_complete() + { + } + + protected: + /** Machine to be Evaluated */ + CMachine* m_machine; + + /** Features to be used*/ + CFeatures* m_features; + + /** Labels for the features */ + CLabels* m_labels; + + /** Splitting Strategy to be used */ + CSplittingStrategy* m_splitting_strategy; + + /** Criterion for evaluation */ + CEvaluation* m_evaluation_criterion; + + /** whether machine will automatically be locked before evaluation */ + bool m_autolock; + + /** whether machine should be unlocked after evaluation */ + bool m_do_unlock; + + /** Cancel evaluation */ + std::atomic m_cancel_computation; + + /** Pause evaluation flag */ + std::atomic m_pause_computation_flag; + + /** Conditional variable to make threads wait */ + std::condition_variable m_pause_computation; + + /** Mutex used to pause threads */ + std::mutex m_mutex; + }; } /* namespace shogun */ diff --git a/src/shogun/features/CombinedFeatures.cpp b/src/shogun/features/CombinedFeatures.cpp index 5fc4f508750..2e97688ca66 100644 --- a/src/shogun/features/CombinedFeatures.cpp +++ 
b/src/shogun/features/CombinedFeatures.cpp @@ -48,6 +48,9 @@ CCombinedFeatures::~CCombinedFeatures() CFeatures* CCombinedFeatures::get_feature_obj(int32_t idx) { + REQUIRE( + idx < get_num_feature_obj() && idx>=0, "Feature index (%d) must be within [%d, %d]", + idx, 0, get_num_feature_obj()-1); return (CFeatures*) feature_array->get_element(idx); } diff --git a/src/shogun/features/DataGenerator.cpp b/src/shogun/features/DataGenerator.cpp index 2341ade9882..f38983116bc 100644 --- a/src/shogun/features/DataGenerator.cpp +++ b/src/shogun/features/DataGenerator.cpp @@ -129,7 +129,7 @@ SGMatrix CDataGenerator::generate_sym_mix_gauss(index_t m, return result; } -#ifdef HAVE_LAPACK + SGMatrix CDataGenerator::generate_gaussians(index_t m, index_t n, index_t dim) { /* evtl. allocate space */ @@ -154,7 +154,6 @@ SGMatrix CDataGenerator::generate_gaussians(index_t m, index_t n, ind { SGVector v = g->sample(); sg_memcpy((result.matrix+j*result.num_rows+i*m*dim), v.vector, dim*sizeof(float64_t)); - SG_FREE(v.vector); } SG_UNREF(g); @@ -162,4 +161,3 @@ SGMatrix CDataGenerator::generate_gaussians(index_t m, index_t n, ind return result; } -#endif /* HAVE_LAPACK */ diff --git a/src/shogun/features/DataGenerator.h b/src/shogun/features/DataGenerator.h index b8d8fb24dbe..6e8bb6bcf09 100644 --- a/src/shogun/features/DataGenerator.h +++ b/src/shogun/features/DataGenerator.h @@ -85,7 +85,6 @@ class CDataGenerator: public CSGObject float64_t d, float64_t angle, SGMatrix target=SGMatrix()); -#ifdef HAVE_LAPACK /** Produces samples of gaussians * The functions produces m number of samples of each gaussians (n number) with * the given dimension. @@ -97,7 +96,6 @@ class CDataGenerator: public CSGObject * of the first gaussian, m number of second etc. 
*/ static SGMatrix generate_gaussians(index_t m, index_t n, index_t dim); -#endif /* HAVE_LAPACK */ virtual const char* get_name() const { return "DataGenerator"; } diff --git a/src/shogun/features/DenseFeatures.cpp b/src/shogun/features/DenseFeatures.cpp index db716171ad3..1ef65dcc811 100644 --- a/src/shogun/features/DenseFeatures.cpp +++ b/src/shogun/features/DenseFeatures.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -76,7 +77,6 @@ template CDenseFeatures::~CDenseFeatures() template void CDenseFeatures::free_features() { - m_subset_stack->remove_all_subsets(); free_feature_matrix(); SG_UNREF(feature_cache); } @@ -504,8 +504,10 @@ template float64_t CDenseFeatures::dot(int32_t vec_idx1, CDotFeatu ST* vec1 = get_feature_vector(vec_idx1, len1, free1); ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2); + SGVector sg_vec1(vec1, len1, false); + SGVector sg_vec2(vec2, len2, false); - float64_t result = CMath::dot(vec1, vec2, len1); + float64_t result = linalg::dot(sg_vec1, sg_vec2); free_feature_vector(vec1, vec_idx1, free1); sf->free_feature_vector(vec2, vec_idx2, free2); @@ -943,9 +945,11 @@ template<> float64_t CDenseFeatures::dense_dot( int32_t vlen; bool vfree; float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); + SGVector sg_vec1(vec1, vlen, false); ASSERT(vlen == num_features) - float64_t result = CMath::dot(vec1, vec2, num_features); + SGVector tmp(const_cast(vec2), vec2_len, false); + float64_t result = linalg::dot(sg_vec1, tmp); free_feature_vector(vec1, vec_idx1, vfree); diff --git a/src/shogun/features/DotFeatures.cpp b/src/shogun/features/DotFeatures.cpp index 00eec29cf3d..c964e911497 100644 --- a/src/shogun/features/DotFeatures.cpp +++ b/src/shogun/features/DotFeatures.cpp @@ -8,13 +8,15 @@ * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society */ +#include +#include +#include #include #include #include #include #include -#include -#include +#include #ifdef HAVE_OPENMP #include @@ -61,11 
+63,10 @@ void CDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t sto int32_t num_vectors=stop-start; ASSERT(num_vectors>0) - CSignal::clear_cancel(); - int32_t num_threads; int32_t step; - #pragma omp parallel shared(num_threads, step) + auto pb = progress(range(num_vectors), *this->io); +#pragma omp parallel shared(num_threads, step) { #ifdef HAVE_OPENMP #pragma omp single @@ -80,7 +81,6 @@ void CDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t sto step=num_vectors; int32_t thread_num=0; #endif - bool progress=false; // (thread_num == 0); int32_t t_start=thread_num*step; int32_t t_stop=(thread_num==num_threads) ? stop : (thread_num+1)*step; @@ -88,23 +88,20 @@ void CDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t sto #ifdef WIN32 for (int32_t i=t_start; idense_dot(i, vec, dim)+b; else output[i]=this->dense_dot(i, vec, dim)+b; - if (progress) - this->display_progress(t_start, t_stop, i); + pb.print_progress(); } } - -#ifndef WIN32 - if ( CSignal::cancel_computations() ) - SG_INFO("prematurely stopped. \n") -#endif + pb.complete(); } void CDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b) @@ -112,8 +109,7 @@ void CDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float ASSERT(sub_index) ASSERT(output) - CSignal::clear_cancel(); - + auto pb = progress(range(num), *this->io); int32_t num_threads; int32_t step; #pragma omp parallel shared(num_threads, step) @@ -131,7 +127,6 @@ void CDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float step = num; int32_t thread_num=0; #endif - bool progress=false; // (thread_num == 0); int32_t t_start=thread_num*step; int32_t t_stop=(thread_num==num_threads) ? 
num : (thread_num+1)*step; @@ -139,23 +134,20 @@ void CDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float #ifdef WIN32 for (int32_t i=t_start; idense_dot(sub_index[i], vec, dim)+b; else output[i]=this->dense_dot(sub_index[i], vec, dim)+b; - if (progress) - this->display_progress(t_start, t_stop, i); + pb.print_progress(); } } - -#ifndef WIN32 - if ( CSignal::cancel_computations() ) - SG_INFO("prematurely stopped. \n") -#endif + pb.complete(); } SGMatrix CDotFeatures::get_computed_dot_feature_matrix() @@ -266,17 +258,18 @@ SGVector CDotFeatures::get_mean() ASSERT(dim>0) SGVector mean(dim); - memset(mean.vector, 0, sizeof(float64_t)*dim); + linalg::zero(mean); - for (int i = 0; i < num; i++) + for (int32_t i = 0; i < num; ++i) add_to_dense_vec(1, i, mean.vector, dim); - for (int j = 0; j < dim; j++) - mean.vector[j] /= num; + + linalg::scale(mean, mean, 1.0 / num); return mean; } -SGVector CDotFeatures::get_mean(CDotFeatures* lhs, CDotFeatures* rhs) +SGVector +CDotFeatures::compute_mean(CDotFeatures* lhs, CDotFeatures* rhs) { ASSERT(lhs && rhs) ASSERT(lhs->get_dim_feature_space() == rhs->get_dim_feature_space()) @@ -289,19 +282,20 @@ SGVector CDotFeatures::get_mean(CDotFeatures* lhs, CDotFeatures* rhs) ASSERT(dim>0) SGVector mean(dim); - memset(mean.vector, 0, sizeof(float64_t)*dim); + linalg::zero(mean); for (int i = 0; i < num_lhs; i++) lhs->add_to_dense_vec(1, i, mean.vector, dim); + for (int i = 0; i < num_rhs; i++) rhs->add_to_dense_vec(1, i, mean.vector, dim); - for (int j = 0; j < dim; j++) - mean.vector[j] /= (num_lhs+num_rhs); + + linalg::scale(mean, mean, 1.0 / (num_lhs + num_rhs)); return mean; } -SGMatrix CDotFeatures::get_cov() +SGMatrix CDotFeatures::get_cov(bool copy_data_for_speed) { int32_t num=get_num_vectors(); int32_t dim=get_dim_feature_space(); @@ -309,41 +303,44 @@ SGMatrix CDotFeatures::get_cov() ASSERT(dim>0) SGMatrix cov(dim, dim); - - memset(cov.matrix, 0, sizeof(float64_t)*dim*dim); - SGVector mean = get_mean(); - 
for (int i = 0; i < num; i++) + if (copy_data_for_speed) { - SGVector v = get_computed_dot_feature_vector(i); - SGVector::add(v.vector, 1, v.vector, -1, mean.vector, v.vlen); - for (int m = 0; m < v.vlen; m++) + SGMatrix centered_data(dim, num); + for (int i = 0; i < num; i++) { - for (int n = 0; n <= m ; n++) - { - (cov.matrix)[m*v.vlen+n] += v.vector[m]*v.vector[n]; - } + SGVector v = get_computed_dot_feature_vector(i); + centered_data.set_column(i, linalg::add(v, mean, 1.0, -1.0)); } + + cov = linalg::matrix_prod(centered_data, centered_data, false, true); } - for (int m = 0; m < dim; m++) + else { - for (int n = 0; n <= m ; n++) + linalg::zero(cov); + for (int i = 0; i < num; i++) { - (cov.matrix)[m*dim+n] /= num; + SGVector v = get_computed_dot_feature_vector(i); + linalg::add(v, mean, v, 1.0, -1.0); + for (int m = 0; m < v.vlen; m++) + linalg::add_col_vec(cov, m, v, cov, 1.0, v.vector[m]); } - } - for (int m = 0; m < dim-1; m++) - { - for (int n = m+1; n < dim; n++) + for (int m = 0; m < dim - 1; m++) { - (cov.matrix)[m*dim+n] = (cov.matrix)[n*dim+m]; + for (int n = m + 1; n < dim; n++) + { + (cov.matrix)[m * dim + n] = (cov.matrix)[n * dim + m]; + } } } + linalg::scale(cov, cov, 1.0 / num); + return cov; } -SGMatrix CDotFeatures::compute_cov(CDotFeatures* lhs, CDotFeatures* rhs) +SGMatrix CDotFeatures::compute_cov( + CDotFeatures* lhs, CDotFeatures* rhs, bool copy_data_for_speed) { CDotFeatures* feats[2]; feats[0]=lhs; @@ -364,53 +361,50 @@ SGMatrix CDotFeatures::compute_cov(CDotFeatures* lhs, CDotFeatures* r int32_t dim = dims[0]; SGMatrix cov(dim, dim); + SGVector mean = compute_mean(lhs, rhs); - memset(cov.matrix, 0, sizeof(float64_t)*dim*dim); - - SGVector mean=get_mean(lhs,rhs); - - for (int i = 0; i < 2; i++) + if (copy_data_for_speed) { - for (int j = 0; j < nums[i]; j++) + SGMatrix centered_data(dim, num); + for (int i = 0; i < num; i++) { - SGVector v = feats[i]->get_computed_dot_feature_vector(j); - SGVector::add(v.vector, 1, v.vector, -1, 
mean.vector, v.vlen); - for (int m = 0; m < v.vlen; m++) - { - for (int n = 0; n <= m; n++) - { - (cov.matrix)[m*v.vlen+n] += v.vector[m]*v.vector[n]; - } - } + SGVector v = + i < nums[0] ? lhs->get_computed_dot_feature_vector(i) + : rhs->get_computed_dot_feature_vector(i - nums[0]); + + centered_data.set_column(i, linalg::add(v, mean, 1.0, -1.0)); } + + cov = linalg::matrix_prod(centered_data, centered_data, false, true); } - for (int m = 0; m < dim; m++) + else { - for (int n = 0; n <= m; n++) + linalg::zero(cov); + for (int i = 0; i < 2; i++) { - (cov.matrix)[m*dim+n] /= num; + for (int j = 0; j < nums[i]; j++) + { + SGVector v = + feats[i]->get_computed_dot_feature_vector(j); + linalg::add(v, mean, v, 1.0, -1.0); + for (int m = 0; m < v.vlen; m++) + linalg::add_col_vec(cov, m, v, cov, 1.0, v.vector[m]); + } } - } - for (int m = 0; m < dim-1; m++) - { - for (int n = m+1; n < dim; n++) + + for (int m = 0; m < dim - 1; m++) { - (cov.matrix[m*dim+n]) = (cov.matrix)[n*dim+m]; + for (int n = m + 1; n < dim; n++) + { + (cov.matrix[m * dim + n]) = (cov.matrix)[n * dim + m]; + } } } + linalg::scale(cov, cov, 1.0 / num); return cov; } -void CDotFeatures::display_progress(int32_t start, int32_t stop, int32_t v) -{ - int32_t num_vectors=stop-start; - int32_t i=v-start; - - if ( (i% (num_vectors/100+1))== 0) - SG_PROGRESS(v, 0.0, num_vectors-1) -} - void CDotFeatures::init() { set_property(FP_DOT); diff --git a/src/shogun/features/DotFeatures.h b/src/shogun/features/DotFeatures.h index 96e361f2cc8..c2068cc5392 100644 --- a/src/shogun/features/DotFeatures.h +++ b/src/shogun/features/DotFeatures.h @@ -213,28 +213,31 @@ class CDotFeatures : public CFeatures * * @return mean returned */ - static SGVector get_mean(CDotFeatures* lhs, CDotFeatures* rhs); + static SGVector + compute_mean(CDotFeatures* lhs, CDotFeatures* rhs); /** get covariance * + * @param copy_data_for_speed if true, the method stores explicitly + * the centered data matrix and the covariance is calculated by 
matrix + * product of the centered data with its transpose, this make it + * possible to take advantage of multithreaded matrix product, + * this may not be possible if the data doesn't fit into memory, + * in such case set this parameter to false to compute iteratively + * the covariance matrix without storing the centered data. + * [default = true] * @return covariance */ - virtual SGMatrix get_cov(); + virtual SGMatrix get_cov(bool copy_data_for_speed = true); /** compute the covariance of two CDotFeatures together * + * @param copy_data_for_speed @see CDotFeatures::get_cov * @return covariance */ - static SGMatrix compute_cov(CDotFeatures* lhs, CDotFeatures* rhs); - - protected: - /** display progress output - * - * @param start minimum value - * @param stop maximum value - * @param v current value - */ - void display_progress(int32_t start, int32_t stop, int32_t v); + static SGMatrix compute_cov( + CDotFeatures* lhs, CDotFeatures* rhs, + bool copy_data_for_speed = true); private: void init(); diff --git a/src/shogun/features/ExplicitSpecFeatures.cpp b/src/shogun/features/ExplicitSpecFeatures.cpp index 770c3e37a54..12911537c86 100644 --- a/src/shogun/features/ExplicitSpecFeatures.cpp +++ b/src/shogun/features/ExplicitSpecFeatures.cpp @@ -10,6 +10,7 @@ #include #include +#include #include using namespace shogun; @@ -68,21 +69,21 @@ float64_t CExplicitSpecFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t ASSERT(vec_idx1 < num_strings) ASSERT(vec_idx2 < sf->num_strings) - float64_t* vec1=k_spectrum[vec_idx1]; - float64_t* vec2=sf->k_spectrum[vec_idx2]; + SGVector vec1(k_spectrum[vec_idx1], spec_size, false); + SGVector vec2(sf->k_spectrum[vec_idx2], spec_size, false); - return CMath::dot(vec1, vec2, spec_size); + return linalg::dot(vec1, vec2); } float64_t CExplicitSpecFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len) { ASSERT(vec2_len == spec_size) ASSERT(vec_idx1 < num_strings) - float64_t* vec1=k_spectrum[vec_idx1]; + 
SGVector vec1(k_spectrum[vec_idx1], spec_size, false); + SGVector vec2_wrapper(const_cast(vec2), vec2_len, false); float64_t result=0; - for (int32_t i=0; i #include #include +#include namespace shogun { @@ -109,6 +110,22 @@ class CFeatures : public CSGObject */ virtual EFeatureClass get_feature_class() const=0; +#ifndef SWIG + /** returns an iterator of indices + * from 0 to @ref CFeatures::get_num_vectors + * + * Should be used in algorithms in the following way: + * @code + * for (auto idx : features->index_iterator()) { ... } + * @endcode + * + */ + virtual Range index_iterator() const + { + return range(0, get_num_vectors()); + } +#endif + /** add preprocessor * * @param p preprocessor to set diff --git a/src/shogun/features/LBPPyrDotFeatures.cpp b/src/shogun/features/LBPPyrDotFeatures.cpp index 2f68a32cb9e..17047d43ab6 100644 --- a/src/shogun/features/LBPPyrDotFeatures.cpp +++ b/src/shogun/features/LBPPyrDotFeatures.cpp @@ -11,6 +11,7 @@ */ #include #include +#include using namespace shogun; @@ -105,7 +106,7 @@ float64_t CLBPPyrDotFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t ve SGVector vec1 = get_transformed_image(vec_idx1); SGVector vec2 = lbp_feat->get_transformed_image(vec_idx2); - return CMath::dot(vec1.vector, vec2.vector, vec_nDim); + return linalg::dot(vec1, vec2); } SGVector CLBPPyrDotFeatures::get_transformed_image(int32_t index) diff --git a/src/shogun/features/StringFeatures.cpp b/src/shogun/features/StringFeatures.cpp index f9b469373b5..1c409d742f9 100644 --- a/src/shogun/features/StringFeatures.cpp +++ b/src/shogun/features/StringFeatures.cpp @@ -1,11 +1,12 @@ +#include +#include #include -#include -#include #include #include -#include -#include #include +#include +#include +#include #include #include @@ -18,6 +19,7 @@ #include #else #include + #endif namespace shogun @@ -488,6 +490,7 @@ template void CStringFeatures::load_ascii_file(char* fname, bool r SG_DEBUG("block_size=%ld file_size=%ld\n", blocksize, fsize) + auto pb = 
progress(range(fsize), *this->io, "COUNTING: "); size_t sz=blocksize; while (sz == blocksize) { @@ -502,8 +505,9 @@ template void CStringFeatures::load_ascii_file(char* fname, bool r old_block_offs=block_offs; } } - SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t") + pb.print_progress(); } + pb.complete(); SG_INFO("found %d strings\n", num_vectors) SG_FREE(dummy); @@ -512,6 +516,10 @@ template void CStringFeatures::load_ascii_file(char* fname, bool r overflow=SG_MALLOC(uint8_t, blocksize); features=SG_MALLOC(SGString, num_vectors); + auto pb2 = + PRange(range(num_vectors), *this->io, "LOADING: ", UTF8, []() { + return true; + }); rewind(f); sz=blocksize; int32_t lines=0; @@ -556,9 +564,11 @@ template void CStringFeatures::load_ascii_file(char* fname, bool r //CMath::display_vector(features[lines].string, len); old_sz=i+1; lines++; - SG_PROGRESS(lines, 0, num_vectors, 1, "LOADING:\t") + pb2.print_progress(); } } + pb2.complete(); + for (size_t i=old_sz; iget_num_elements(); ++i) { - m_active_subset=(CSubset*)other.m_active_subsets_stack->get_element(i); - m_active_subsets_stack->append_element(m_active_subset); + auto subset = other.m_active_subsets_stack->get_element(i); + m_active_subsets_stack->append_element(subset); + SG_UNREF(subset) } + m_active_subset = other.m_active_subset; + SG_REF(m_active_subset) } CSubsetStack::~CSubsetStack() @@ -63,6 +66,7 @@ void CSubsetStack::remove_all_subsets() m_active_subsets_stack->delete_element(i); SG_UNREF(m_active_subset); + m_active_subset = nullptr; } void CSubsetStack::init() diff --git a/src/shogun/features/hashed/HashedWDFeaturesTransposed.cpp b/src/shogun/features/hashed/HashedWDFeaturesTransposed.cpp index cda8ab0b491..e4061081243 100644 --- a/src/shogun/features/hashed/HashedWDFeaturesTransposed.cpp +++ b/src/shogun/features/hashed/HashedWDFeaturesTransposed.cpp @@ -8,13 +8,15 @@ * Copyright (C) 2010 Berlin Institute of Technology */ +#include +#include #include #include #include -#include #ifdef HAVE_PTHREAD 
#include + #endif using namespace shogun; @@ -31,6 +33,7 @@ struct HASHEDWD_THREAD_PARAM float64_t* vec; float64_t bias; bool progress; + PRange* progress_bar; uint32_t* index; }; #endif // DOXYGEN_SHOULD_SKIP_THIS @@ -219,14 +222,13 @@ void CHashedWDFeaturesTransposed::dense_dot_range(float64_t* output, int32_t sta #endif ASSERT(num_threads>0) - CSignal::clear_cancel(); - if (dim != w_dim) SG_ERROR("Dimensions don't match, vec_len=%d, w_dim=%d\n", dim, w_dim) if (num_threads < 2) { HASHEDWD_THREAD_PARAM params; + auto pb = progress(range(start, stop), *this->io); params.hf=this; params.sub_index=NULL; params.output=output; @@ -236,14 +238,17 @@ void CHashedWDFeaturesTransposed::dense_dot_range(float64_t* output, int32_t sta params.vec=vec; params.bias=b; params.progress=false; //true; - params.index=index; + params.progress_bar = &pb; + params.index = index; dense_dot_range_helper((void*) ¶ms); + pb.complete(); } #ifdef HAVE_PTHREAD else { pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); HASHEDWD_THREAD_PARAM* params = SG_MALLOC(HASHEDWD_THREAD_PARAM, num_threads); + auto pb = progress(range(start, stop), *this->io); int32_t step= num_vectors/num_threads; int32_t t; @@ -259,6 +264,7 @@ void CHashedWDFeaturesTransposed::dense_dot_range(float64_t* output, int32_t sta params[t].vec=vec; params[t].bias=b; params[t].progress = false; + params[t].progress_bar = &pb; params[t].index=index; pthread_create(&threads[t], NULL, CHashedWDFeaturesTransposed::dense_dot_range_helper, (void*)¶ms[t]); @@ -273,6 +279,7 @@ void CHashedWDFeaturesTransposed::dense_dot_range(float64_t* output, int32_t sta params[t].vec=vec; params[t].bias=b; params[t].progress = false; //true; + params[t].progress_bar = &pb; params[t].index=index; CHashedWDFeaturesTransposed::dense_dot_range_helper((void*) ¶ms[t]); @@ -284,11 +291,6 @@ void CHashedWDFeaturesTransposed::dense_dot_range(float64_t* output, int32_t sta } #endif SG_FREE(index); - -#ifndef WIN32 - if ( CSignal::cancel_computations() 
) - SG_INFO("prematurely stopped. \n") -#endif } void CHashedWDFeaturesTransposed::dense_dot_range_subset(int32_t* sub_index, int num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b) @@ -306,14 +308,13 @@ void CHashedWDFeaturesTransposed::dense_dot_range_subset(int32_t* sub_index, int #endif ASSERT(num_threads>0) - CSignal::clear_cancel(); - if (dim != w_dim) SG_ERROR("Dimensions don't match, vec_len=%d, w_dim=%d\n", dim, w_dim) if (num_threads < 2) { HASHEDWD_THREAD_PARAM params; + auto pb = progress(range(num), *this->io); params.hf=this; params.sub_index=sub_index; params.output=output; @@ -323,8 +324,10 @@ void CHashedWDFeaturesTransposed::dense_dot_range_subset(int32_t* sub_index, int params.vec=vec; params.bias=b; params.progress=false; //true; + params.progress_bar = &pb; params.index=index; dense_dot_range_helper((void*) ¶ms); + pb.complete(); } #ifdef HAVE_PTHREAD else @@ -332,7 +335,7 @@ void CHashedWDFeaturesTransposed::dense_dot_range_subset(int32_t* sub_index, int pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); HASHEDWD_THREAD_PARAM* params = SG_MALLOC(HASHEDWD_THREAD_PARAM, num_threads); int32_t step= num/num_threads; - + auto pb = progress(range(num), *this->io); int32_t t; for (t=0; tvec; float64_t bias=par->bias; bool progress=par->progress; + auto pb = par->progress_bar; uint32_t* index=par->index; int32_t string_length=hf->string_length; int32_t degree=hf->degree; @@ -440,7 +442,7 @@ void* CHashedWDFeaturesTransposed::dense_dot_range_helper(void* p) offs+=partial_w_dim*degree; if (progress) - hf->io->progress(i, 0,string_length, i); + pb->print_progress(); } for (int32_t j=start; jio->progress(i, 0,string_length, i); + pb->print_progress(); } for (int32_t j=start; j +#include #include #include @@ -264,7 +265,7 @@ float32_t CStreamingDenseFeatures::dot(CStreamingDotFeatures* df) SGVector other_vector=sf->get_vector(); - return CMath::dot(current_vector.vector, other_vector.vector, current_vector.vlen); + 
return linalg::dot(current_vector, other_vector); } template @@ -277,7 +278,7 @@ float32_t CStreamingDenseFeatures::dot(SGVector sgvec1) SG_ERROR( "Lengths %d and %d not equal while computing dot product!\n", len1, current_vector.vlen); - return CMath::dot(current_vector.vector, sgvec1.vector, len1); + return linalg::dot(current_vector, sgvec1); } template diff --git a/src/shogun/io/LibSVMFile.cpp b/src/shogun/io/LibSVMFile.cpp index 724a528fbea..bf6d93aa6ea 100644 --- a/src/shogun/io/LibSVMFile.cpp +++ b/src/shogun/io/LibSVMFile.cpp @@ -10,12 +10,13 @@ #include -#include -#include #include +#include #include #include #include +#include +#include using namespace shogun; @@ -119,7 +120,8 @@ void CLibSVMFile::get_sparse_matrix(SGSparseVector*& mat_feat, int32_t& for (int32_t i=0; i*& mat_feat, int32_t& num_feat, int32_t& num_vec, \ - SGVector*& multilabel, int32_t& num_classes, bool load_labels) \ -{ \ - num_feat=0; \ - \ - SG_INFO("counting line numbers in file %s\n", filename) \ - num_vec=get_num_lines(); \ - \ - int32_t current_line_ind=0; \ - SGVector line; \ - \ - int32_t num_feat_entries=0; \ - DynArray > entries_feat; \ - DynArray entries_label; \ - DynArray classes; \ - \ - mat_feat=SG_MALLOC(SGSparseVector, num_vec); \ - multilabel=SG_MALLOC(SGVector, num_vec); \ - \ - num_classes=0; \ - SG_SET_LOCALE_C; \ - \ - while (m_line_reader->has_next()) \ - { \ - num_feat_entries=0; \ - entries_feat.reset(SGVector(false)); \ - line=m_line_reader->read_line(); \ - \ - m_parser->set_tokenizer(m_whitespace_tokenizer); \ - m_parser->set_text(line); \ - \ - SGVector entry_label; \ - if (load_labels && m_parser->has_next()) \ - { \ - entry_label=m_parser->read_string(); \ - if (is_feat_entry(entry_label)) \ - { \ - entries_feat.push_back(entry_label); \ - num_feat_entries++; \ - entry_label=SGVector(0); \ - } \ - } \ - \ - while (m_parser->has_next()) \ - { \ - entries_feat.push_back(m_parser->read_string()); \ - num_feat_entries++; \ - } \ - \ - 
mat_feat[current_line_ind]=SGSparseVector(num_feat_entries); \ - for (int32_t i=0; iset_tokenizer(m_delimiter_feat_tokenizer); \ - m_parser->set_text(entries_feat[i]); \ - \ - int32_t feat_index=0; \ - \ - if (m_parser->has_next()) \ - feat_index=m_parser->read_int(); \ - \ - sg_type entry=0; \ - \ - if (m_parser->has_next()) \ - entry=m_parser->read_func(); \ - \ - if (feat_index>num_feat) \ - num_feat=feat_index; \ - \ - mat_feat[current_line_ind].features[i].feat_index=feat_index-1; \ - mat_feat[current_line_ind].features[i].entry=entry; \ - } \ - \ - if (load_labels) \ - { \ - m_parser->set_tokenizer(m_delimiter_label_tokenizer); \ - m_parser->set_text(entry_label); \ - \ - int32_t num_label_entries=0; \ - entries_label.reset(0); \ - \ - while (m_parser->has_next()) \ - { \ - num_label_entries++; \ - float64_t label_val=m_parser->read_real(); \ - \ - if (classes.find_element(label_val)==-1) \ - classes.push_back(label_val); \ - \ - entries_label.push_back(label_val); \ - } \ - multilabel[current_line_ind]=SGVector(num_label_entries); \ - \ - for (int32_t j=0; j < num_label_entries; j++) \ - multilabel[current_line_ind][j]=entries_label[j]; \ - \ - } \ - \ - current_line_ind++; \ - SG_PROGRESS(current_line_ind, 0, num_vec, 1, "LOADING:\t") \ - } \ - num_classes=classes.get_num_elements(); \ - \ - SG_RESET_LOCALE; \ - \ - SG_INFO("file successfully read\n") \ -} +#define GET_MULTI_LABELED_SPARSE_MATRIX(read_func, sg_type) \ + void CLibSVMFile::get_sparse_matrix( \ + SGSparseVector*& mat_feat, int32_t& num_feat, \ + int32_t& num_vec, SGVector*& multilabel, \ + int32_t& num_classes, bool load_labels) \ + { \ + num_feat = 0; \ + \ + SG_INFO("counting line numbers in file %s.\n", filename) \ + num_vec = get_num_lines(); \ + SG_INFO("File %s has %d lines.\n", filename, num_vec) \ + \ + int32_t current_line_ind = 0; \ + SGVector line; \ + \ + int32_t num_feat_entries = 0; \ + DynArray> entries_feat; \ + DynArray entries_label; \ + DynArray classes; \ + \ + mat_feat = 
SG_MALLOC(SGSparseVector, num_vec); \ + multilabel = SG_MALLOC(SGVector, num_vec); \ + \ + auto pb = progress(range(0, num_vec), *this->io, "LOADING: "); \ + num_classes = 0; \ + SG_SET_LOCALE_C; \ + \ + while (m_line_reader->has_next()) \ + { \ + num_feat_entries = 0; \ + entries_feat.reset(SGVector(false)); \ + line = m_line_reader->read_line(); \ + \ + m_parser->set_tokenizer(m_whitespace_tokenizer); \ + m_parser->set_text(line); \ + \ + SGVector entry_label; \ + if (load_labels && m_parser->has_next()) \ + { \ + entry_label = m_parser->read_string(); \ + if (is_feat_entry(entry_label)) \ + { \ + entries_feat.push_back(entry_label); \ + num_feat_entries++; \ + entry_label = SGVector(0); \ + } \ + } \ + \ + while (m_parser->has_next()) \ + { \ + entries_feat.push_back(m_parser->read_string()); \ + num_feat_entries++; \ + } \ + \ + mat_feat[current_line_ind] = \ + SGSparseVector(num_feat_entries); \ + for (int32_t i = 0; i < num_feat_entries; i++) \ + { \ + m_parser->set_tokenizer(m_delimiter_feat_tokenizer); \ + m_parser->set_text(entries_feat[i]); \ + \ + int32_t feat_index = 0; \ + \ + if (m_parser->has_next()) \ + feat_index = m_parser->read_int(); \ + \ + sg_type entry = 0; \ + \ + if (m_parser->has_next()) \ + entry = m_parser->read_func(); \ + \ + if (feat_index > num_feat) \ + num_feat = feat_index; \ + \ + mat_feat[current_line_ind].features[i].feat_index = \ + feat_index - 1; \ + mat_feat[current_line_ind].features[i].entry = entry; \ + } \ + \ + if (load_labels) \ + { \ + m_parser->set_tokenizer(m_delimiter_label_tokenizer); \ + m_parser->set_text(entry_label); \ + \ + int32_t num_label_entries = 0; \ + entries_label.reset(0); \ + \ + while (m_parser->has_next()) \ + { \ + num_label_entries++; \ + float64_t label_val = m_parser->read_real(); \ + \ + if (classes.find_element(label_val) == -1) \ + classes.push_back(label_val); \ + \ + entries_label.push_back(label_val); \ + } \ + multilabel[current_line_ind] = \ + SGVector(num_label_entries); \ + \ + for 
(int32_t j = 0; j < num_label_entries; j++) \ + multilabel[current_line_ind][j] = entries_label[j]; \ + } \ + \ + current_line_ind++; \ + pb.print_progress(); \ + } \ + pb.complete(); \ + num_classes = classes.get_num_elements(); \ + \ + SG_RESET_LOCALE; \ + \ + SG_INFO("file successfully read\n") \ + } GET_MULTI_LABELED_SPARSE_MATRIX(read_bool, bool) GET_MULTI_LABELED_SPARSE_MATRIX(read_char, int8_t) diff --git a/src/shogun/io/SGIO.cpp b/src/shogun/io/SGIO.cpp index f98b6ad38cb..4d6a44c4d24 100644 --- a/src/shogun/io/SGIO.cpp +++ b/src/shogun/io/SGIO.cpp @@ -53,20 +53,17 @@ char SGIO::file_buffer[FBUFSIZE]; char SGIO::directory_name[FBUFSIZE]; SGIO::SGIO() -: target(stdout), last_progress_time(0), progress_start_time(0), - last_progress(0), show_progress(false), location_info(MSG_NONE), - syntax_highlight(true), loglevel(MSG_WARN) + : target(stdout), show_progress(false), location_info(MSG_NONE), + syntax_highlight(true), loglevel(MSG_WARN) { m_refcount = new RefCount(); } SGIO::SGIO(const SGIO& orig) -: target(orig.get_target()), last_progress_time(0), - progress_start_time(0), last_progress(0), - show_progress(orig.get_show_progress()), - location_info(orig.get_location_info()), - syntax_highlight(orig.get_syntax_highlight()), - loglevel(orig.get_loglevel()) + : target(orig.get_target()), show_progress(orig.get_show_progress()), + location_info(orig.get_location_info()), + syntax_highlight(orig.get_syntax_highlight()), + loglevel(orig.get_loglevel()) { m_refcount = new RefCount(); } @@ -152,129 +149,6 @@ void SGIO::buffered_message(EMessageType prio, const char *fmt, ... 
) const } } -void SGIO::progress( - float64_t current_val, float64_t min_val, float64_t max_val, - int32_t decimals, const char* prefix) -{ - if (!show_progress) - return; - - float64_t runtime = CTime::get_curtime(); - - char str[1000]; - float64_t v=-1, estimate=0, total_estimate=0 ; - - if (max_val-min_val>0.0) - v=100*(current_val-min_val+1)/(max_val-min_val+1); - else - return; - - v=CMath::clamp(v,1e-5,100.0); - - if (decimals < 1) - decimals = 1; - - if (last_progress==0) - { - last_progress_time = runtime; - progress_start_time = runtime; - last_progress = v; - } - else - { - last_progress = v-1e-6; - if ((v!=100.0) && (runtime - last_progress_time<0.5)) - { - - // This is made to display correctly the percentage - // if the algorithm execution is too fast - if (current_val >= max_val-1) - { - v = 100; - last_progress=v-1e-6; - snprintf(str, sizeof(str), "%%s %%%d.%df%%%% %%1.1f seconds remaining %%1.1f seconds total ",decimals+3, decimals); - message(MSG_MESSAGEONLY, "", "", -1, str, prefix, v, estimate, total_estimate); - message(MSG_MESSAGEONLY, "", "", -1, "\n"); - } - return; - } - - last_progress_time = runtime; - estimate = (1-v/100)*(last_progress_time-progress_start_time)/(v/100); - total_estimate = (last_progress_time-progress_start_time)/(v/100); - } - - if (estimate>120) - { - snprintf(str, sizeof(str), "%%s %%%d.%df%%%% %%1.1f minutes remaining %%1.1f minutes total \r",decimals+3, decimals); - message(MSG_MESSAGEONLY, "", "", -1, str, prefix, v, estimate/60, total_estimate/60); - } - else - { - snprintf(str, sizeof(str), "%%s %%%d.%df%%%% %%1.1f seconds remaining %%1.1f seconds total \r",decimals+3, decimals); - message(MSG_MESSAGEONLY, "", "", -1, str, prefix, v, estimate, total_estimate); - } - - // Print a new line if the execution is completed - // to prevent bad display - if (current_val >= max_val-1) - { - message(MSG_MESSAGEONLY, "", "", -1, "\n"); - } - - fflush(target); -} - -void SGIO::absolute_progress( - float64_t current_val, 
float64_t val, float64_t min_val, float64_t max_val, - int32_t decimals, const char* prefix) -{ - if (!show_progress) - return; - - float64_t runtime = CTime::get_curtime(); - - char str[1000]; - float64_t v=-1, estimate=0, total_estimate=0 ; - - if (max_val-min_val>0) - v=100*(val-min_val+1)/(max_val-min_val+1); - - if (decimals < 1) - decimals = 1; - - if (last_progress>v) - { - last_progress_time = runtime; - progress_start_time = runtime; - last_progress = v; - } - else - { - v=CMath::clamp(v,1e-5,100.0); - last_progress = v-1e-6; - - if ((v!=100.0) && (runtime - last_progress_time<100)) - return; - - last_progress_time = runtime; - estimate = (1-v/100)*(last_progress_time-progress_start_time)/(v/100); - total_estimate = (last_progress_time-progress_start_time)/(v/100); - } - - if (estimate>120) - { - snprintf(str, sizeof(str), "%%s %%%d.%df %%1.1f minutes remaining %%1.1f minutes total \r",decimals+3, decimals); - message(MSG_MESSAGEONLY, "", "", -1, str, prefix, current_val, estimate/60, total_estimate/60); - } - else - { - snprintf(str, sizeof(str), "%%s %%%d.%df %%1.1f seconds remaining %%1.1f seconds total \r",decimals+3, decimals); - message(MSG_MESSAGEONLY, "", "", -1, str, prefix, current_val, estimate, total_estimate); - } - - fflush(target); -} void SGIO::done() { diff --git a/src/shogun/io/SGIO.h b/src/shogun/io/SGIO.h index 27086b9d971..e041bc1dac3 100644 --- a/src/shogun/io/SGIO.h +++ b/src/shogun/io/SGIO.h @@ -136,23 +136,9 @@ __FILE__ ":" func ": Unstable method! Please report if it seems to " \ #define SG_PRINT(...) { io->message(MSG_MESSAGEONLY, __PRETTY_FUNCTION__, __FILE__, __LINE__, __VA_ARGS__); } #define SG_OBJ_PRINT(o, ...) 
{ o->io->message(MSG_MESSAGEONLY, __PRETTY_FUNCTION__, __FILE__, __LINE__, __VA_ARGS__); } #define SG_NOTIMPLEMENTED { io->not_implemented(__PRETTY_FUNCTION__, __FILE__, __LINE__); } +#define SG_GPL_ONLY { io->gpl_only(__PRETTY_FUNCTION__, __FILE__, __LINE__); } #define SG_DEPRECATED { io->deprecated(__PRETTY_FUNCTION__, __FILE__, __LINE__); } -#define SG_PROGRESS(...) { \ - if (SG_UNLIKELY(io->get_show_progress())) \ - io->progress(__VA_ARGS__); \ -} - -#define SG_OBJ_PROGRESS(o, ...) { \ - if (SG_UNLIKELY(o->io->get_show_progress()))\ - o->io->progress(__VA_ARGS__); \ -} - -#define SG_ABS_PROGRESS(...) { \ - if (SG_UNLIKELY(io->get_show_progress())) \ - io->absolute_progress(__VA_ARGS__); \ -} - #define SG_DONE() { \ if (SG_UNLIKELY(io->get_show_progress())) \ io->done(); \ @@ -178,23 +164,13 @@ __FILE__ ":" func ": Unstable method! Please report if it seems to " \ #define SG_SERROR(...) { sg_io->message(MSG_ERROR,__PRETTY_FUNCTION__, __FILE__, __LINE__, __VA_ARGS__); } #define SG_SPRINT(...) { sg_io->message(MSG_MESSAGEONLY,__PRETTY_FUNCTION__, __FILE__, __LINE__, __VA_ARGS__); } - -#define SG_SPROGRESS(...) { \ - if (SG_UNLIKELY(sg_io->get_show_progress())) \ - sg_io->progress(__VA_ARGS__); \ -} - -#define SG_SABS_PROGRESS(...) 
{ \ - if (SG_UNLIKELY(sg_io->get_show_progress())) \ - sg_io->absolute_progress(__VA_ARGS__); \ -} - #define SG_SDONE() { \ if (SG_UNLIKELY(sg_io->get_show_progress())) \ sg_io->done(); \ } #define SG_SNOTIMPLEMENTED { sg_io->not_implemented(__PRETTY_FUNCTION__, __FILE__, __LINE__); } +#define SG_SGPL_ONLY { sg_io->gpl_only(__PRETTY_FUNCTION__, __FILE__, __LINE__); } #define SG_SDEPRECATED { sg_io->deprecated(__PRETTY_FUNCTION__, __FILE__, __LINE__); } #define ASSERT(x) { \ @@ -288,12 +264,6 @@ class SGIO return location_info; } - /** @return last progress as a percentage */ - inline float64_t get_last_progress() const - { - return last_progress; - } - /** get syntax highlight * * @return if syntax highlighting is enabled @@ -317,33 +287,6 @@ class SGIO void message(EMessageType prio, const char* function, const char* file, int32_t line, const char *fmt, ... ) const; - /** print progress bar - * - * @param current_val current value - * @param min_val minimum value - * @param max_val maximum value - * @param decimals decimals - * @param prefix message prefix - */ - void progress( - float64_t current_val, - float64_t min_val=0.0, float64_t max_val=1.0, int32_t decimals=1, - const char* prefix="PROGRESS:\t"); - - /** print absolute progress bar - * - * @param current_val current value - * @param val value - * @param min_val minimum value - * @param max_val maximum value - * @param decimals decimals - * @param prefix message prefix - */ - void absolute_progress( - float64_t current_val, float64_t val, - float64_t min_val=0.0, float64_t max_val=1.0, int32_t decimals=1, - const char* prefix="PROGRESS:\t"); - /** print 'done' with priority INFO, * but only if progress bar is enabled * @@ -356,6 +299,12 @@ class SGIO message(MSG_ERROR, function, file, line, "Sorry, not yet implemented .\n"); } + /** print error message 'Only available with GPL parts.' 
*/ + inline void gpl_only(const char* function, const char* file, int32_t line) const + { + message(MSG_ERROR, function, file, line, "This feature is only available if Shogun is built with GPL codes.\n"); + } + /** print warning message 'function deprecated' */ inline void deprecated(const char* function, const char* file, int32_t line) const { @@ -567,12 +516,6 @@ class SGIO protected: /** target file */ FILE* target; - /** last progress time */ - float64_t last_progress_time; - /** progress start time */ - float64_t progress_start_time; - /** last progress */ - float64_t last_progress; /** if progress bar shall be shown */ bool show_progress; /** if each print function should append filename and linenumber of diff --git a/src/shogun/io/TBOutputFormat.cpp b/src/shogun/io/TBOutputFormat.cpp new file mode 100644 index 00000000000..9478dde26d6 --- /dev/null +++ b/src/shogun/io/TBOutputFormat.cpp @@ -0,0 +1,156 @@ +/* +* BSD 3-Clause License +* +* Copyright (c) 2017, Shogun-Toolbox e.V. +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* +* * Neither the name of the copyright holder nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* Written (W) 2017 Giovanni De Toni +* +*/ +#include +#ifdef HAVE_TFLOGGER + +#include +#include +#include +#include +#include + +using namespace shogun; + +#define CHECK_TYPE(type) \ + else if ( \ + value.first.get_value().type_info().hash_code() == \ + typeid(type).hash_code()) \ + { \ + summaryValue->set_simple_value( \ + recall_type(value.first.get_value())); \ + } + +#define CHECK_TYPE_HISTO(type) \ + else if ( \ + value.first.get_value().type_info().hash_code() == \ + typeid(type).hash_code()) \ + { \ + tensorflow::histogram::Histogram h; \ + tensorflow::HistogramProto* hp = new tensorflow::HistogramProto(); \ + auto v = recall_type(value.first.get_value()); \ + for (auto value_v : v) \ + h.Add(value_v); \ + h.EncodeToProto(hp, true); \ + summaryValue->set_allocated_histo(hp); \ + } + +TBOutputFormat::TBOutputFormat(){}; + +TBOutputFormat::~TBOutputFormat(){}; + +tensorflow::Event TBOutputFormat::convert_scalar( + const TimedObservedValue& value, std::string& node_name) +{ + tensorflow::Event e; + std::time_t now_t = convert_to_millis(value.second); + e.set_wall_time(now_t); + e.set_step(value.first.get_step()); + + tensorflow::Summary* summary = e.mutable_summary(); + auto summaryValue = summary->add_value(); + summaryValue->set_tag(value.first.get_name()); + summaryValue->set_node_name(node_name); + + if (value.first.get_value().type_info().hash_code() == + typeid(int8_t).hash_code()) + { + summaryValue->set_simple_value( + 
recall_type(value.first.get_value())); + } + CHECK_TYPE(uint8_t) + CHECK_TYPE(int16_t) + CHECK_TYPE(uint16_t) + CHECK_TYPE(int32_t) + CHECK_TYPE(uint32_t) + CHECK_TYPE(int64_t) + CHECK_TYPE(uint64_t) + CHECK_TYPE(float32_t) + CHECK_TYPE(float64_t) + CHECK_TYPE(floatmax_t) + CHECK_TYPE(char) + else + { + SG_ERROR( + "Unsupported type %s", value.first.get_value().type_info().name()); + } + + return e; +} + +tensorflow::Event TBOutputFormat::convert_vector( + const TimedObservedValue& value, std::string& node_name) +{ + tensorflow::Event e; + std::time_t now_t = convert_to_millis(value.second); + e.set_wall_time(now_t); + e.set_step(value.first.get_step()); + + tensorflow::Summary* summary = e.mutable_summary(); + auto summaryValue = summary->add_value(); + summaryValue->set_tag(value.first.get_name()); + summaryValue->set_node_name(node_name); + + if (value.first.get_value().type_info().hash_code() == + typeid(std::vector).hash_code()) + { + tensorflow::histogram::Histogram h; + tensorflow::HistogramProto* hp = new tensorflow::HistogramProto(); + auto v = recall_type>(value.first.get_value()); + for (auto value_v : v) + h.Add(value_v); + h.EncodeToProto(hp, true); + summaryValue->set_allocated_histo(hp); + } + CHECK_TYPE_HISTO(std::vector) + CHECK_TYPE_HISTO(std::vector) + CHECK_TYPE_HISTO(std::vector) + CHECK_TYPE_HISTO(std::vector) + CHECK_TYPE_HISTO(std::vector) + CHECK_TYPE_HISTO(std::vector) + CHECK_TYPE_HISTO(std::vector) + CHECK_TYPE_HISTO(std::vector) + CHECK_TYPE_HISTO(std::vector) + CHECK_TYPE_HISTO(std::vector) + CHECK_TYPE_HISTO(std::vector) + else + { + SG_ERROR( + "Unsupported type %s", value.first.get_value().type_info().name()); + } + + return e; +} + +#endif // HAVE_TFLOGGER diff --git a/src/shogun/io/TBOutputFormat.h b/src/shogun/io/TBOutputFormat.h new file mode 100644 index 00000000000..bffcab33ae0 --- /dev/null +++ b/src/shogun/io/TBOutputFormat.h @@ -0,0 +1,82 @@ +/* +* BSD 3-Clause License +* +* Copyright (c) 2017, Shogun-Toolbox e.V. 
+* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* +* * Neither the name of the copyright holder nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* Written (W) 2017 Giovanni De Toni +* +*/ +#include +#ifdef HAVE_TFLOGGER + +#ifndef SHOGUN_OUTPUTFORMAT_H +#define SHOGUN_OUTPUTFORMAT_H + +#include +#include +#include +#include + +#include + +namespace shogun +{ + /** + * Convert an std::pair to a tensorflow::Event, + * which can be written to file and used with tools like Tensorboard. 
+ */ + class TBOutputFormat : public CSGObject + { + + public: + TBOutputFormat(); + ~TBOutputFormat(); + + /** + * Generate a tensorflow::Event object given some information + * @param event_step the current event step + * @param value the value which will be converted to tensorflow::Event + * @param node_name the node name (default: node) + * @return the newly created tensorflow::Event + */ + tensorflow::Event + convert_scalar(const TimedObservedValue& value, std::string& node_name); + + tensorflow::Event + convert_vector(const TimedObservedValue& value, std::string& node_name); + + virtual const char* get_name() const + { + return "TFLogger"; + } + }; +} + +#endif // SHOGUN_OUTPUTFORMAT_H +#endif // HAVE_TFLOGGER diff --git a/src/shogun/io/UAIFile.cpp b/src/shogun/io/UAIFile.cpp index f93e82f05cb..e12ac98ef00 100644 --- a/src/shogun/io/UAIFile.cpp +++ b/src/shogun/io/UAIFile.cpp @@ -104,30 +104,34 @@ void CUAIFile::init_with_defaults() SG_REF(m_line_reader); } -#define GET_VECTOR(read_func, sg_type) \ -void CUAIFile::get_vector(sg_type*& vector, int32_t& len) \ -{ \ - if (!m_line_reader->has_next()) \ - return; \ - \ - SGVector line; \ - int32_t num_elements = 0; \ - \ - line = m_line_reader->read_line(); \ - m_tokenizer->set_text(line); \ - while (m_tokenizer->has_next()) \ - { \ - int32_t temp_start; \ - m_tokenizer->next_token_idx(temp_start); \ - num_elements++; \ - } \ - \ - vector = SG_MALLOC(sg_type, num_elements); \ - m_parser->set_text(line); \ - for (int32_t i=0; iread_func(); \ - len = num_elements; \ -} +#define GET_VECTOR(read_func, sg_type) \ + void CUAIFile::get_vector(sg_type*& vector, int32_t& len) \ + { \ + SG_SET_LOCALE_C; \ + \ + if (!m_line_reader->has_next()) \ + return; \ + \ + SGVector line; \ + int32_t num_elements = 0; \ + \ + line = m_line_reader->read_line(); \ + m_tokenizer->set_text(line); \ + while (m_tokenizer->has_next()) \ + { \ + int32_t temp_start; \ + m_tokenizer->next_token_idx(temp_start); \ + num_elements++; \ + } \ + \ + 
vector = SG_MALLOC(sg_type, num_elements); \ + m_parser->set_text(line); \ + for (int32_t i = 0; i < num_elements; i++) \ + vector[i] = m_parser->read_func(); \ + len = num_elements; \ + \ + SG_RESET_LOCALE; \ + } GET_VECTOR(read_char, int8_t) GET_VECTOR(read_byte, uint8_t) diff --git a/src/shogun/kernel/CombinedKernel.cpp b/src/shogun/kernel/CombinedKernel.cpp index 9c39a2c8fb7..cfab3c28aad 100644 --- a/src/shogun/kernel/CombinedKernel.cpp +++ b/src/shogun/kernel/CombinedKernel.cpp @@ -121,8 +121,13 @@ bool CCombinedKernel::init(CFeatures* l, CFeatures* r) // skip over features - the custom kernel does not need any if (k->get_kernel_type() != K_CUSTOM) { - lf = ((CCombinedFeatures*) l)->get_feature_obj(f_idx); - rf = ((CCombinedFeatures*) r)->get_feature_obj(f_idx); + if (((CCombinedFeatures*)l)->get_num_feature_obj() > f_idx && + ((CCombinedFeatures*)r)->get_num_feature_obj() > f_idx) + { + lf = ((CCombinedFeatures*)l)->get_feature_obj(f_idx); + rf = ((CCombinedFeatures*)r)->get_feature_obj(f_idx); + } + f_idx++; if (!lf || !rf) { diff --git a/src/shogun/kernel/CombinedKernel.h b/src/shogun/kernel/CombinedKernel.h index b139a0ac029..185c4fb3e93 100644 --- a/src/shogun/kernel/CombinedKernel.h +++ b/src/shogun/kernel/CombinedKernel.h @@ -124,7 +124,14 @@ class CCombinedKernel : public CKernel */ inline CKernel* get_kernel(int32_t idx) { - return (CKernel*) kernel_array->get_element(idx); + if (idx < get_num_kernels()) + { + return (CKernel*)kernel_array->get_element(idx); + } + else + { + return 0; + } } /** get last kernel diff --git a/src/shogun/kernel/ExponentialARDKernel.cpp b/src/shogun/kernel/ExponentialARDKernel.cpp index 349ef96c800..564f1c65c8e 100644 --- a/src/shogun/kernel/ExponentialARDKernel.cpp +++ b/src/shogun/kernel/ExponentialARDKernel.cpp @@ -14,7 +14,6 @@ #include #include #include -#include using namespace shogun; @@ -89,11 +88,7 @@ void CExponentialARDKernel::lazy_update_weights() if (m_ARD_type==KT_SCALAR || m_ARD_type==KT_DIAG) { SGMatrix 
log_weights(m_log_weights.vector,1,m_log_weights.vlen,false); - m_weights_raw=linalg::elementwise_compute(m_log_weights, - [ ](float64_t& value) - { - return CMath::exp(value); - }); + m_weights_raw = linalg::exponent(m_log_weights); } else if (m_ARD_type==KT_FULL) { @@ -235,12 +230,8 @@ SGMatrix CExponentialARDKernel::get_weighted_vector(SGVector rtmp(vec.vector,vec.vlen,1,false); - SGMatrix weights=linalg::elementwise_compute(m_log_weights, - [ ](float64_t& value) - { - return CMath::exp(value); - }); - res=linalg::element_prod(weights, rtmp); + SGMatrix weights(linalg::exponent(m_log_weights)); + res = linalg::element_prod(weights, rtmp); } return res; } diff --git a/src/shogun/kernel/Kernel.cpp b/src/shogun/kernel/Kernel.cpp index 2f8303d799d..41b4b0baaa0 100644 --- a/src/shogun/kernel/Kernel.cpp +++ b/src/shogun/kernel/Kernel.cpp @@ -12,12 +12,13 @@ * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society */ -#include -#include -#include +#include #include -#include +#include #include +#include +#include +#include #include @@ -984,8 +985,6 @@ template struct K_THREAD_PARAM int32_t end; /** start (unit number of elements) */ int64_t total_start; - /** end (unit number of elements) */ - int64_t total_end; /** m */ int32_t m; /** n */ @@ -996,6 +995,8 @@ template struct K_THREAD_PARAM bool symmetric; /** output progress */ bool verbose; + /* Progress bar*/ + PRange* pb; }; } @@ -1274,8 +1275,8 @@ template void* CKernel::get_kernel_matrix_helper(void* p) int32_t m=params->m; bool verbose=params->verbose; int64_t total_start=params->total_start; - int64_t total_end=params->total_end; int64_t total=total_start; + PRange* pb = params->pb; for (int32_t i=i_start; i void* CKernel::get_kernel_matrix_helper(void* p) if (symmetric && i!=j) total++; - if (total%100 == 0) - SG_OBJ_PROGRESS(k, total, total_start, total_end) + pb->print_progress(); - if (CSignal::cancel_computations()) - break; + // TODO: replace with the new signal + // if 
(CSignal::cancel_computations()) + // break; } } @@ -1335,7 +1336,8 @@ SGMatrix CKernel::get_kernel_matrix() K_THREAD_PARAM params; int64_t step = total_num/num_threads; index_t t = 0; - #pragma omp parallel for lastprivate(t) private(params) + auto pb = progress(range(total_num), *this->io); +#pragma omp parallel for lastprivate(t) private(params) for (t = 0; t < num_threads; ++t) { params.kernel = this; @@ -1343,11 +1345,11 @@ SGMatrix CKernel::get_kernel_matrix() params.start = compute_row_start(t*step, n, symmetric); params.end = compute_row_start((t+1)*step, n, symmetric); params.total_start=t*step; - params.total_end=(t+1)*step; params.n=n; params.m=m; params.symmetric=symmetric; params.verbose=false; + params.pb = &pb; CKernel::get_kernel_matrix_helper((void*)¶ms); } @@ -1358,15 +1360,15 @@ SGMatrix CKernel::get_kernel_matrix() params.start = compute_row_start(t*step, n, symmetric); params.end = m; params.total_start=t*step; - params.total_end=total_num; params.n=n; params.m=m; params.symmetric=symmetric; params.verbose=false; + params.pb = &pb; CKernel::get_kernel_matrix_helper((void*)¶ms); } - SG_DONE() + pb.complete(); return SGMatrix(result,m,n,true); } @@ -1377,4 +1379,3 @@ template SGMatrix CKernel::get_kernel_matrix(); template void* CKernel::get_kernel_matrix_helper(void* p); template void* CKernel::get_kernel_matrix_helper(void* p); - diff --git a/src/shogun/kernel/Kernel.h b/src/shogun/kernel/Kernel.h index 88e66fb413b..4768cd9f3eb 100644 --- a/src/shogun/kernel/Kernel.h +++ b/src/shogun/kernel/Kernel.h @@ -18,7 +18,6 @@ #include #include -#include #include #include #include diff --git a/src/shogun/kernel/string/CommUlongStringKernel.cpp b/src/shogun/kernel/string/CommUlongStringKernel.cpp index 6936dab8967..bfba00ee012 100644 --- a/src/shogun/kernel/string/CommUlongStringKernel.cpp +++ b/src/shogun/kernel/string/CommUlongStringKernel.cpp @@ -8,11 +8,13 @@ * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society */ -#include 
-#include -#include +#include #include #include +#include +#include + +#include using namespace shogun; @@ -231,11 +233,8 @@ bool CCommUlongStringKernel::init_optimization( SG_DEBUG("initializing CCommUlongStringKernel optimization\n") - for (int32_t i=0; iio)) { - if ( (i % (count/10+1)) == 0) - SG_PROGRESS(i, 0, count) - add_to_normal(IDX[i], weights[i]); } diff --git a/src/shogun/kernel/string/CommWordStringKernel.cpp b/src/shogun/kernel/string/CommWordStringKernel.cpp index 35450c378ba..f2dfb0f53de 100644 --- a/src/shogun/kernel/string/CommWordStringKernel.cpp +++ b/src/shogun/kernel/string/CommWordStringKernel.cpp @@ -8,14 +8,15 @@ * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society */ -#include -#include - #include +#include +#include +#include +#include #include + #include -#include using namespace shogun; @@ -45,20 +46,17 @@ CCommWordStringKernel::CCommWordStringKernel( bool CCommWordStringKernel::init_dictionary(int32_t size) { - dictionary_size= size; - SG_FREE(dictionary_weights); - dictionary_weights=SG_MALLOC(float64_t, size); + dictionary_weights=SGVector(size); SG_DEBUG("using dictionary of %d words\n", size) clear_normal(); - return dictionary_weights!=NULL; + return dictionary_weights.vector!=NULL; } CCommWordStringKernel::~CCommWordStringKernel() { cleanup(); - SG_FREE(dictionary_weights); SG_FREE(dict_diagonal_optimization); } @@ -97,7 +95,7 @@ float64_t CCommWordStringKernel::compute_diag(int32_t idx_a) ASSERT((1<<(sizeof(uint16_t)*8)) > alen) int32_t num_symbols=(int32_t) l->get_num_symbols(); - ASSERT(num_symbols<=dictionary_size) + ASSERT(num_symbols<=dictionary_weights.vlen) int32_t* dic = dict_diagonal_optimization; memset(dic, 0, num_symbols*sizeof(int32_t)); @@ -285,7 +283,7 @@ void CCommWordStringKernel::add_to_normal(int32_t vec_idx, float64_t weight) void CCommWordStringKernel::clear_normal() { - memset(dictionary_weights, 0, dictionary_size*sizeof(float64_t)); + dictionary_weights.zero(); 
set_is_initialized(false); } @@ -303,11 +301,8 @@ bool CCommWordStringKernel::init_optimization( SG_DEBUG("initializing CCommWordStringKernel optimization\n") - for (int32_t i=0; iio)) { - if ( (i % (count/10+1)) == 0) - SG_PROGRESS(i, 0, count) - add_to_normal(IDX[i], weights[i]); } @@ -602,9 +597,6 @@ char* CCommWordStringKernel::compute_consensus( void CCommWordStringKernel::init() { - dictionary_size=0; - dictionary_weights=NULL; - use_sign=false; use_dict_diagonal_optimization=false; dict_diagonal_optimization=NULL; @@ -613,8 +605,8 @@ void CCommWordStringKernel::init() init_dictionary(1<<(sizeof(uint16_t)*8)); set_normalizer(new CSqrtDiagKernelNormalizer(use_dict_diagonal_optimization)); - m_parameters->add_vector(&dictionary_weights, &dictionary_size, "dictionary_weights", - "Dictionary for applying kernel."); + SG_ADD(&dictionary_weights, "dictionary_weights", + "Dictionary for applying kernel.", MS_NOT_AVAILABLE); SG_ADD(&use_sign, "use_sign", "If signum(counts) is used instead of counts.", MS_AVAILABLE); SG_ADD(&use_dict_diagonal_optimization, diff --git a/src/shogun/kernel/string/CommWordStringKernel.h b/src/shogun/kernel/string/CommWordStringKernel.h index 841bb31ccee..3011c5f30fa 100644 --- a/src/shogun/kernel/string/CommWordStringKernel.h +++ b/src/shogun/kernel/string/CommWordStringKernel.h @@ -149,13 +149,11 @@ class CCommWordStringKernel : public CStringKernel /** get dictionary * - * @param dsize dictionary size will be stored in here - * @param dweights dictionary weights will be stored in here + * @return dictionary weights */ - void get_dictionary(int32_t& dsize, float64_t*& dweights) + SGVector get_dictionary() const { - dsize=dictionary_size; - dweights = dictionary_weights; + return dictionary_weights; } /** compute scoring @@ -240,11 +238,9 @@ class CCommWordStringKernel : public CStringKernel void init(); protected: - /** size of dictionary (number of possible strings) */ - int32_t dictionary_size; /** dictionary weights - array to hold 
counters for all possible * strings */ - float64_t* dictionary_weights; + SGVector dictionary_weights; /** if sign shall be used */ bool use_sign; diff --git a/src/shogun/kernel/string/DistantSegmentsKernel.cpp b/src/shogun/kernel/string/DistantSegmentsKernel.cpp deleted file mode 100644 index a9e9407a7d4..00000000000 --- a/src/shogun/kernel/string/DistantSegmentsKernel.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2011 Heiko Strathmann - * DS-Kernel implementation Written (W) 2008 Sébastien Boisvert under GPLv3 - * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society - */ - -#include -#include - -using namespace shogun; - -CDistantSegmentsKernel::CDistantSegmentsKernel() : CStringKernel(), - m_delta(0), m_theta(0) -{ - init(); -} - -CDistantSegmentsKernel::CDistantSegmentsKernel(int32_t size, int32_t delta, - int32_t theta) : CStringKernel(size), m_delta(delta), - m_theta(theta) -{ - init(); -} - -CDistantSegmentsKernel::CDistantSegmentsKernel(CStringFeatures* l, - CStringFeatures* r, int32_t size, int32_t delta, int32_t theta) : - CStringKernel(size), m_delta(delta), m_theta(theta) -{ - init(); - CStringKernel::init(l, r); -} - -bool CDistantSegmentsKernel::init(CFeatures* l, CFeatures* r) -{ - CKernel::init(l, r); - return init_normalizer(); -} - -void CDistantSegmentsKernel::init() -{ - SG_ADD(&m_delta, "delta", "Delta parameter of the DS-Kernel", MS_AVAILABLE); - SG_ADD(&m_theta, "theta", "Theta parameter of the DS-Kernel", MS_AVAILABLE); -} - -float64_t CDistantSegmentsKernel::compute(int32_t idx_a, int32_t idx_b) -{ - bool free_a, free_b; - int32_t aLength=0, bLength=0; - char* a=((CStringFeatures*) lhs)->get_feature_vector(idx_a, aLength, - free_a); - char* 
b=((CStringFeatures*) rhs)->get_feature_vector(idx_b, bLength, - free_b); - ASSERT(a && b) - - if ((aLength<1)||(bLength<1)) - SG_ERROR("Empty sequences") - - float64_t result=compute(a, aLength, b, bLength, m_delta, m_theta); - - ((CStringFeatures*) lhs)->free_feature_vector(a, idx_a, free_a); - ((CStringFeatures*) rhs)->free_feature_vector(b, idx_b, free_b); - - return result; -} - -int32_t CDistantSegmentsKernel::bin(int32_t j, int32_t i) -{ - if (i>j) - return 0; - if (i==3 && j>=3) - { - return j*(j-1)*(j-2)/6; - } - else if (i==2 && j>=2) - { - return j*(j-1)/2; - } - return 0; -} - -int32_t CDistantSegmentsKernel::compute(char* s, int32_t sLength, char* t, - int32_t tLength, int32_t delta_m, int32_t theta_m) -{ - int32_t c=0; - int32_t* i_=SG_MALLOC(int32_t, delta_m+1); - int32_t* l_=SG_MALLOC(int32_t, delta_m+1); - for (int32_t j_s=0; j_s<=(int32_t) sLength-1; j_s++) - { - for (int32_t j_t=0; j_t<=(int32_t) tLength-1; j_t++) - { - if (s[j_s-1+1]==t[j_t-1+1]) - { - int32_t n=CMath::min(CMath::min(sLength-j_s, tLength-j_t), delta_m); - int32_t k=-1; - int32_t i=1; - while (i<=n) - { - k++; - i_[2*k]=i; - i++; - while (i<=n&&s[j_s-1+i]==t[j_t-1+i]) - i++; - i_[2*k+1]=i; - l_[k]=i_[2*k+1]-i_[2*k]+1; - i++; - while (i<=n&&s[j_s-1+i]!=t[j_t-1+i]) - i++; - } - c+=bin(l_[0], 3)-2*bin(l_[0]-theta_m, 3) - +bin(l_[0]-2*theta_m, 3); - int32_t c1=0; - for (int32_t r=1; r<=k; r++) - { - c1+=bin(l_[r], 2)-bin(l_[r]-theta_m, 2); - } - c+=CMath::min(theta_m, i_[1]-i_[0])*c1; - } - } - } - SG_FREE(l_); - SG_FREE(i_); - return c; -} diff --git a/src/shogun/kernel/string/DistantSegmentsKernel.h b/src/shogun/kernel/string/DistantSegmentsKernel.h deleted file mode 100644 index aedd5b5595d..00000000000 --- a/src/shogun/kernel/string/DistantSegmentsKernel.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either 
version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2011 Heiko Strathmann - * DS-Kernel implementation Written (W) 2008 Sébastien Boisvert under GPLv3 - * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society - */ - -#ifndef DISTANTSEGMENTSKERNEL_H_ -#define DISTANTSEGMENTSKERNEL_H_ - -#include - -#include - -namespace shogun -{ - -/** - * @brief The distant segments kernel is a string kernel, - * which counts the number of substrings, so-called segments, - * at a certain distance from each other. - * - * The implementation is taken from - * http://www.retrovirology.com/content/5/1/110/ and - * only adjusted to work with shogun. See that page for any details. - * - * Reference: Sebastien Boisvert, Mario Marchand, Francois Laviolette, - * and Jacques Corbeil. Hiv-1 coreceptor usage prediction without - * multiple alignments: an application of string kernels. - * Retrovirology, 5(1):110, Dec 2008. - */ -class CDistantSegmentsKernel: public CStringKernel -{ -public: - /** default constructor */ - CDistantSegmentsKernel(); - - /** constructor - * - * @param size cache size - * @param delta \f[\delta\f]-parameter of the DS-kernel - * @param theta \f[\theta\f]-parameter of the DS-kernel - */ - CDistantSegmentsKernel(int32_t size, int32_t delta, int32_t theta); - - /** constructor - * - * @param l features of left-side - * @param r features of right-side - * @param size cache size - * @param delta \f[\delta\f]-parameter of the DS-kernel - * @param theta \f[\theta\f]-parameter of the DS-kernel - */ - CDistantSegmentsKernel(CStringFeatures* l, CStringFeatures* r, - int32_t size, int32_t delta, int32_t theta); - - /** initialize kernel with features - * - * @param l features of left-side - * @param r features of right-side - * @return true if successful - */ - virtual bool init(CFeatures* l, CFeatures* r); - - /** - * @return kernel type - */ - virtual EKernelType get_kernel_type() - { - return K_DISTANTSEGMENTS; - } - 
- /** - * @return name of kernel - */ - virtual const char* get_name() const - { - return "DistantSegmentsKernel"; - } - -protected: - /** - * compute kernel function for features a and b - * idx_{a,b} denote the index of the feature vectors - * in the corresponding feature object - * - * @param idx_a index a - * @param idx_b index b - * @return computed kernel function at indices a,b - */ - virtual float64_t compute(int32_t idx_a, int32_t idx_b); - -private: - /** initializes kernel parameters and registers them */ - void init(); - - /** - * helper function taken from - * http://www.retrovirology.com/content/5/1/110/ - * */ - int32_t bin(int32_t j, int32_t i); - - /** - * Computes the DS-kernel for the given strings and parameters. - * Taken from http://www.retrovirology.com/content/5/1/110/ - * with little adjustments. - * - * @param s first string for kernel computation - * @param sLength length of that string - * @param b second string for kernel computation - * @param bLength length of that string - * @param delta_m delta parameter - * @param theta_m theta parameter - * @return computed kernel function of the given strings and parameters - */ - int32_t compute(char* s, int32_t sLength, char* b, int32_t bLength, - int32_t delta_m, int32_t theta_m); - -protected: - /** delta parameter of DS-kernel */ - int32_t m_delta; - - /** theta parameter of DS-kernel */ - int32_t m_theta; - - -}; - -} - -#endif /* DISTANTSEGMENTSKERNEL_H_ */ diff --git a/src/shogun/kernel/string/LinearStringKernel.cpp b/src/shogun/kernel/string/LinearStringKernel.cpp index 1d0fddf3fd3..2b43cecf2a6 100644 --- a/src/shogun/kernel/string/LinearStringKernel.cpp +++ b/src/shogun/kernel/string/LinearStringKernel.cpp @@ -10,20 +10,20 @@ #include #include -#include +#include #include #include using namespace shogun; CLinearStringKernel::CLinearStringKernel() -: CStringKernel(0), normal(NULL) +: CStringKernel(0) { } CLinearStringKernel::CLinearStringKernel( CStringFeatures* l, CStringFeatures* r) -: 
CStringKernel(0), normal(NULL) +: CStringKernel(0) { init(l, r); } @@ -48,7 +48,7 @@ void CLinearStringKernel::cleanup() void CLinearStringKernel::clear_normal() { - memset(normal, 0, lhs->get_num_vectors()*sizeof(float64_t)); + memset(m_normal.vector, 0, lhs->get_num_vectors()*sizeof(float64_t)); } void CLinearStringKernel::add_to_normal(int32_t idx, float64_t weight) @@ -58,7 +58,7 @@ void CLinearStringKernel::add_to_normal(int32_t idx, float64_t weight) char* vec = ((CStringFeatures*) lhs)->get_feature_vector(idx, vlen, vfree); for (int32_t i=0; inormalize_lhs(vec[i], idx); + m_normal.vector[i] += weight*normalizer->normalize_lhs(vec[i], idx); ((CStringFeatures*) lhs)->free_feature_vector(vec, idx, vfree); } @@ -71,7 +71,9 @@ float64_t CLinearStringKernel::compute(int32_t idx_a, int32_t idx_b) char* avec = ((CStringFeatures*) lhs)->get_feature_vector(idx_a, alen, free_avec); char* bvec = ((CStringFeatures*) rhs)->get_feature_vector(idx_b, blen, free_bvec); ASSERT(alen==blen) - float64_t result=CMath::dot(avec, bvec, alen); + SGVector a_wrap(avec, alen, false); + SGVector b_wrap(bvec, blen, false); + float64_t result = linalg::dot(a_wrap, b_wrap); ((CStringFeatures*) lhs)->free_feature_vector(avec, idx_a, free_avec); ((CStringFeatures*) rhs)->free_feature_vector(bvec, idx_b, free_bvec); return result; @@ -83,8 +85,8 @@ bool CLinearStringKernel::init_optimization( int32_t num_feat = ((CStringFeatures*) lhs)->get_max_vector_length(); ASSERT(num_feat) - normal = SG_MALLOC(float64_t, num_feat); - ASSERT(normal) + m_normal = SGVector(num_feat); + ASSERT(m_normal.vector) clear_normal(); for (int32_t i = 0; inormalize_lhs(((float64_t) avec[j]), sv_idx[i]); } ((CStringFeatures*) lhs)->free_feature_vector(avec, sv_idx[i], free_avec); @@ -107,8 +109,7 @@ bool CLinearStringKernel::init_optimization( bool CLinearStringKernel::delete_optimization() { - SG_FREE(normal); - normal = NULL; + m_normal = SGVector(); set_is_initialized(false); return true; } @@ -118,7 +119,10 @@ 
float64_t CLinearStringKernel::compute_optimized(int32_t idx_b) int32_t blen; bool free_bvec; char* bvec = ((CStringFeatures*) rhs)->get_feature_vector(idx_b, blen, free_bvec); - float64_t result=normalizer->normalize_rhs(CMath::dot(normal, bvec, blen), idx_b); + float64_t dot = 0.0; + for (auto i = 0; i < m_normal.vlen; ++i) + dot += m_normal[i]*(float64_t)bvec[i]; + float64_t result=normalizer->normalize_rhs(dot, idx_b); ((CStringFeatures*) rhs)->free_feature_vector(bvec, idx_b, free_bvec); return result; } diff --git a/src/shogun/kernel/string/LinearStringKernel.h b/src/shogun/kernel/string/LinearStringKernel.h index 5b59c13ea78..3c03db01d5f 100644 --- a/src/shogun/kernel/string/LinearStringKernel.h +++ b/src/shogun/kernel/string/LinearStringKernel.h @@ -117,7 +117,7 @@ class CLinearStringKernel: public CStringKernel protected: /** normal vector (used in case of optimized kernel) */ - float64_t* normal; + SGVector m_normal; }; } #endif /* _LINEARSTRINGKERNEL_H___ */ diff --git a/src/shogun/kernel/string/WeightedDegreePositionStringKernel.cpp b/src/shogun/kernel/string/WeightedDegreePositionStringKernel.cpp index 4841b13e0d9..f5253b81401 100644 --- a/src/shogun/kernel/string/WeightedDegreePositionStringKernel.cpp +++ b/src/shogun/kernel/string/WeightedDegreePositionStringKernel.cpp @@ -9,11 +9,12 @@ * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society */ -#include +#include +#include #include #include #include -#include +#include #include #include @@ -24,6 +25,7 @@ #ifdef HAVE_PTHREAD #include + #endif using namespace shogun; @@ -268,12 +270,10 @@ bool CWeightedDegreePositionStringKernel::init_optimization( if (tree_num<0) SG_DEBUG("initializing CWeightedDegreePositionStringKernel optimization\n") - for (int32_t i=0; iio)) { if (tree_num<0) { - if ( (i % (p_count/10+1)) == 0) - SG_PROGRESS(i,0,p_count) add_example_to_tree(IDX[i], alphas[i]); } else @@ -283,9 +283,6 @@ bool CWeightedDegreePositionStringKernel::init_optimization( } } - if
(tree_num<0) - SG_DONE() - set_is_initialized(true) ; return true ; } @@ -1255,35 +1252,41 @@ void CWeightedDegreePositionStringKernel::compute_batch( if (num_threads < 2) { - CSignal::clear_cancel(); - for (int32_t j=0; j params; - params.vec=vec; - params.result=result; - params.weights=weights; - params.kernel=this; - params.tries=&tries; - params.factor=factor; - params.j=j; - params.start=0; - params.end=num_vec; - params.length=length; - params.max_shift=max_shift; - params.shift=shift; - params.vec_idx=vec_idx; - compute_batch_helper((void*) ¶ms); - - SG_PROGRESS(j,0,num_feat) - } + + auto pb = progress(range(num_feat), *this->io); + // TODO: replace with the new signal + // for (int32_t j=0; j params; + params.vec = vec; + params.result = result; + params.weights = weights; + params.kernel = this; + params.tries = &tries; + params.factor = factor; + params.j = j; + params.start = 0; + params.end = num_vec; + params.length = length; + params.max_shift = max_shift; + params.shift = shift; + params.vec_idx = vec_idx; + compute_batch_helper((void*)¶ms); + + pb.print_progress(); + } + pb.complete(); } #ifdef HAVE_PTHREAD else { - CSignal::clear_cancel(); - for (int32_t j=0; jio); + // TODO: replace with the new signal + // for (int32_t j=0; jio); for( k = 0; k < max_degree; ++k ) { const int32_t nofKmers = nofsKmers[ k ]; @@ -1439,7 +1443,7 @@ float64_t* CWeightedDegreePositionStringKernel::compute_scoring( x[j] = -1; } tries.traverse( tree, p, info, 0, x, k ); - SG_PROGRESS(i++,0,num_feat*max_degree) + pb.print_progress(); } // --- add partial overlap scores @@ -1478,6 +1482,7 @@ float64_t* CWeightedDegreePositionStringKernel::compute_scoring( // end; // end; } + pb.complete(); // === return a vector num_feat=1; @@ -1521,7 +1526,7 @@ char* CWeightedDegreePositionStringKernel::compute_consensus( table[i]=new DynArray(num_suppvec/10); //compute consensus via dynamic programming - for (int32_t i=0; iio)) { bool cumulative=false; @@ -1537,8 +1542,6 @@ char* 
CWeightedDegreePositionStringKernel::compute_consensus( tries.fill_backtracking_table(i, NULL, table[i], cumulative, weights); else tries.fill_backtracking_table(i, table[i-1], table[i], cumulative, weights); - - SG_PROGRESS(i,0,num_feat) } diff --git a/src/shogun/kernel/string/WeightedDegreeStringKernel.cpp b/src/shogun/kernel/string/WeightedDegreeStringKernel.cpp index 50632dac655..b325867d290 100644 --- a/src/shogun/kernel/string/WeightedDegreeStringKernel.cpp +++ b/src/shogun/kernel/string/WeightedDegreeStringKernel.cpp @@ -9,12 +9,13 @@ * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society */ -#include +#include +#include +#include #include #include #include -#include -#include +#include #include #include @@ -23,6 +24,7 @@ #ifdef HAVE_PTHREAD #include + #endif using namespace shogun; @@ -213,12 +215,10 @@ bool CWeightedDegreeStringKernel::init_optimization(int32_t count, int32_t* IDX, if (tree_num<0) SG_DEBUG("initializing CWeightedDegreeStringKernel optimization\n") - for (int32_t i=0; iio)) { if (tree_num<0) { - if ( (i % (count/10+1)) == 0) - SG_PROGRESS(i, 0, count) if (max_mismatch==0) add_example_to_tree(IDX[i], alphas[i]) ; @@ -236,9 +236,6 @@ bool CWeightedDegreeStringKernel::init_optimization(int32_t count, int32_t* IDX, } } - if (tree_num<0) - SG_DONE() - //tries.compact_nodes(NO_CHILD, 0, weights) ; set_is_initialized(true) ; @@ -883,11 +880,13 @@ void CWeightedDegreeStringKernel::compute_batch( #endif ASSERT(num_threads>0) int32_t* vec=SG_MALLOC(int32_t, num_threads*num_feat); + auto pb = progress(range(num_feat), *this->io); if (num_threads < 2) { - CSignal::clear_cancel(); - for (int32_t j=0; j src, float64_t threshold) : CDe SGVector labels(src.vlen); for (int32_t i = 0; i < labels.vlen; i++) { - labels[i] = src[i] + threshold >= 0 ? +1.0 : -1.0; + labels[i] = src[i] >= threshold ? 
+1.0 : -1.0; } set_labels(labels); set_values(src); diff --git a/src/shogun/labels/DenseLabels.cpp b/src/shogun/labels/DenseLabels.cpp index 525f43b3d5b..01bd9ef8c4d 100644 --- a/src/shogun/labels/DenseLabels.cpp +++ b/src/shogun/labels/DenseLabels.cpp @@ -10,13 +10,14 @@ * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society */ -#include -#include -#include +#include #include #include +#include +#include +#include #include -#include +#include using namespace shogun; @@ -32,6 +33,8 @@ CDenseLabels::CDenseLabels(int32_t num_lab) init(); m_labels = SGVector(num_lab); m_current_values=SGVector(num_lab); + linalg::zero(m_labels); + linalg::zero(m_current_values); } CDenseLabels::CDenseLabels(CFile* loader) diff --git a/src/shogun/labels/Labels.cpp b/src/shogun/labels/Labels.cpp index 78cc90c046d..1c6c3b24fb5 100644 --- a/src/shogun/labels/Labels.cpp +++ b/src/shogun/labels/Labels.cpp @@ -31,7 +31,9 @@ void CLabels::init() { SG_ADD((CSGObject **)&m_subset_stack, "subset_stack", "Current subset stack", MS_NOT_AVAILABLE); - + SG_ADD( + &m_current_values, "current_values", "current active value vector", + MS_NOT_AVAILABLE) m_subset_stack = new CSubsetStack(); SG_REF(m_subset_stack); } diff --git a/src/shogun/labels/MulticlassLabels.cpp b/src/shogun/labels/MulticlassLabels.cpp index 8219d152c19..29d3ff7c906 100644 --- a/src/shogun/labels/MulticlassLabels.cpp +++ b/src/shogun/labels/MulticlassLabels.cpp @@ -25,6 +25,15 @@ CMulticlassLabels::CMulticlassLabels(CFile* loader) : CDenseLabels(loader) init(); } +CMulticlassLabels::CMulticlassLabels(CBinaryLabels* labels) + : CDenseLabels(labels->get_num_labels()) +{ + init(); + + for (index_t i = 0; i < labels->get_num_labels(); ++i) + m_labels[i] = (labels->get_label(i) == 1 ? 
1 : 0); +} + CMulticlassLabels::~CMulticlassLabels() { } @@ -41,8 +50,7 @@ void CMulticlassLabels::set_multiclass_confidences(int32_t i, "%s::set_multiclass_confidences(): Length of confidences should " "match size of the matrix", get_name()); - for (index_t j=0; j CMulticlassLabels::get_multiclass_confidences(int32_t i) @@ -147,3 +155,19 @@ CLabels* CMulticlassLabels::shallow_subset_copy() return shallow_copy_labels; } + +CMulticlassLabels* CMulticlassLabels::obtain_from_generic(CLabels* labels) +{ + if (labels == NULL) + return NULL; + + if (labels->get_label_type() != LT_MULTICLASS) + { + SG_SERROR("The Labels passed cannot be casted to CMulticlassLabels!") + return NULL; + } + + CMulticlassLabels* casted = dynamic_cast(labels); + SG_REF(casted) + return casted; +} \ No newline at end of file diff --git a/src/shogun/labels/MulticlassLabels.h b/src/shogun/labels/MulticlassLabels.h index 5ea4b0fb20f..45673fd51c1 100644 --- a/src/shogun/labels/MulticlassLabels.h +++ b/src/shogun/labels/MulticlassLabels.h @@ -51,12 +51,21 @@ class CMulticlassLabels : public CDenseLabels */ CMulticlassLabels(SGVector src); + /** constructor * * @param loader File object via which to load data */ CMulticlassLabels(CFile* loader); + /** + * Convert binary labels to multiclass labels, + * namely -1 is mapped to 0 and 1 to 1. 
+ * + * @param labels Binary labels + */ + CMulticlassLabels(CBinaryLabels* labels); + /** destructor */ ~CMulticlassLabels(); @@ -126,6 +135,12 @@ class CMulticlassLabels : public CDenseLabels #ifndef SWIG // SWIG should skip this part virtual CLabels* shallow_subset_copy(); #endif + /** + * Cast a generic label object to a multiclass one + * @param labels generic CLabels instance + * @return the casted pointer (already SG_REF'ed) + */ + static CMulticlassLabels* obtain_from_generic(CLabels* labels); private: /** initialises and register parameters */ diff --git a/src/shogun/latent/LatentSOSVM.cpp b/src/shogun/latent/LatentSOSVM.cpp deleted file mode 100644 index 01e0b303e1e..00000000000 --- a/src/shogun/latent/LatentSOSVM.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * Written (W) 2012 Viktor Gal - * Copyright (C) 2012 Viktor Gal - */ - -#include -#ifdef USE_GPL_SHOGUN -#include - -using namespace shogun; - -CLatentSOSVM::CLatentSOSVM() - : CLinearLatentMachine() -{ - register_parameters(); - m_so_solver=NULL; -} - -CLatentSOSVM::CLatentSOSVM(CLatentModel* model, CLinearStructuredOutputMachine* so_solver, float64_t C) - : CLinearLatentMachine(model, C) -{ - register_parameters(); - set_so_solver(so_solver); -} - -CLatentSOSVM::~CLatentSOSVM() -{ - SG_UNREF(m_so_solver); -} - -CLatentLabels* CLatentSOSVM::apply_latent() -{ - return NULL; -} - -void CLatentSOSVM::set_so_solver(CLinearStructuredOutputMachine* so) -{ - SG_REF(so); - SG_UNREF(m_so_solver); - m_so_solver = so; -} - -float64_t CLatentSOSVM::do_inner_loop(float64_t cooling_eps) -{ - float64_t lambda = 1/m_C; - CDualLibQPBMSOSVM* so = new CDualLibQPBMSOSVM(); - so->set_lambda(lambda); - so->train(); - - /* copy the resulting w */ - set_w(so->get_w().clone()); - - /* get the primal objective value */ - float64_t po = so->get_result().Fp; - - SG_UNREF(so); - - return po; -} - -void CLatentSOSVM::register_parameters() -{ - m_parameters->add((CSGObject**)&m_so_solver, "so_solver", "Structured Output Solver."); -} - -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/latent/LatentSOSVM.h b/src/shogun/latent/LatentSOSVM.h deleted file mode 100644 index f5c0c6be3a4..00000000000 --- a/src/shogun/latent/LatentSOSVM.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * Written (W) 2012 Viktor Gal - * Copyright (C) 2012 Viktor Gal - */ - -#ifndef __LATENTSOSVM_H__ -#define __LATENTSOSVM_H__ - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include - -namespace shogun -{ - /** @brief class Latent Structured Output SVM, - * an structured output based machine for classification - * problems with latent variables. - */ - class CLatentSOSVM: public CLinearLatentMachine - { - public: - /** default ctor*/ - CLatentSOSVM(); - - /** - * - * @param model - * @param so_solver - * @param C - */ - CLatentSOSVM(CLatentModel* model, CLinearStructuredOutputMachine* so_solver, float64_t C); - - virtual ~CLatentSOSVM(); - - /** apply linear machine to data - * - * @return classified labels - */ - virtual CLatentLabels* apply_latent(); - - /** set SO solver that is going to be used - * - * @param so SO machine - */ - void set_so_solver(CLinearStructuredOutputMachine* so); - - /** Returns the name of the SGSerializable instance. - * - * @return name of the SGSerializable - */ - virtual const char* get_name() const { return "LatentSOSVM"; } - - protected: - /** do inner loop with given cooling epsilon - * - * @param cooling_eps cooling epsilon - */ - virtual float64_t do_inner_loop(float64_t cooling_eps); - - private: - void register_parameters(); - - private: - /** Linear Structured Solver */ - CLinearStructuredOutputMachine* m_so_solver; - }; -} -#endif //USE_GPL_SHOGUN -#endif /* __LATENTSOSVM_H__ */ - diff --git a/src/shogun/latent/LatentSVM.cpp b/src/shogun/latent/LatentSVM.cpp deleted file mode 100644 index c0b99d10294..00000000000 --- a/src/shogun/latent/LatentSVM.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * Written (W) 2012 Viktor Gal - * Copyright (C) 2012 Viktor Gal - */ - -#include -#ifdef USE_GPL_SHOGUN - -#include - -#include -#include - -using namespace shogun; - -CLatentSVM::CLatentSVM() - : CLinearLatentMachine() -{ -} - -CLatentSVM::CLatentSVM(CLatentModel* model, float64_t C) - : CLinearLatentMachine(model, C) -{ -} - -CLatentSVM::~CLatentSVM() -{ -} - -CLatentLabels* CLatentSVM::apply_latent() -{ - if (!m_model) - SG_ERROR("LatentModel is not set!\n") - - if (m_model->get_num_vectors() < 1) - return NULL; - - SGVector w = get_w(); - index_t num_examples = m_model->get_num_vectors(); - CLatentLabels* hs = new CLatentLabels(num_examples); - CBinaryLabels* ys = new CBinaryLabels(num_examples); - hs->set_labels(ys); - m_model->set_labels(hs); - - for (index_t i = 0; i < num_examples; ++i) - { - /* find h for the example */ - CData* h = m_model->infer_latent_variable(w, i); - hs->add_latent_label(h); - } - - /* compute the y labels */ - CDotFeatures* x = m_model->get_psi_feature_vectors(); - x->dense_dot_range(ys->get_labels().vector, 0, num_examples, NULL, w.vector, w.vlen, 0.0); - - return hs; -} - -float64_t CLatentSVM::do_inner_loop(float64_t cooling_eps) -{ - CLabels* ys = m_model->get_labels()->get_labels(); - CDotFeatures* feats = (m_model->get_caching() ? 
- m_model->get_cached_psi_features() : - m_model->get_psi_feature_vectors()); - CSVMOcas svm(m_C, feats, ys); - svm.set_epsilon(cooling_eps); - svm.train(); - SG_UNREF(ys); - SG_UNREF(feats); - - /* copy the resulting w */ - set_w(svm.get_w().clone()); - - return svm.compute_primal_objective(); -} - -#endif //USE_GPL_SHOGUN - diff --git a/src/shogun/latent/LatentSVM.h b/src/shogun/latent/LatentSVM.h deleted file mode 100644 index 35e43e6d0b4..00000000000 --- a/src/shogun/latent/LatentSVM.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2012 Viktor Gal - * Copyright (C) 2012 Viktor Gal - */ - -#ifndef __LATENTSVM_H__ -#define __LATENTSVM_H__ - - -#include -#ifdef USE_GPL_SHOGUN -#include -#include - -namespace shogun -{ - class LatentModel; - - /** @brief LatentSVM class - * Latent SVM implementation based on [1]. - * For optimization this implementation uses SVMOcas. - * - * User must provide a her own CLatentModel which implements the PSI(x_i,h_i) - * function for the given problem. - * - * [1] P. F. Felzenszwalb, R. B. Girshick, D. McAllester, and D. Ramanan, - * "Object detection with discriminatively trained part-based models," - * Pattern Analysis and Machine Intelligence, - * IEEE Transactions on, vol. 32, no. 9, pp. 1627-1645, 2010. - * - */ - class CLatentSVM: public CLinearLatentMachine - { - public: - /** default contstructor */ - CLatentSVM(); - - /** constructor - * - * @param model the user defined CLatentModel object. 
- * @param C regularization constant - */ - CLatentSVM(CLatentModel* model, float64_t C); - - virtual ~CLatentSVM(); - - /** apply linear machine to all examples - * - * @return resulting labels - */ - virtual CLatentLabels* apply_latent(); - - using CLinearLatentMachine::apply_latent; - - /** Returns the name of the SGSerializable instance. - * - * @return name of the SGSerializable - */ - virtual const char* get_name() const { return "LatentSVM"; } - - protected: - /** inner loop of the latent machine - * - * The optimization part after finding the argmax_h for the - * positive examples in the outter loop. It uses SVMOcas for - * finding the cutting plane. - * - * @param cooling_eps epsilon - * @return primal objective value - */ - virtual float64_t do_inner_loop(float64_t cooling_eps); - }; -} - -#endif /* __LATENTSVM_H__ */ - -#endif //USE_GPL_SHOGUN - diff --git a/src/shogun/lib/CircularBuffer.cpp b/src/shogun/lib/CircularBuffer.cpp index ec886221938..50703228bc0 100644 --- a/src/shogun/lib/CircularBuffer.cpp +++ b/src/shogun/lib/CircularBuffer.cpp @@ -66,7 +66,7 @@ int32_t CCircularBuffer::push(SGVector source) // determine which part of the memory block is free to read if (m_end_pos>=m_begin_pos) { - int32_t bytes_to_memory_end=m_buffer.vlen-(m_end_pos-m_buffer.vector); + auto bytes_to_memory_end=m_buffer.vlen-std::distance(m_buffer.vector, m_end_pos); if (bytes_to_memory_end=m_begin_pos) { - int32_t bytes_to_memory_end=m_buffer.vlen-(m_end_pos-m_buffer.vector); + int32_t bytes_to_memory_end=m_buffer.vlen-std::distance(m_buffer.vector, m_end_pos); if (bytes_to_memory_end 0) + ? 
temp + : has_next_locally(m_buffer.vector+m_last_idx-head_length, m_end_pos); } } else @@ -225,8 +223,8 @@ index_t CCircularBuffer::next_token_idx(index_t &start) if (m_bytes_count==0) return m_bytes_count; - int32_t tail_length=m_end_pos-m_buffer.vector; - int32_t head_length=m_buffer_end-m_begin_pos; + auto tail_length=std::distance(m_buffer.vector, m_end_pos); + auto head_length=std::distance(m_begin_pos, m_buffer_end); // determine position of finder pointer in memory block if (m_last_idx= num_chars) + move_pointer(&m_begin_pos, m_begin_pos+num_chars); + else + move_pointer(&m_begin_pos, m_buffer.vector+num_chars-head_length); m_last_idx-=num_chars; if (m_last_idx<0) @@ -377,7 +379,7 @@ void CCircularBuffer::detach_chunk(char** dest, int32_t* dest_size, int32_t dest bool CCircularBuffer::has_next_locally(char* part_begin, char* part_end) { - int32_t num_bytes_to_search=part_end-part_begin; + auto num_bytes_to_search=std::distance(part_begin, part_end); SGVector buffer_part(part_begin, num_bytes_to_search, false); m_tokenizer->set_text(buffer_part); @@ -388,7 +390,7 @@ bool CCircularBuffer::has_next_locally(char* part_begin, char* part_end) index_t CCircularBuffer::next_token_idx_locally(index_t &start, char* part_begin, char* part_end) { index_t end=0; - int32_t num_bytes_to_search=part_end-part_begin; + auto num_bytes_to_search=std::distance(part_begin, part_end); if (num_bytes_to_search<=0) { start=0; @@ -411,7 +413,7 @@ index_t CCircularBuffer::next_token_idx_locally(index_t &start, char* part_begin void CCircularBuffer::move_pointer(char** pointer, char* new_position) { - *pointer=new_position; - if (*pointer>=m_buffer.vector+m_buffer.vlen) - *pointer=m_buffer.vector; + *pointer = (new_position >= m_buffer_end) + ? 
m_buffer.vector + : new_position; } diff --git a/src/shogun/lib/DynInt.h b/src/shogun/lib/DynInt.h deleted file mode 100644 index 594475bc7cd..00000000000 --- a/src/shogun/lib/DynInt.h +++ /dev/null @@ -1,579 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2009 Soeren Sonnenburg - * Copyright (C) 2009 Fraunhofer Institute FIRST and Max Planck Society - */ - -#ifndef __DYNINT_H__ -#define __DYNINT_H__ - -#include - -#include -#include -#include - -namespace shogun -{ -/** @brief integer type of dynamic size - * - * This object can be used to create huge integers. These integers can be used - * directly instead of the usual int32_t etc types since operators are properly - * overloaded. - * - * An exampe use would be 512 wide unsigned ints consisting of four uint64's: - * - * CDynInt int512; - * - * This data type is mostly used as a (efficient) storage container for - * bit-mapped strings. Therefore, currently only comparison, assignment and - * bit operations are implemented. - * - * TODO: implement add,mul,div - */ -template class CDynInt -{ -public: - /** default constructor - * - * creates a DynInt that is all zero. 
- */ - CDynInt() - { - for (int i=0; i &x) - { - for (int i=0; i& operator=(const CDynInt& x) - { - for (int i=0; i operator|(const CDynInt& x) const - { - CDynInt r; - - for (int i=sz-1; i>=0; i--) - r.integer[i]=integer[i] | x.integer[i]; - - return r; - } - - /** overload & operator and return x & y - * - * @param x x - */ - const CDynInt operator&(const CDynInt& x) const - { - CDynInt r; - - for (int i=sz-1; i>=0; i--) - r.integer[i]=integer[i] & x.integer[i]; - - return r; - } - - /** overload << operator - * - * perform bit shift to the left - * - * @param shift shift by this amount - */ - CDynInt operator<<(int shift) - { - CDynInt r=*this; - - while (shift>0) - { - int s=CMath::min(shift, 8*((int) sizeof(T))-1); - - for (int i=0; i> (sizeof(T)*8 - s); - r.integer[i]= (r.integer[i] << s) | overflow; - } - - shift-=s; - } - - return r; - } - - /** overload >> operator - * - * perform bit shift to the right - * - * @param shift shift by this amount - */ - CDynInt operator>>(int shift) - { - CDynInt r=*this; - - while (shift>0) - { - int s=CMath::min(shift, 8*((int) sizeof(T))-1); - - for (int i=sz-1; i>=0; i--) - { - T overflow=0; - if (i>0) - overflow = (r.integer[i-1] << (sizeof(T)*8 - s)); - r.integer[i]= (r.integer[i] >> s) | overflow; - } - - shift-=s; - } - - return r; - } - - /** overload ^ operator and return x ^ y - * - * @param x x - */ - const CDynInt operator^(const CDynInt& x) const - { - CDynInt r; - - for (int i=sz-1; i>=0; i--) - r.integer[i]=integer[i] ^ x.integer[i]; - - return r; - } - - /** overload + operator and return x + y - * - * @param x x - */ - const CDynInt operator+(const CDynInt &x) const - { - CDynInt r; - - T overflow=0; - for (int i=sz-1; i>=0; i--) - { - r.integer[i]=integer[i]+x.integer[i]+overflow; - if (r.integer[i] < CMath::max(integer[i], x.integer[i])) - overflow=1; - else - overflow=0; - } - - return x; - } - - /** overload - operator and return x - y - * - * @param x x - */ - const CDynInt operator-(const CDynInt &x) 
const - { - return NULL; - } - - /** overload / operator and return x / y - * - * @param x x - */ - const CDynInt operator/(const CDynInt &x) const - { - return NULL; - } - - /** overload * operator and return x * y - * - * @param x x - */ - const CDynInt operator*(const CDynInt &x) const - { - return NULL; - } - - /** overload += operator; add x to current DynInt - * - * @param x x - */ - CDynInt& operator+=(const CDynInt &x) - { - return NULL; - } - - /** overload -= operator; substract x from current DynInt - * - * @param x x - */ - CDynInt& operator-=(const CDynInt &x) - { - return NULL; - } - - /** overload *= operator; multiple x to with current DynInt - * - * @param x x - */ - CDynInt& operator*=(const CDynInt &x) - { - return NULL; - } - - /** overload /= operator; divide current object by x - * - * @param x x - */ - CDynInt& operator/=(const CDynInt &x) - { - return NULL; - } - - /** overload == operator; test if current object equals x - * - * @param x x - */ - bool operator==(const CDynInt &x) const - { - for (int i=sz-1; i>=0; i--) - { - if (integer[i]!=x.integer[i]) - return false; - } - - return true; - } - - /** overload >= operator; test if current object greater equal x - * - * @param x x - */ - bool operator>=(const CDynInt &x) const - { - for (int i=0; ix.integer[i]) - return true; - if (integer[i] &x) const - { - for (int i=0; ix.integer[i]) - return false; - } - return true; - } - - /** overload > operator; test if current object is bigger than x - * - * @param x x - */ - bool operator>(const CDynInt &x) const - { - for (int i=0; ix.integer[i]) - return true; - if (integer[i] &x) const - { - for (int i=0; ix.integer[i]) - return false; - } - return false; - } - - /** overload ! 
operator; test if current object is not equal to x - * - * @param x x - */ - bool operator!=(const CDynInt &x) const - { - for (int i=sz-1; i>=0; i--) - { - if (integer[i]!=x.integer[i]) - return true; - } - return false; - } - - /** overload |= operator - * - * perform bitwise or with current DynInt and x - * - * @param x x - */ - CDynInt& operator|=(const CDynInt& x) - { - for (int i=sz-1; i>=0; i--) - integer[i]|=x.integer[i]; - - return *this; - } - - /** overload &= operator - * - * perform bitwise and with current DynInt and x - * - * @param x x - */ - CDynInt& operator&=(const CDynInt& x) - { - for (int i=sz-1; i>=0; i--) - integer[i]&=x.integer[i]; - - return *this; - } - - /** overload ^= operator - * - * perform bitwise xor with current DynInt and x - * - * @param x x - */ - CDynInt& operator^=(const CDynInt& x) - { - for (int i=sz-1; i>=0; i--) - integer[i]^=x.integer[i]; - - return *this; - } - - /** overload <<= operator - * - * perform bit shift to the left - * - * @param shift shift by this amount - */ - CDynInt& operator<<=(int shift) - { - *this=*this<>= operator - * - * perform bit shift to the right - * - * @param shift shift by this amount - */ - CDynInt& operator>>=(int shift) - { - *this=*this>>shift; - return *this; - } - - /** negate DynInt */ - CDynInt& operator~() - { - for (int i=sz-1; i>=0; i--) - integer[i]= ~integer[i]; - return *this; - } - - /** cast to least significant word *dangerous* */ - operator T() { return integer[sz-1]; } - - /** decrement DynInt by one */ - CDynInt& operator--() - { - T overflow=0; - for (int i=sz-1; i>=0; i--) - { - T x = integer[i]-1-overflow; - overflow=0; - if (integer[i]>x) - overflow=1; - integer[i]=x; - } - return *this; - } - - /** increment DynInt by one */ - CDynInt& operator++() - { - T overflow=0; - for (int i=sz-1; i>=0; i--) - { - T x = integer[i]+1+overflow; - overflow=0; - if (integer[i]>x) - overflow=1; - integer[i]=x; - } - return *this; - } - - /** print the current long integer in hex 
(without carriage return */ - void print_hex() const - { - for (int i=0; i uint192_t; - -/// 256 bit integer constructed out of 4 64bit uint64_t's -typedef CDynInt uint256_t; - -/// 512 bit integer constructed out of 8 64bit uint64_t's -typedef CDynInt uint512_t; - -/// 1024 bit integer constructed out of 16 64bit uint64_t's -typedef CDynInt uint1024_t; -//@} -} -#endif // __DYNINT_H__ diff --git a/src/shogun/lib/RxCppHeader.h b/src/shogun/lib/RxCppHeader.h new file mode 100644 index 00000000000..33e069c59a7 --- /dev/null +++ b/src/shogun/lib/RxCppHeader.h @@ -0,0 +1,27 @@ +#ifndef SHOGUN_RXCPPHEADER_H +#define SHOGUN_RXCPPHEADER_H + +/** +* Rx namespace +*/ +namespace rxcpp +{ + template + class observer; + + namespace subjects + { + template + class subject; + } + + template + class dynamic_observable; + template + class observable; + template + class subscriber; + class subscription; +} + +#endif // SHOGUN_RXCPPHEADER_H diff --git a/src/shogun/lib/SGMatrix.cpp b/src/shogun/lib/SGMatrix.cpp index acfafa150bc..12a2df5bd84 100644 --- a/src/shogun/lib/SGMatrix.cpp +++ b/src/shogun/lib/SGMatrix.cpp @@ -18,11 +18,11 @@ #include #include #include +#include #include #include #include #include -#include namespace shogun { @@ -388,8 +388,31 @@ void SGMatrix::create_diagonal_matrix(T* matrix, T* v,int32_t size) } template -float64_t SGMatrix::trace( - float64_t* mat, int32_t cols, int32_t rows) +SGMatrix SGMatrix::submatrix(index_t col_start, index_t col_end) const +{ + assert_on_cpu(); + return SGMatrix( + get_column_vector(col_start), num_rows, col_end - col_start, false); +} + +template +SGVector SGMatrix::get_column(index_t col) const +{ + assert_on_cpu(); + return SGVector(get_column_vector(col), num_rows, false); +} + +template +void SGMatrix::set_column(index_t col, const SGVector vec) +{ + assert_on_cpu(); + ASSERT(!vec.on_gpu()) + ASSERT(vec.vlen == num_rows) + sg_memcpy(&matrix[num_rows * col], vec.vector, sizeof(T) * num_rows); +} + +template +float64_t 
SGMatrix::trace(float64_t* mat, int32_t cols, int32_t rows) { float64_t trace=0; for (int64_t i=0; i SGMatrix::create_identity_matrix(index_t size, char scale) template <> SGMatrix SGMatrix::create_identity_matrix(index_t size, int8_t scale) { - SGMatrix I(size, size); + SGMatrix identity_matrix(size, size); for (index_t i=0; i SGMatrix SGMatrix::create_identity_matrix(index_t size, uint8_t scale) { - SGMatrix I(size, size); + SGMatrix identity_matrix(size, size); for (index_t i=0; i SGMatrix SGMatrix::create_identity_matrix(index_t size, bool scale) { - SGMatrix I(size, size); + SGMatrix identity_matrix(size, size); for (index_t i=0; i SGMatrix SGMatrix::create_identity_matrix(index_t size, int16_t scale) { - SGMatrix I(size, size); + SGMatrix identity_matrix(size, size); for (index_t i=0; i SGMatrix SGMatrix::create_identity_matrix(index_t size, uint16_t scale) { - SGMatrix I(size, size); + SGMatrix identity_matrix(size, size); for (index_t i=0; i SGMatrix SGMatrix::create_identity_matrix(index_t size, int32_t scale) { - SGMatrix I(size, size); + SGMatrix identity_matrix(size, size); for (index_t i=0; i SGMatrix SGMatrix::create_identity_matrix(index_t size, uint32_t scale) { - SGMatrix I(size, size); + SGMatrix identity_matrix(size, size); for (index_t i=0; i SGMatrix SGMatrix::create_identity_matrix(index_t size, int64_t scale) { - SGMatrix I(size, size); + SGMatrix identity_matrix(size, size); for (index_t i=0; i SGMatrix SGMatrix::create_identity_matrix(index_t size, uint64_t scale) { - SGMatrix I(size, size); + SGMatrix identity_matrix(size, size); for (index_t i=0; i SGMatrix SGMatrix::create_identity_matrix(index_t size, float32_t scale) { - SGMatrix I(size, size); + SGMatrix identity_matrix(size, size); for (index_t i=0; i SGMatrix SGMatrix::create_identity_matrix(index_t size, float64_t scale) { - SGMatrix I(size, size); + SGMatrix identity_matrix(size, size); for (index_t i=0; i SGMatrix SGMatrix::create_identity_matrix(index_t size, floatmax_t scale) { 
- SGMatrix I(size, size); + SGMatrix identity_matrix(size, size); for (index_t i=0; i SGMatrix SGMatrix::create_identity_matrix(index_t size, complex128_t scale) { - SGMatrix I(size, size); + SGMatrix identity_matrix(size, size); for (index_t i=0; i #include #include +#include #include #include @@ -39,6 +40,9 @@ template class SGMatrix : public SGReferencedData { friend class LinalgBackendEigen; + public: + typedef RandomIterator iterator; + public: typedef Eigen::Matrix EigenMatrixXt; typedef Eigen::Map > EigenMatrixXtMap; @@ -132,7 +136,7 @@ template class SGMatrix : public SGReferencedData /** Empty destructor */ virtual ~SGMatrix(); -#ifndef SWIG // SWIG should skip this part +#ifndef SWIG // SWIG should skip this parts /** Get a column vector * @param col column index * @return the column vector for index col @@ -144,6 +148,28 @@ template class SGMatrix : public SGReferencedData return &matrix[c*num_rows]; } + /** Given a range of columns (start, end), return a view + * of the matrix from column start to end excluded. + * \warning The returned SGMatrix is non-owning! + * @param col_start column index (inclusive) + * @param col_end column index (excluded) + * @return the submatrix + */ + SGMatrix submatrix(index_t col_start, index_t col_end) const; + + /** Map a column to a SGVector + * \warning The returned SGVector is non-owning! + * @param col column index + * @return the column vector for index col + */ + SGVector get_column(index_t col) const; + + /** Copy the content of a vector into a column + * @param col column index + * @param vec vector + */ + void set_column(index_t col, const SGVector vec); + /** Get a row vector * * @param row row index @@ -197,6 +223,12 @@ template class SGMatrix : public SGReferencedData return matrix[index]; } + /** Returns an iterator to the first element of the container. */ + iterator begin() noexcept { return iterator(matrix); } + + /** Returns an iterator to the element following the last element of the container. 
*/ + iterator end() noexcept { return iterator(matrix + (num_rows * num_cols)); } + #endif // SWIG should skip this part /** Get element at index @@ -240,9 +272,9 @@ template class SGMatrix : public SGReferencedData } /** The size */ - inline uint64_t size() const + inline int64_t size() const { - const uint64_t c=num_cols; + const int64_t c=num_cols; return num_rows*c; } diff --git a/src/shogun/lib/SGVector.cpp b/src/shogun/lib/SGVector.cpp index 89b4b0730bd..dc8388d75a4 100644 --- a/src/shogun/lib/SGVector.cpp +++ b/src/shogun/lib/SGVector.cpp @@ -21,11 +21,10 @@ #include #include +#include #include #include -#include - #define COMPLEX128_ERROR_NOARG(function) \ template <> \ void SGVector::function() \ @@ -97,14 +96,30 @@ SGVector::SGVector(index_t len, bool ref_counting) m_on_gpu.store(false, std::memory_order_release); } -template +template +SGVector::SGVector(SGMatrix matrix) + : SGReferencedData(matrix), vlen(matrix.num_cols * matrix.num_rows), + gpu_ptr(NULL) +{ + ASSERT(!matrix.on_gpu()) + vector = matrix.data(); + m_on_gpu.store(false, std::memory_order_release); +} + +template SGVector::SGVector(GPUMemoryBase* gpu_vector, index_t len) - : SGReferencedData(true), vector(NULL), vlen(len), - gpu_ptr(std::shared_ptr>(gpu_vector)) + : SGReferencedData(true), vector(NULL), vlen(len), + gpu_ptr(std::shared_ptr>(gpu_vector)) { m_on_gpu.store(true, std::memory_order_release); } +template +SGVector::SGVector(std::initializer_list il): + SGVector(il.begin(), il.end()) +{ +} + template SGVector::SGVector(const SGVector &orig) : SGReferencedData(orig) { @@ -722,13 +737,12 @@ float32_t SGVector::twonorm(const float32_t* x, int32_t len) template <> float64_t SGVector::twonorm(const float64_t* v, int32_t n) { - float64_t norm = 0.0; #ifdef HAVE_LAPACK - norm = cblas_dnrm2(n, v, 1); + return cblas_dnrm2(n, v, 1); #else - norm = CMath::sqrt(CMath::dot(v, v, n)); + SGVector wrapper(const_cast(v), n, false); + return CMath::sqrt(linalg::dot(wrapper, wrapper)); #endif - 
return norm; } template <> diff --git a/src/shogun/lib/SGVector.h b/src/shogun/lib/SGVector.h index 8b7fedfa673..3b2a7344e9d 100644 --- a/src/shogun/lib/SGVector.h +++ b/src/shogun/lib/SGVector.h @@ -19,10 +19,12 @@ #include #include #include +#include #include #include #include +#include namespace Eigen { @@ -43,6 +45,9 @@ template class SGVector : public SGReferencedData { friend class LinalgBackendEigen; + public: + typedef RandomIterator iterator; + public: typedef Eigen::Matrix EigenVectorXt; typedef Eigen::Matrix EigenRowVectorXt; @@ -65,6 +70,9 @@ template class SGVector : public SGReferencedData /** Constructor to create new vector in memory */ SGVector(index_t len, bool ref_counting=true); + /** Constructor to create new vector from a SGMatrix */ + SGVector(SGMatrix matrix); + /** Construct SGVector from GPU memory. * * @param vector GPUMemoryBase pointer @@ -86,12 +94,24 @@ template class SGVector : public SGReferencedData } #ifndef SWIG // SWIG should skip this part -#if defined(HAVE_CXX0X) || defined(HAVE_CXX11) /** The container type for a given template argument */ template using container_type = SGVector; -#endif // define (HAVE_CXX0X) || defined(HAVE_CXX11) + /** Construct SGVector from InputIterator list */ + template + SGVector(InputIt begin, InputIt end): + SGReferencedData(true), + vlen(std::distance(begin, end)), + gpu_ptr(nullptr) + { + vector = SG_MALLOC(T, vlen); + std::copy(begin, end, vector); + m_on_gpu.store(false, std::memory_order_release); + } + + /** Construct SGVector from initializer list */ + SGVector(std::initializer_list il); /** Wraps a matrix around the data of an Eigen3 column vector */ SGVector(EigenVectorXt& vec); @@ -142,6 +162,12 @@ template class SGVector : public SGReferencedData return vector; } + /** Returns an iterator to the first element of the container. */ + iterator begin() noexcept { return iterator(vector); } + + /** Returns an iterator to the element following the last element of the container. 
*/ + iterator end() noexcept { return iterator(vector + vlen); } + SGVector& operator=(const SGVector&); /** Cast to pointer */ diff --git a/src/shogun/lib/ShogunException.cpp b/src/shogun/lib/ShogunException.cpp index 5da565c39e8..3a21aa4dc7f 100644 --- a/src/shogun/lib/ShogunException.cpp +++ b/src/shogun/lib/ShogunException.cpp @@ -35,7 +35,6 @@ ShogunException::init(const char* str) ShogunException::ShogunException(const char* str) { #ifndef WIN32 - CSignal::unset_handler(); #endif init(str); diff --git a/src/shogun/lib/Signal.cpp b/src/shogun/lib/Signal.cpp index 1afc6b96cbb..fb5102f9357 100644 --- a/src/shogun/lib/Signal.cpp +++ b/src/shogun/lib/Signal.cpp @@ -5,174 +5,87 @@ * (at your option) any later version. * * Written (W) 1999-2009 Soeren Sonnenburg + * Written (W) 2017 Giovanni De Toni * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society */ -#include - +#include #include -#include +#include #include #include -#include using namespace shogun; +using namespace rxcpp; + +bool CSignal::m_active = false; +CSignal::SGSubjectS* CSignal::m_subject = new rxcpp::subjects::subject(); -int CSignal::signals[NUMTRAPPEDSIGS]={SIGINT, SIGURG}; -struct sigaction CSignal::oldsigaction[NUMTRAPPEDSIGS]; -bool CSignal::active=false; -bool CSignal::cancel_computation=false; -bool CSignal::cancel_immediately=false; +CSignal::SGObservableS* CSignal::m_observable = + new CSignal::SGObservableS(CSignal::m_subject->get_observable()); +CSignal::SGSubscriberS* CSignal::m_subscriber = + new CSignal::SGSubscriberS(CSignal::m_subject->get_subscriber()); CSignal::CSignal() -: CSGObject() { } CSignal::~CSignal() { - if (!unset_handler()) - SG_PRINT("error uninitalizing signal handler\n") } void CSignal::handler(int signal) { - if (signal == SIGINT) - { - SG_SPRINT("\nImmediately return to prompt / Prematurely finish computations / Do nothing (I/P/D)? 
") - char answer=fgetc(stdin); + /* If the handler is not enabled, then return */ + if (!m_active) + return; - if (answer == 'I') - { - unset_handler(); - set_cancel(true); - if (sg_print_error) - sg_print_error(stdout, "sg stopped by SIGINT\n"); - } - else if (answer == 'P') - set_cancel(); - else - SG_SPRINT("Continuing...\n") - } - else if (signal == SIGURG) - set_cancel(); - else - SG_SPRINT("unknown signal %d received\n", signal) -} - -bool CSignal::set_handler() -{ - if (!active) + if (signal == SIGINT) { - struct sigaction act; - sigset_t st; - - sigemptyset(&st); - for (int32_t i=0; i=0; j--) - sigaction(signals[i], &oldsigaction[i], NULL); - - clear(); - return false; - } + case 'I': + SG_SPRINT("[ShogunSignalHandler] Killing the application...\n"); + m_subscriber->on_completed(); + exit(0); + break; + case 'C': + SG_SPRINT( + "[ShogunSignalHandler] Terminating" + " prematurely current algorithm...\n"); + m_subscriber->on_next(SG_BLOCK_COMP); + break; + case 'P': + SG_SPRINT("[ShogunSignalHandler] Pausing current computation...") + m_subscriber->on_next(SG_PAUSE_COMP); + break; + default: + SG_SPRINT("[ShogunSignalHandler] Continuing...\n") + break; } - - active=true; - return true; } else - return false; -} - -bool CSignal::unset_handler() -{ - if (active) { - bool result=true; - - for (int32_t i=0; i=NSIG) -Sigfunc *handlers[NSIG]={0}; - -int sigaddset(sigset_t *set, int signo) -{ - if (SIGBAD(signo)) { - errno = EINVAL; - return -1; + SG_SPRINT("[ShogunSignalHandler] Unknown signal %d received\n", signal) } - *set |= 1 << (signo-1); - return 0; } -int sigaction(int signo, const struct sigaction *act, struct sigaction *oact) +void CSignal::reset_handler() { - if (SIGBAD(signo)) { - errno = EINVAL; - return -1; - } - - if(oact){ - oact->sa_handler = handlers[signo]; - oact->sa_mask = 0; - oact->sa_flags =0; - } - if (act) - handlers[signo]=act->sa_handler; + delete m_subject; + delete m_observable; + delete m_subscriber; - return 0; + m_subject = new 
rxcpp::subjects::subject(); + m_observable = new CSignal::SGObservableS(m_subject->get_observable()); + m_subscriber = new CSignal::SGSubscriberS(m_subject->get_subscriber()); } -#endif diff --git a/src/shogun/lib/Signal.h b/src/shogun/lib/Signal.h index 764ac345a6c..f63b5d297b3 100644 --- a/src/shogun/lib/Signal.h +++ b/src/shogun/lib/Signal.h @@ -5,139 +5,99 @@ * (at your option) any later version. * * Written (W) 1999-2009 Soeren Sonnenburg + * Written (W) 2017 Giovanni De Toni * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society */ #ifndef __SIGNAL__H_ #define __SIGNAL__H_ -#include - -#if defined(__MINGW64__) || defined(_MSC_VER) -typedef unsigned long sigset_t; -#endif -#if defined(__MINGW32__) && !defined(__MINGW64__) -typedef int sigset_t; -#endif - -#ifndef SIGURG -#define SIGURG -16 -#endif - -#if defined(__MINGW64__) || defined(_MSC_VER) || defined(__MINGW32__) -typedef void Sigfunc (int); - -struct sigaction { - Sigfunc *sa_handler; - sigset_t sa_mask; - int sa_flags; -}; - -#define sigemptyset(ptr) (*(ptr) = 0) -#define sigfillset(ptr) ( *(ptr) = ~(sigset_t)0,0) - -int sigaddset(sigset_t*, int); -int sigaction(int signo, const struct sigaction *act, struct sigaction *oact); -#endif - -#ifndef DISABLE_CANCEL_CALLBACK -namespace shogun -{ -extern void (*sg_cancel_computations)(bool &delayed, bool &immediately); -} -#endif - -#include #include -#include -#define NUMTRAPPEDSIGS 2 - namespace shogun { -/** @brief Class Signal implements signal handling to e.g. allow ctrl+c to cancel a - * long running process. - * - * This is done in two ways: - * - * -# A signal handler is attached to trap the SIGINT and SIGURG signal. - * Pressing ctrl+c or sending the SIGINT (kill ...) signal to the shogun - * process will make shogun print a message asking to immediately exit the - * running method and to fall back to the command line. 
- * -# When an URG signal is received or ctrl+c P is pressed shogun will - * prematurely stop a method and continue execution. For example when an SVM - * solver takes a long time without progressing much, one might still be - * interested in the result and should thus send SIGURG or interactively - * prematurely stop the method - */ -class CSignal : public CSGObject -{ + /** + * Possible Shogun signal types. + */ + enum sg_signals_types + { + SG_BLOCK_COMP, + SG_PAUSE_COMP + }; + + /** @brief Class Signal implements signal handling to e.g. allow CTRL+C to + * cancel a long running process. + * + * -# A signal handler is attached to trap the SIGINT signal. + * Pressing CTRL+C or sending the SIGINT (kill ...) signal to the shogun + * process will make shogun print a message asking the user to choose an + * option bewteen: immediately exit the running method and fall back to + * the command line, prematurely stop the current algoritmh and do nothing. + */ + class CSignal : CSGObject + { public: - /** default constructor */ + typedef rxcpp::subjects::subject SGSubjectS; + typedef rxcpp::observable> + SGObservableS; + typedef rxcpp::subscriber> + SGSubscriberS; + CSignal(); virtual ~CSignal(); - /** handler + /** Signal handler. Need to be registered with std::signal. 
* * @param signal signal number */ static void handler(int signal); - /** set handler - * - * @return if setting was successful - */ - static bool set_handler(); - - /** unset handler - * - * @return if unsetting was successful - */ - static bool unset_handler(); - - /** clear signals */ - static void clear(); - - /** clear cancel flag signals */ - static void clear_cancel(); - - /** set cancel flag signals */ - static void set_cancel(bool immediately=false); +#ifndef SWIG // SWIG should skip this part + /** + * Get observable + * @return RxCpp observable + */ + SGObservableS* get_observable() + { + return m_observable; + }; +#endif - /** cancel computations - * - * @return if computations should be cancelled - */ - static inline bool cancel_computations() +#ifndef SWIG // SWIG should skip this part + /** + * Get subscriber + * @return RxCpp subscriber + */ + SGSubscriberS* get_subscriber() { -#ifndef DISABLE_CANCEL_CALLBACK - if (sg_cancel_computations) - sg_cancel_computations(cancel_computation, cancel_immediately); + return m_subscriber; + }; #endif - if (cancel_immediately) - throw ShogunException("Computations have been cancelled immediately"); - return cancel_computation; + /** Enable signal handler + */ + void enable_handler() + { + m_active = true; } + /** + * Reset handler in case of multiple instantiation + */ + static void reset_handler(); /** @return object name */ virtual const char* get_name() const { return "Signal"; } - protected: - /** signals; handling external lib */ - static int signals[NUMTRAPPEDSIGS]; - - /** signal actions */ - static struct sigaction oldsigaction[NUMTRAPPEDSIGS]; - - /** active signal */ - static bool active; + private: + /** Active signal */ + static bool m_active; - /** if computation should be cancelled */ - static bool cancel_computation; - - /** if shogun should return ASAP */ - static bool cancel_immediately; + public: + /** Observable */ + static SGSubjectS* m_subject; + static SGObservableS* m_observable; + static 
SGSubscriberS* m_subscriber; }; } #endif // __SIGNAL__H_ diff --git a/src/shogun/lib/any.h b/src/shogun/lib/any.h index 5bb8e6caccd..9d61551725d 100644 --- a/src/shogun/lib/any.h +++ b/src/shogun/lib/any.h @@ -35,8 +35,9 @@ #ifndef _ANY_H_ #define _ANY_H_ -#include #include +#include +#include #include #ifdef HAVE_CXA_DEMANGLE #include @@ -44,393 +45,429 @@ namespace shogun { - /** Converts compiler-dependent name of class to - * something human readable. - * @return human readable name of class - */ - template - std::string demangledType() - { + /** Converts compiler-dependent name of class to + * something human readable. + * @return human readable name of class + */ + template + std::string demangledType() + { #ifdef HAVE_CXA_DEMANGLE size_t length; int status; - char* demangled = abi::__cxa_demangle(typeid(T).name(), nullptr, &length, &status); + char* demangled = + abi::__cxa_demangle(typeid(T).name(), nullptr, &length, &status); std::string demangled_string(demangled); free(demangled); #else std::string demangled_string(typeid(T).name()); #endif - return demangled_string; - } - - enum class PolicyType { - OWNING, - NON_OWNING - }; - - /** @brief An interface for a policy to store a value. - * Value can be any data like primitive data-types, shogun objects, etc. - * Policy defines how to handle this data. It works with a - * provided memory region and is able to set value, clear it - * and return the type-name as string. - */ - class BaseAnyPolicy - { - public: - /** Puts provided value pointed by v (untyped to be generic) to storage. - * @param storage pointer to a pointer to storage - * @param v pointer to value - */ - virtual void set(void** storage, const void* v) const = 0; - - /** Clears storage. - * @param storage pointer to a pointer to storage - */ - virtual void clear(void** storage) const = 0; - - /** Returns type-name as string. - * @return name of type class - */ - virtual std::string type() const = 0; - - /** Compares type. 
- * @param ti type information - * @return true if type matches - */ - virtual bool matches(const std::type_info& ti) const = 0; - - /** Compares two storages. - * @param storage pointer to a pointer to storage - * @param other_storage pointer to a pointer to another storage - * @return true if both storages have same value - */ - virtual bool equals(void** storage, void** other_storage) const = 0; - - /** Returns the name of policy. - * @return name of policy - */ - virtual std::string policy_name() const = 0; - - /** Returns the type of policy. - * @return type of policy - */ - virtual PolicyType policy_type() const = 0; - }; - - /** @brief This is one concrete implementation of policy that - * uses void pointers to store values. - */ - template - class PointerValueAnyPolicy : public BaseAnyPolicy - { - public: - /** Puts provided value pointed by v (untyped to be generic) to storage. - * @param storage pointer to a pointer to storage - * @param v pointer to value - */ - virtual void set(void** storage, const void* v) const - { - *(storage) = new T(*reinterpret_cast(v)); - } - - /** Clears storage. - * @param storage pointer to a pointer to storage - */ - virtual void clear(void** storage) const - { - delete reinterpret_cast(*storage); - } - - /** Returns type-name as string. - * @return name of type class - */ - virtual std::string type() const - { - return demangledType(); - } - - /** Compares type. - * @param ti type information - * @return true if type matches - */ - virtual bool matches(const std::type_info& ti) const - { - return typeid(T) == ti; - } - - /** Compares two storages. 
- * @param storage pointer to a pointer to storage - * @param other_storage pointer to a pointer to another storage - * @return true if both storages have same value - */ - bool equals(void** storage, void** other_storage) const - { - T typed_storage = *(reinterpret_cast(*storage)); - T typed_other_storage = *(reinterpret_cast(*other_storage)); - return typed_storage == typed_other_storage; - } - - virtual std::string policy_name() const { - return "owning"; - } - - virtual PolicyType policy_type() const { - return PolicyType::OWNING; - } - }; - - template - class NonOwningAnyPolicy : public BaseAnyPolicy - { - public: - /** Puts provided value pointed by v (untyped to be generic) to storage. - * @param storage pointer to a pointer to storage - * @param v pointer to value - */ - virtual void set(void** storage, const void* v) const - { - *(storage) = const_cast(v); - } - - /** Clears storage. - * @param storage pointer to a pointer to storage - */ - virtual void clear(void** storage) const - { - } - - /** Returns type-name as string. - * @return name of type class - */ - virtual std::string type() const - { - return demangledType(); - } - - /** Compares type. - * @param ti type information - * @return true if type matches - */ - virtual bool matches(const std::type_info& ti) const - { - return typeid(T) == ti; - } - - /** Compares two storages. 
- * @param storage pointer to a pointer to storage - * @param other_storage pointer to a pointer to another storage - * @return true if both storages have same value - */ - bool equals(void** storage, void** other_storage) const - { - T typed_storage = *(reinterpret_cast(*storage)); - T typed_other_storage = *(reinterpret_cast(*other_storage)); - return typed_storage == typed_other_storage; - } - - virtual std::string policy_name() const { - return "non owning"; - } - - virtual PolicyType policy_type() const { - return PolicyType::NON_OWNING; - } - }; - - template - static BaseAnyPolicy* owning_policy() - { - typedef PointerValueAnyPolicy Policy; - static Policy policy; - return &policy; - } - - template - static BaseAnyPolicy* non_owning_policy() - { - typedef NonOwningAnyPolicy Policy; - static Policy policy; - return &policy; - } - - /** @brief Allows to store objects of arbitrary types - * by using a BaseAnyPolicy and provides a type agnostic API. - * See its usage in CSGObject::Self, CSGObject::set(), CSGObject::get() - * and CSGObject::has(). - * . 
- */ - class Any - { - public: - /** Used to denote an empty Any object */ - struct Empty; - - /** Empty value constructor */ - Any() : Any(owning_policy(), nullptr) - { - } - - /** Constructor to copy value */ - template - explicit Any(const T& v) : Any(owning_policy(), nullptr) - { - policy->set(&storage, &v); - } - - /** Base constructor */ - Any(BaseAnyPolicy* the_policy, void* the_storage) : policy(the_policy), storage(the_storage) - { - } - - /** Copy constructor */ - Any(const Any& other) : Any(other.policy, nullptr) - { - assert_same_policy_type(other.policy); - policy->set(&storage, other.storage); - } - - /** Assignment operator - * @param other another Any object - * @return Any object - */ - Any& operator=(const Any& other) - { - assert_same_policy_type(other.policy); - policy->clear(&storage); - policy = other.policy; - policy->set(&storage, other.storage); - return *(this); - } - - /** Equality operator - * @param lhs Any object on left hand side - * @param rhs Any object on right hand side - * @return true if both are equal - */ - friend inline bool operator==(const Any& lhs, const Any& rhs); - - /** Inequality operator - * @param lhs Any object on left hand side - * @param rhs Any object on right hand side - * @return false if both are equal - */ - friend inline bool operator!=(const Any& lhs, const Any& rhs); - - /** Destructor */ - ~Any() - { - policy->clear(&storage); - } - - /** Casts hidden value to provided type, fails otherwise. - * @return type-casted value - */ - template - T& as() const - { - if (same_type()) - { - return *(reinterpret_cast(storage)); - } - else - { - throw std::logic_error("Bad cast to " + demangledType() + - " but the type is " + policy->type()); - } - } - - /** @return true if type is same. */ - template - inline bool same_type() const - { - return (policy == owning_policy()) || (policy == non_owning_policy()) || same_type_fallback(); - } - - /** @return true if type-id is same. 
*/ - template - bool same_type_fallback() const - { - return policy->matches(typeid(T)); - } - - /** @return true if Any object is empty. */ - bool empty() const - { - return same_type(); - } - - private: - - void assert_same_policy_type(BaseAnyPolicy* other_policy) { - if (policy->policy_type() != other_policy->policy_type()) { - throw std::logic_error("The policies are different: " + - policy->policy_name() + " and " + - other_policy->policy_name()); - } - } - - private: - BaseAnyPolicy* policy; - void* storage; - }; - - inline bool operator==(const Any& lhs, const Any& rhs) - { - void* lhs_storage = lhs.storage; - void* rhs_storage = rhs.storage; - return lhs.policy == rhs.policy && - lhs.policy->equals(&lhs_storage, &rhs_storage); - } - - inline bool operator!=(const Any& lhs, const Any& rhs) - { - return !(lhs == rhs); - } - - /** Used to denote an empty Any object */ - struct Any::Empty - { - /** Equality operator */ - bool operator==(const Empty& other) const - { - return true; - } - }; - - /** Erases value type i.e. converts it to Any - * For input object of any type, it returns an Any object - * which stores the input object's raw value. It saves the type - * information internally to be recalled later by using recall_type(). - * - * @param v value - * @return Any object with the input value - */ - template - inline Any erase_type(const T& v) - { - return Any(v); - } - - template - inline Any erase_type_non_owning(T* v) - { - return Any(non_owning_policy(), v); - } - - /** Tries to recall Any type, fails when type is wrong. - * Any stores type information of an object internally in a BaseAnyPolicy. - * This function returns type-casted value if the internal type information - * matches with the provided typename, otherwise throws std::logic_error. 
- * - * @param any object of Any - * @return type-casted value - */ - template - inline T recall_type(const Any& any) - { - return any.as(); - } - + return demangled_string; + } + + enum class PolicyType + { + OWNING, + NON_OWNING + }; + + /** @brief An interface for a policy to store a value. + * Value can be any data like primitive data-types, shogun objects, etc. + * Policy defines how to handle this data. It works with a + * provided memory region and is able to set value, clear it + * and return the type-name as string. + */ + class BaseAnyPolicy + { + public: + /** Puts provided value pointed by v (untyped to be generic) to storage. + * @param storage pointer to a pointer to storage + * @param v pointer to value + */ + virtual void set(void** storage, const void* v) const = 0; + + /** Clears storage. + * @param storage pointer to a pointer to storage + */ + virtual void clear(void** storage) const = 0; + + /** Returns type-name as string. + * @return name of type class + */ + virtual std::string type() const = 0; + + /** Returns type info + * @return type info of value's type + */ + virtual const std::type_info& type_info() const = 0; + + /** Compares type. + * @param ti type information + * @return true if type matches + */ + virtual bool matches(const std::type_info& ti) const = 0; + + /** Compares two storages. + * @param storage pointer to a pointer to storage + * @param other_storage pointer to a pointer to another storage + * @return true if both storages have same value + */ + virtual bool equals(void** storage, void** other_storage) const = 0; + + /** Returns the name of policy. + * @return name of policy + */ + virtual std::string policy_name() const = 0; + + /** Returns the type of policy. + * @return type of policy + */ + virtual PolicyType policy_type() const = 0; + }; + + /** @brief This is one concrete implementation of policy that + * uses void pointers to store values. 
+ */ + template + class PointerValueAnyPolicy : public BaseAnyPolicy + { + public: + /** Puts provided value pointed by v (untyped to be generic) to storage. + * @param storage pointer to a pointer to storage + * @param v pointer to value + */ + virtual void set(void** storage, const void* v) const + { + *(storage) = new T(*reinterpret_cast(v)); + } + + /** Clears storage. + * @param storage pointer to a pointer to storage + */ + virtual void clear(void** storage) const + { + delete reinterpret_cast(*storage); + } + + /** Returns type-name as string. + * @return name of type class + */ + virtual std::string type() const + { + return demangledType(); + } + + /** Returns type info + * @return type info of value's type + */ + virtual const std::type_info& type_info() const + { + return typeid(T); + } + + /** Compares type. + * @param ti type information + * @return true if type matches + */ + virtual bool matches(const std::type_info& ti) const + { + return typeid(T) == ti; + } + + /** Compares two storages. + * @param storage pointer to a pointer to storage + * @param other_storage pointer to a pointer to another storage + * @return true if both storages have same value + */ + bool equals(void** storage, void** other_storage) const + { + T typed_storage = *(reinterpret_cast(*storage)); + T typed_other_storage = *(reinterpret_cast(*other_storage)); + return typed_storage == typed_other_storage; + } + + virtual std::string policy_name() const + { + return "owning"; + } + + virtual PolicyType policy_type() const + { + return PolicyType::OWNING; + } + }; + + template + class NonOwningAnyPolicy : public BaseAnyPolicy + { + public: + /** Puts provided value pointed by v (untyped to be generic) to storage. + * @param storage pointer to a pointer to storage + * @param v pointer to value + */ + virtual void set(void** storage, const void* v) const + { + *(storage) = const_cast(v); + } + + /** Clears storage. 
+ * @param storage pointer to a pointer to storage + */ + virtual void clear(void** storage) const + { + } + + /** Returns type-name as string. + * @return name of type class + */ + virtual std::string type() const + { + return demangledType(); + } + + /** Returns type info + * @return type info of value's type + */ + virtual const std::type_info& type_info() const + { + return typeid(T); + } + + /** Compares type. + * @param ti type information + * @return true if type matches + */ + virtual bool matches(const std::type_info& ti) const + { + return typeid(T) == ti; + } + + /** Compares two storages. + * @param storage pointer to a pointer to storage + * @param other_storage pointer to a pointer to another storage + * @return true if both storages have same value + */ + bool equals(void** storage, void** other_storage) const + { + T typed_storage = *(reinterpret_cast(*storage)); + T typed_other_storage = *(reinterpret_cast(*other_storage)); + return typed_storage == typed_other_storage; + } + + virtual std::string policy_name() const + { + return "non owning"; + } + + virtual PolicyType policy_type() const + { + return PolicyType::NON_OWNING; + } + }; + + template + static BaseAnyPolicy* owning_policy() + { + typedef PointerValueAnyPolicy Policy; + static Policy policy; + return &policy; + } + + template + static BaseAnyPolicy* non_owning_policy() + { + typedef NonOwningAnyPolicy Policy; + static Policy policy; + return &policy; + } + + /** @brief Allows to store objects of arbitrary types + * by using a BaseAnyPolicy and provides a type agnostic API. + * See its usage in CSGObject::Self, CSGObject::set(), CSGObject::get() + * and CSGObject::has(). + * . 
+ */ + class Any + { + public: + /** Used to denote an empty Any object */ + struct Empty; + + /** Empty value constructor */ + Any() : Any(owning_policy(), nullptr) + { + } + + /** Constructor to copy value */ + template + explicit Any(const T& v) : Any(owning_policy(), nullptr) + { + policy->set(&storage, &v); + } + + /** Base constructor */ + Any(BaseAnyPolicy* the_policy, void* the_storage) + : policy(the_policy), storage(the_storage) + { + } + + /** Copy constructor */ + Any(const Any& other) : Any(other.policy, nullptr) + { + assert_same_policy_type(other.policy); + policy->set(&storage, other.storage); + } + + /** Assignment operator + * @param other another Any object + * @return Any object + */ + Any& operator=(const Any& other) + { + assert_same_policy_type(other.policy); + policy->clear(&storage); + policy = other.policy; + policy->set(&storage, other.storage); + return *(this); + } + + /** Equality operator + * @param lhs Any object on left hand side + * @param rhs Any object on right hand side + * @return true if both are equal + */ + friend inline bool operator==(const Any& lhs, const Any& rhs); + + /** Inequality operator + * @param lhs Any object on left hand side + * @param rhs Any object on right hand side + * @return false if both are equal + */ + friend inline bool operator!=(const Any& lhs, const Any& rhs); + + /** Destructor */ + ~Any() + { + policy->clear(&storage); + } + + /** Casts hidden value to provided type, fails otherwise. + * @return type-casted value + */ + template + T& as() const + { + if (same_type()) + { + return *(reinterpret_cast(storage)); + } + else + { + throw std::logic_error( + "Bad cast to " + demangledType() + " but the type is " + + policy->type()); + } + } + + /** @return true if type is same. */ + template + inline bool same_type() const + { + return (policy == owning_policy()) || + (policy == non_owning_policy()) || + same_type_fallback(); + } + + /** @return true if type-id is same. 
*/ + template + bool same_type_fallback() const + { + return policy->matches(typeid(T)); + } + + /** @return true if Any object is empty. */ + bool empty() const + { + return same_type(); + } + + const std::type_info& type_info() const + { + return policy->type_info(); + } + + private: + void assert_same_policy_type(BaseAnyPolicy* other_policy) + { + if (policy->policy_type() != other_policy->policy_type()) + { + throw std::logic_error( + "The policies are different: " + policy->policy_name() + + " and " + other_policy->policy_name()); + } + } + + private: + BaseAnyPolicy* policy; + void* storage; + }; + + inline bool operator==(const Any& lhs, const Any& rhs) + { + void* lhs_storage = lhs.storage; + void* rhs_storage = rhs.storage; + return lhs.policy == rhs.policy && + lhs.policy->equals(&lhs_storage, &rhs_storage); + } + + inline bool operator!=(const Any& lhs, const Any& rhs) + { + return !(lhs == rhs); + } + + /** Used to denote an empty Any object */ + struct Any::Empty + { + /** Equality operator */ + bool operator==(const Empty& other) const + { + return true; + } + }; + + /** Erases value type i.e. converts it to Any + * For input object of any type, it returns an Any object + * which stores the input object's raw value. It saves the type + * information internally to be recalled later by using recall_type(). + * + * @param v value + * @return Any object with the input value + */ + template + inline Any erase_type(const T& v) + { + return Any(v); + } + + template + inline Any erase_type_non_owning(T* v) + { + return Any(non_owning_policy(), v); + } + + /** Tries to recall Any type, fails when type is wrong. + * Any stores type information of an object internally in a BaseAnyPolicy. + * This function returns type-casted value if the internal type information + * matches with the provided typename, otherwise throws std::logic_error. 
+ * + * @param any object of Any + * @return type-casted value + */ + template + inline T recall_type(const Any& any) + { + return any.as(); + } } -#endif //_ANY_H_ +#endif //_ANY_H_ diff --git a/src/shogun/lib/config.h.in b/src/shogun/lib/config.h.in index e3699f1d55c..2cf9d91a036 100644 --- a/src/shogun/lib/config.h.in +++ b/src/shogun/lib/config.h.in @@ -10,11 +10,11 @@ #cmakedefine HAVE_CURL 1 #cmakedefine HAVE_JSON 1 #cmakedefine HAVE_XML 1 -#cmakedefine HAVE_LARGEFILE 1 #cmakedefine HAVE_DOXYGEN 1 #cmakedefine HAVE_LAPACK 1 #cmakedefine HAVE_MVEC 1 #cmakedefine HAVE_PROTOBUF 1 +#cmakedefine HAVE_TFLOGGER 1 #cmakedefine HAVE_ARPACK 1 #cmakedefine HAVE_VIENNACL 1 @@ -42,6 +42,13 @@ /* Tells ViennaCL to use OpenCL as computation backend */ #cmakedefine VIENNACL_WITH_OPENCL 1 +/* Eigen Lapack optimization flags */ +#cmakedefine EIGEN_USE_BLAS 1 +#cmakedefine EIGEN_USE_LAPACKE 1 +#cmakedefine EIGEN_USE_LAPACKE_STRICT 1 +#cmakedefine EIGEN_USE_MKL_VML 1 +#cmakedefine EIGEN_USE_MKL_ALL 1 + /* for linear algebra global backend setups */ #cmakedefine USE_EIGEN3_GLOBAL 1 #cmakedefine USE_VIENNACL_GLOBAL 1 @@ -90,7 +97,6 @@ #cmakedefine HAVE_FDOPEN 1 -#cmakedefine USE_SPINLOCKS 1 #cmakedefine USE_SHORTREAL_KERNELCACHE 1 #cmakedefine USE_BIGSTATES 1 @@ -110,7 +116,6 @@ #cmakedefine USE_GZIP 1 #cmakedefine USE_BZIP2 1 #cmakedefine USE_LZMA 1 -#cmakedefine USE_REFERENCE_COUNTING 1 #cmakedefine USE_SNAPPY 1 #cmakedefine HAVE_SSE2 1 @@ -126,8 +131,6 @@ #cmakedefine HAVE_CXX0X 1 #cmakedefine HAVE_CXX11 1 -#cmakedefine HAVE_CXX11_ATOMIC 1 -#cmakedefine HAVE_STD_UNORDERED_MAP 1 /* does the compiler support abi::__cxa_demangle */ #cmakedefine HAVE_CXA_DEMANGLE 1 diff --git a/src/shogun/lib/cpu.h b/src/shogun/lib/cpu.h index 0916756d799..fa434779062 100644 --- a/src/shogun/lib/cpu.h +++ b/src/shogun/lib/cpu.h @@ -36,9 +36,23 @@ SG_FORCED_INLINE static void CpuRelax() { #ifdef _MSC_VER - _mm_pause(); + _mm_pause(); +#elif defined(__i386__) || defined(__x86_64__) + asm 
volatile("pause"); +#elif defined(__arm__) || defined(__aarch64__) + asm volatile("yield"); +#elif defined(__powerpc__) || defined(__ppc__) + asm volatile("or 27,27,27"); +#elif defined(__s390__) || defined(__s390x__) + asm volatile("" : : : "memory"); #else - asm("pause"); +#warning "Unknown architecture, defaulting to delaying loop." + static uint32_t bar = 13; + static uint32_t* foo = &bar; + for (unsigned int i = 0; i < 100000; i++) + { + *foo = (*foo * 33) + 17; + } #endif } diff --git a/src/shogun/lib/external/brent.cpp b/src/shogun/lib/external/brent.cpp deleted file mode 100644 index 72b7ec6c1a7..00000000000 --- a/src/shogun/lib/external/brent.cpp +++ /dev/null @@ -1,1525 +0,0 @@ -# include -# include -# include -# include - -using namespace std; - -# include "brent.h" - -namespace shogun -{ - -//****************************************************************************80 - -double glomin ( double a, double b, double c, double m, double e, double t, - func_base& f, double &x ) - -//****************************************************************************80 -// -// Purpose: -// -// GLOMIN seeks a global minimum of a function F(X) in an interval [A,B]. -// -// Discussion: -// -// This function assumes that F(X) is twice continuously differentiable -// over [A,B] and that F''(X) <= M for all X in [A,B]. -// -// Licensing: -// -// This code is distributed under the GNU LGPL license. -// -// Modified: -// -// 06 May 2012 -// -// Author: -// -// Original FORTRAN77 version by Richard Brent. -// C++ version by John Burkardt. -// Modifications by John Denker. -// -// Reference: -// -// Richard Brent, -// Algorithms for Minimization Without Derivatives, -// Dover, 2002, -// ISBN: 0-486-41998-3, -// LC: QA402.5.B74. -// -// Parameters: -// -// Input, double A, B, the endpoints of the interval. -// It must be the case that A < B. -// -// Input, double C, an initial guess for the global -// minimizer. If no good guess is known, C = A or B is acceptable. 
-// -// Input, double M, the bound on the second derivative. -// -// Input, double E, a positive tolerance, a bound for the -// absolute error in the evaluation of F(X) for any X in [A,B]. -// -// Input, double T, a positive error tolerance. -// -// Input, func_base& F, a user-supplied c++ functor whose -// global minimum is being sought. The input and output -// of F() are of type double. -// -// Output, double &X, the estimated value of the abscissa -// for which F attains its global minimum value in [A,B]. -// -// Output, double GLOMIN, the value F(X). -// -{ - double a0; - double a2; - double a3; - double d0; - double d1; - double d2; - double h; - int k; - double m2; - double macheps; - double p; - double q; - double qs; - double r; - double s; - double sc; - double y; - double y0; - double y1; - double y2; - double y3; - double yb; - double z0; - double z1; - double z2; - - a0 = b; - x = a0; - a2 = a; - y0 = f ( b ); - yb = y0; - y2 = f ( a ); - y = y2; - - if ( y0 < y ) - { - y = y0; - } - else - { - x = a; - } - - if ( m <= 0.0 || b <= a ) - { - return y; - } - - macheps = r8_epsilon ( ); - - m2 = 0.5 * ( 1.0 + 16.0 * macheps ) * m; - - if ( c <= a || b <= c ) - { - sc = 0.5 * ( a + b ); - } - else - { - sc = c; - } - - y1 = f ( sc ); - k = 3; - d0 = a2 - sc; - h = 9.0 / 11.0; - - if ( y1 < y ) - { - x = sc; - y = y1; - } -// -// Loop. 
-// - for ( ; ; ) - { - d1 = a2 - a0; - d2 = sc - a0; - z2 = b - a2; - z0 = y2 - y1; - z1 = y2 - y0; - r = d1 * d1 * z0 - d0 * d0 * z1; - p = r; - qs = 2.0 * ( d0 * z1 - d1 * z0 ); - q = qs; - - if ( k < 1000000 || y2 <= y ) - { - for ( ; ; ) - { - if ( q * ( r * ( yb - y2 ) + z2 * q * ( ( y2 - y ) + t ) ) < - z2 * m2 * r * ( z2 * q - r ) ) - { - a3 = a2 + r / q; - y3 = f ( a3 ); - - if ( y3 < y ) - { - x = a3; - y = y3; - } - } - k = ( ( 1611 * k ) % 1048576 ); - q = 1.0; - r = ( b - a ) * 0.00001 * ( double ) ( k ); - - if ( z2 <= r ) - { - break; - } - } - } - else - { - k = ( ( 1611 * k ) % 1048576 ); - q = 1.0; - r = ( b - a ) * 0.00001 * ( double ) ( k ); - - while ( r < z2 ) - { - if ( q * ( r * ( yb - y2 ) + z2 * q * ( ( y2 - y ) + t ) ) < - z2 * m2 * r * ( z2 * q - r ) ) - { - a3 = a2 + r / q; - y3 = f ( a3 ); - - if ( y3 < y ) - { - x = a3; - y = y3; - } - } - k = ( ( 1611 * k ) % 1048576 ); - q = 1.0; - r = ( b - a ) * 0.00001 * ( double ) ( k ); - } - } - - r = m2 * d0 * d1 * d2; - s = sqrt ( ( ( y2 - y ) + t ) / m2 ); - h = 0.5 * ( 1.0 + h ); - p = h * ( p + 2.0 * r * s ); - q = q + 0.5 * qs; - r = - 0.5 * ( d0 + ( z0 + 2.01 * e ) / ( d0 * m2 ) ); - - if ( r < s || d0 < 0.0 ) - { - r = a2 + s; - } - else - { - r = a2 + r; - } - - if ( 0.0 < p * q ) - { - a3 = a2 + p / q; - } - else - { - a3 = r; - } - - for ( ; ; ) - { - a3 = r8_max ( a3, r ); - - if ( b <= a3 ) - { - a3 = b; - y3 = yb; - } - else - { - y3 = f ( a3 ); - } - - if ( y3 < y ) - { - x = a3; - y = y3; - } - - d0 = a3 - a2; - - if ( a3 <= r ) - { - break; - } - - p = 2.0 * ( y2 - y3 ) / ( m * d0 ); - - if ( ( 1.0 + 9.0 * macheps ) * d0 <= r8_abs ( p ) ) - { - break; - } - - if ( 0.5 * m2 * ( d0 * d0 + p * p ) <= ( y2 - y ) + ( y3 - y ) + 2.0 * t ) - { - break; - } - a3 = 0.5 * ( a2 + a3 ); - h = 0.9 * h; - } - - if ( b <= a3 ) - { - break; - } - - a0 = sc; - sc = a2; - a2 = a3; - y0 = y1; - y1 = y2; - y2 = y3; - } - - return y; -} 
-//****************************************************************************80 - -double local_min ( double a, double b, double t, func_base& f, - double &x ) - -//****************************************************************************80 -// -// Purpose: -// -// LOCAL_MIN seeks a local minimum of a function F(X) in an interval [A,B]. -// -// Discussion: -// -// The method used is a combination of golden section search and -// successive parabolic interpolation. Convergence is never much slower -// than that for a Fibonacci search. If F has a continuous second -// derivative which is positive at the minimum (which is not at A or -// B), then convergence is superlinear, and usually of the order of -// about 1.324.... -// -// The values EPS and T define a tolerance TOL = EPS * abs ( X ) + T. -// F is never evaluated at two points closer than TOL. -// -// If F is a unimodal function and the computed values of F are always -// unimodal when separated by at least SQEPS * abs ( X ) + (T/3), then -// LOCAL_MIN approximates the abscissa of the global minimum of F on the -// interval [A,B] with an error less than 3*SQEPS*abs(LOCAL_MIN)+T. -// -// If F is not unimodal, then LOCAL_MIN may approximate a local, but -// perhaps non-global, minimum to the same accuracy. -// -// Licensing: -// -// This code is distributed under the GNU LGPL license. -// -// Modified: -// -// 17 July 2011 -// -// Author: -// -// Original FORTRAN77 version by Richard Brent. -// C++ version by John Burkardt. -// Modifications by John Denker. -// -// Reference: -// -// Richard Brent, -// Algorithms for Minimization Without Derivatives, -// Dover, 2002, -// ISBN: 0-486-41998-3, -// LC: QA402.5.B74. -// -// Parameters: -// -// Input, double A, B, the endpoints of the interval. -// -// Input, double T, a positive absolute error tolerance. -// -// Input, func_base& F, a user-supplied c++ functor whose -// local minimum is being sought. The input and output -// of F() are of type double. 
-// -// Output, double &X, the estimated value of an abscissa -// for which F attains a local minimum value in [A,B]. -// -// Output, double LOCAL_MIN, the value F(X). -// -{ - double c; - double d = 0.0; - double e; - double eps; - double fu; - double fv; - double fw; - double fx; - double m; - double p; - double q; - double r; - double sa; - double sb; - double t2; - double tol; - double u; - double v; - double w; -// -// C is the square of the inverse of the golden ratio. -// - c = 0.5 * ( 3.0 - sqrt ( 5.0 ) ); - - eps = sqrt ( r8_epsilon ( ) ); - - sa = a; - sb = b; - x = sa + c * ( b - a ); - w = x; - v = w; - e = 0.0; - fx = f ( x ); - fw = fx; - fv = fw; - - for ( ; ; ) - { - m = 0.5 * ( sa + sb ) ; - tol = eps * r8_abs ( x ) + t; - t2 = 2.0 * tol; -// -// Check the stopping criterion. -// - if ( r8_abs ( x - m ) <= t2 - 0.5 * ( sb - sa ) ) - { - break; - } -// -// Fit a parabola. -// - r = 0.0; - q = r; - p = q; - - if ( tol < r8_abs ( e ) ) - { - r = ( x - w ) * ( fx - fv ); - q = ( x - v ) * ( fx - fw ); - p = ( x - v ) * q - ( x - w ) * r; - q = 2.0 * ( q - r ); - if ( 0.0 < q ) - { - p = - p; - } - q = r8_abs ( q ); - r = e; - e = d; - } - - if ( r8_abs ( p ) < r8_abs ( 0.5 * q * r ) && - q * ( sa - x ) < p && - p < q * ( sb - x ) ) - { -// -// Take the parabolic interpolation step. -// - d = p / q; - u = x + d; -// -// F must not be evaluated too close to A or B. -// - if ( ( u - sa ) < t2 || ( sb - u ) < t2 ) - { - if ( x < m ) - { - d = tol; - } - else - { - d = - tol; - } - } - } -// -// A golden-section step. -// - else - { - if ( x < m ) - { - e = sb - x; - } - else - { - e = sa - x; - } - d = c * e; - } -// -// F must not be evaluated too close to X. -// - if ( tol <= r8_abs ( d ) ) - { - u = x + d; - } - else if ( 0.0 < d ) - { - u = x + tol; - } - else - { - u = x - tol; - } - - fu = f ( u ); -// -// Update A, B, V, W, and X. 
-// - if ( fu <= fx ) - { - if ( u < x ) - { - sb = x; - } - else - { - sa = x; - } - v = w; - fv = fw; - w = x; - fw = fx; - x = u; - fx = fu; - } - else - { - if ( u < x ) - { - sa = u; - } - else - { - sb = u; - } - - if ( fu <= fw || w == x ) - { - v = w; - fv = fw; - w = u; - fw = fu; - } - else if ( fu <= fv || v == x || v== w ) - { - v = u; - fv = fu; - } - } - } - return fx; -} -//****************************************************************************80 - -double local_min_rc ( double &a, double &b, int &status, double value ) - -//****************************************************************************80 -// -// Purpose: -// -// LOCAL_MIN_RC seeks a minimizer of a scalar function of a scalar variable. -// -// Discussion: -// -// This routine seeks an approximation to the point where a function -// F attains a minimum on the interval (A,B). -// -// The method used is a combination of golden section search and -// successive parabolic interpolation. Convergence is never much -// slower than that for a Fibonacci search. If F has a continuous -// second derivative which is positive at the minimum (which is not -// at A or B), then convergence is superlinear, and usually of the -// order of about 1.324... -// -// The routine is a revised version of the Brent local minimization -// algorithm, using reverse communication. -// -// It is worth stating explicitly that this routine will NOT be -// able to detect a minimizer that occurs at either initial endpoint -// A or B. If this is a concern to the user, then the user must -// either ensure that the initial interval is larger, or to check -// the function value at the returned minimizer against the values -// at either endpoint. -// -// Licensing: -// -// This code is distributed under the GNU LGPL license. 
-// -// Modified: -// -// 17 July 2011 -// -// Author: -// -// John Burkardt -// -// Reference: -// -// Richard Brent, -// Algorithms for Minimization Without Derivatives, -// Dover, 2002, -// ISBN: 0-486-41998-3, -// LC: QA402.5.B74. -// -// David Kahaner, Cleve Moler, Steven Nash, -// Numerical Methods and Software, -// Prentice Hall, 1989, -// ISBN: 0-13-627258-4, -// LC: TA345.K34. -// -// Parameters -// -// Input/output, double &A, &B. On input, the left and right -// endpoints of the initial interval. On output, the lower and upper -// bounds for an interval containing the minimizer. It is required -// that A < B. -// -// Input/output, int &STATUS, used to communicate between -// the user and the routine. The user only sets STATUS to zero on the first -// call, to indicate that this is a startup call. The routine returns STATUS -// positive to request that the function be evaluated at ARG, or returns -// STATUS as 0, to indicate that the iteration is complete and that -// ARG is the estimated minimizer. -// -// Input, double VALUE, the function value at ARG, as requested -// by the routine on the previous call. -// -// Output, double LOCAL_MIN_RC, the currently considered point. -// On return with STATUS positive, the user is requested to evaluate the -// function at this point, and return the value in VALUE. On return with -// STATUS zero, this is the routine's estimate for the function minimizer. -// -// Local parameters: -// -// C is the squared inverse of the golden ratio. -// -// EPS is the square root of the relative machine precision. 
-// -{ - static double arg; - static double c; - static double d; - static double e; - static double eps; - static double fu; - static double fv; - static double fw; - static double fx; - static double midpoint; - static double p; - static double q; - static double r; - static double tol; - static double tol1; - static double tol2; - static double u; - static double v; - static double w; - static double x; -// -// STATUS (INPUT) = 0, startup. -// - if ( status == 0 ) - { - if ( b <= a ) - { - cout << "\n"; - cout << "LOCAL_MIN_RC - Fatal error!\n"; - cout << " A < B is required, but\n"; - cout << " A = " << a << "\n"; - cout << " B = " << b << "\n"; - status = -1; - exit ( 1 ); - } - c = 0.5 * ( 3.0 - sqrt ( 5.0 ) ); - - eps = sqrt ( r8_epsilon ( ) ); - tol = r8_epsilon ( ); - - v = a + c * ( b - a ); - w = v; - x = v; - e = 0.0; - - status = 1; - arg = x; - - return arg; - } -// -// STATUS (INPUT) = 1, return with initial function value of FX. -// - else if ( status == 1 ) - { - fx = value; - fv = fx; - fw = fx; - } -// -// STATUS (INPUT) = 2 or more, update the data. -// - else if ( 2 <= status ) - { - fu = value; - - if ( fu <= fx ) - { - if ( x <= u ) - { - a = x; - } - else - { - b = x; - } - v = w; - fv = fw; - w = x; - fw = fx; - x = u; - fx = fu; - } - else - { - if ( u < x ) - { - a = u; - } - else - { - b = u; - } - - if ( fu <= fw || w == x ) - { - v = w; - fv = fw; - w = u; - fw = fu; - } - else if ( fu <= fv || v == x || v == w ) - { - v = u; - fv = fu; - } - } - } -// -// Take the next step. -// - midpoint = 0.5 * ( a + b ); - tol1 = eps * r8_abs ( x ) + tol / 3.0; - tol2 = 2.0 * tol1; -// -// If the stopping criterion is satisfied, we can exit. -// - if ( r8_abs ( x - midpoint ) <= ( tol2 - 0.5 * ( b - a ) ) ) - { - status = 0; - return arg; - } -// -// Is golden-section necessary? -// - if ( r8_abs ( e ) <= tol1 ) - { - if ( midpoint <= x ) - { - e = a - x; - } - else - { - e = b - x; - } - d = c * e; - } -// -// Consider fitting a parabola. 
-// - else - { - r = ( x - w ) * ( fx - fv ); - q = ( x - v ) * ( fx - fw ); - p = ( x - v ) * q - ( x - w ) * r; - q = 2.0 * ( q - r ); - if ( 0.0 < q ) - { - p = - p; - } - q = r8_abs ( q ); - r = e; - e = d; -// -// Choose a golden-section step if the parabola is not advised. -// - if ( - ( r8_abs ( 0.5 * q * r ) <= r8_abs ( p ) ) || - ( p <= q * ( a - x ) ) || - ( q * ( b - x ) <= p ) ) - { - if ( midpoint <= x ) - { - e = a - x; - } - else - { - e = b - x; - } - d = c * e; - } -// -// Choose a parabolic interpolation step. -// - else - { - d = p / q; - u = x + d; - - if ( ( u - a ) < tol2 ) - { - d = tol1 * r8_sign ( midpoint - x ); - } - - if ( ( b - u ) < tol2 ) - { - d = tol1 * r8_sign ( midpoint - x ); - } - } - } -// -// F must not be evaluated too close to X. -// - if ( tol1 <= r8_abs ( d ) ) - { - u = x + d; - } - if ( r8_abs ( d ) < tol1 ) - { - u = x + tol1 * r8_sign ( d ); - } -// -// Request value of F(U). -// - arg = u; - status = status + 1; - - return arg; -} -//****************************************************************************80 - -double r8_abs ( double x ) - -//****************************************************************************80 -// -// Purpose: -// -// R8_ABS returns the absolute value of an R8. -// -// Licensing: -// -// This code is distributed under the GNU LGPL license. -// -// Modified: -// -// 07 May 2006 -// -// Author: -// -// John Burkardt -// -// Parameters: -// -// Input, double X, the quantity whose absolute value is desired. -// -// Output, double R8_ABS, the absolute value of X. -// -{ - double value; - - if ( 0.0 <= x ) - { - value = x; - } - else - { - value = - x; - } - return value; -} -//****************************************************************************80 - -double r8_epsilon ( ) - -//****************************************************************************80 -// -// Purpose: -// -// R8_EPSILON returns the R8 round off unit. 
-// -// Discussion: -// -// R8_EPSILON is a number R which is a power of 2 with the property that, -// to the precision of the computer's arithmetic, -// 1 < 1 + R -// but -// 1 = ( 1 + R / 2 ) -// -// Licensing: -// -// This code is distributed under the GNU LGPL license. -// -// Modified: -// -// 08 May 2006 -// -// Author: -// -// John Burkardt -// -// Parameters: -// -// Output, double R8_EPSILON, the double precision round-off unit. -// -{ - double r; - - r = 1.0; - - while ( 1.0 < ( double ) ( 1.0 + r ) ) - { - r = r / 2.0; - } - - return ( 2.0 * r ); -} -//****************************************************************************80 - -double r8_max ( double x, double y ) - -//****************************************************************************80 -// -// Purpose: -// -// R8_MAX returns the maximum of two R8's. -// -// Licensing: -// -// This code is distributed under the GNU LGPL license. -// -// Modified: -// -// 18 August 2004 -// -// Author: -// -// John Burkardt -// -// Parameters: -// -// Input, double X, Y, the quantities to compare. -// -// Output, double R8_MAX, the maximum of X and Y. -// -{ - double value; - - if ( y < x ) - { - value = x; - } - else - { - value = y; - } - return value; -} -//****************************************************************************80 - -double r8_sign ( double x ) - -//****************************************************************************80 -// -// Purpose: -// -// R8_SIGN returns the sign of an R8. -// -// Licensing: -// -// This code is distributed under the GNU LGPL license. -// -// Modified: -// -// 18 October 2004 -// -// Author: -// -// John Burkardt -// -// Parameters: -// -// Input, double X, the number whose sign is desired. -// -// Output, double R8_SIGN, the sign of X. 
-// -{ - double value; - - if ( x < 0.0 ) - { - value = -1.0; - } - else - { - value = 1.0; - } - return value; -} -//****************************************************************************80 - -void timestamp ( ) - -//****************************************************************************80 -// -// Purpose: -// -// TIMESTAMP prints the current YMDHMS date as a time stamp. -// -// Example: -// -// 31 May 2001 09:45:54 AM -// -// Licensing: -// -// This code is distributed under the GNU LGPL license. -// -// Modified: -// -// 24 September 2003 -// -// Author: -// -// John Burkardt -// -// Parameters: -// -// None -// -{ -# define TIME_SIZE 40 - - static char time_buffer[TIME_SIZE]; - const struct tm *tm; - time_t now; - - now = time ( NULL ); - tm = localtime ( &now ); - - strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm ); - - cout << time_buffer << "\n"; - - return; -# undef TIME_SIZE -} -//****************************************************************************80 - -double zero ( double a, double b, double t, func_base& f ) - -//****************************************************************************80 -// -// Purpose: -// -// ZERO seeks the root of a function F(X) in an interval [A,B]. -// -// Discussion: -// -// The interval [A,B] must be a change of sign interval for F. -// That is, F(A) and F(B) must be of opposite signs. Then -// assuming that F is continuous implies the existence of at least -// one value C between A and B for which F(C) = 0. -// -// The location of the zero is determined to within an accuracy -// of 6 * MACHEPS * r8_abs ( C ) + 2 * T. -// -// Licensing: -// -// This code is distributed under the GNU LGPL license. -// -// Modified: -// -// 06 May 2012 -// -// Author: -// -// Original FORTRAN77 version by Richard Brent. -// C++ version by John Burkardt. -// Modifications by John Denker. 
-// -// Reference: -// -// Richard Brent, -// Algorithms for Minimization Without Derivatives, -// Dover, 2002, -// ISBN: 0-486-41998-3, -// LC: QA402.5.B74. -// -// Parameters: -// -// Input, double A, B, the endpoints of the change of sign interval. -// -// Input, double T, a positive error tolerance. -// -// Input, func_base& F, the name of a user-supplied c++ functor -// whose zero is being sought. The input and output -// of F() are of type double. -// -// Output, double ZERO, the estimated value of a zero of -// the function F. -// -{ - double c; - double d; - double e; - double fa; - double fb; - double fc; - double m; - double macheps; - double p; - double q; - double r; - double s; - double sa; - double sb; - double tol; -// -// Make local copies of A and B. -// - sa = a; - sb = b; - fa = f ( sa ); - fb = f ( sb ); - - c = sa; - fc = fa; - e = sb - sa; - d = e; - - macheps = r8_epsilon ( ); - - for ( ; ; ) - { - if ( r8_abs ( fc ) < r8_abs ( fb ) ) - { - sa = sb; - sb = c; - c = sa; - fa = fb; - fb = fc; - fc = fa; - } - - tol = 2.0 * macheps * r8_abs ( sb ) + t; - m = 0.5 * ( c - sb ); - - if ( r8_abs ( m ) <= tol || fb == 0.0 ) - { - break; - } - - if ( r8_abs ( e ) < tol || r8_abs ( fa ) <= r8_abs ( fb ) ) - { - e = m; - d = e; - } - else - { - s = fb / fa; - - if ( sa == c ) - { - p = 2.0 * m * s; - q = 1.0 - s; - } - else - { - q = fa / fc; - r = fb / fc; - p = s * ( 2.0 * m * a * ( q - r ) - ( sb - sa ) * ( r - 1.0 ) ); - q = ( q - 1.0 ) * ( r - 1.0 ) * ( s - 1.0 ); - } - - if ( 0.0 < p ) - { - q = - q; - } - else - { - p = - p; - } - - s = e; - e = d; - - if ( 2.0 * p < 3.0 * m * q - r8_abs ( tol * q ) && - p < r8_abs ( 0.5 * s * q ) ) - { - d = p / q; - } - else - { - e = m; - d = e; - } - } - sa = sb; - fa = fb; - - if ( tol < r8_abs ( d ) ) - { - sb = sb + d; - } - else if ( 0.0 < m ) - { - sb = sb + tol; - } - else - { - sb = sb - tol; - } - - fb = f ( sb ); - - if ( ( 0.0 < fb && 0.0 < fc ) || ( fb <= 0.0 && fc <= 0.0 ) ) - { - c = sa; - fc = 
fa; - e = sb - sa; - d = e; - } - } - return sb; -} -//****************************************************************************80 - -void zero_rc ( double a, double b, double t, double &arg, int &status, - double value ) - -//****************************************************************************80 -// -// Purpose: -// -// ZERO_RC seeks the root of a function F(X) using reverse communication. -// -// Discussion: -// -// The interval [A,B] must be a change of sign interval for F. -// That is, F(A) and F(B) must be of opposite signs. Then -// assuming that F is continuous implies the existence of at least -// one value C between A and B for which F(C) = 0. -// -// The location of the zero is determined to within an accuracy -// of 6 * MACHEPS * r8_abs ( C ) + 2 * T. -// -// The routine is a revised version of the Brent zero finder -// algorithm, using reverse communication. -// -// Licensing: -// -// This code is distributed under the GNU LGPL license. -// -// Modified: -// -// 17 July 2011 -// -// Author: -// -// John Burkardt -// -// Reference: -// -// Richard Brent, -// Algorithms for Minimization Without Derivatives, -// Dover, 2002, -// ISBN: 0-486-41998-3, -// LC: QA402.5.B74. -// -// Parameters: -// -// Input, double A, B, the endpoints of the change of sign interval. -// -// Input, double T, a positive error tolerance. -// -// Output, double &ARG, the currently considered point. The user -// does not need to initialize this value. On return with STATUS positive, -// the user is requested to evaluate the function at ARG, and return -// the value in VALUE. On return with STATUS zero, ARG is the routine's -// estimate for the function's zero. -// -// Input/output, int &STATUS, used to communicate between -// the user and the routine. The user only sets STATUS to zero on the first -// call, to indicate that this is a startup call. 
The routine returns STATUS -// positive to request that the function be evaluated at ARG, or returns -// STATUS as 0, to indicate that the iteration is complete and that -// ARG is the estimated zero -// -// Input, double VALUE, the function value at ARG, as requested -// by the routine on the previous call. -// -{ - static double c; - static double d; - static double e; - static double fa; - static double fb; - static double fc; - double m; - static double macheps; - double p; - double q; - double r; - double s; - static double sa; - static double sb; - double tol; -// -// Input STATUS = 0. -// Initialize, request F(A). -// - if ( status == 0 ) - { - macheps = r8_epsilon ( ); - - sa = a; - sb = b; - e = sb - sa; - d = e; - - status = 1; - arg = a; - return; - } -// -// Input STATUS = 1. -// Receive F(A), request F(B). -// - else if ( status == 1 ) - { - fa = value; - status = 2; - arg = sb; - return; - } -// -// Input STATUS = 2 -// Receive F(B). -// - else if ( status == 2 ) - { - fb = value; - - if ( 0.0 < fa * fb ) - { - status = -1; - return; - } - c = sa; - fc = fa; - } - else - { - fb = value; - - if ( ( 0.0 < fb && 0.0 < fc ) || ( fb <= 0.0 && fc <= 0.0 ) ) - { - c = sa; - fc = fa; - e = sb - sa; - d = e; - } - } -// -// Compute the next point at which a function value is requested. 
-// - if ( r8_abs ( fc ) < r8_abs ( fb ) ) - { - sa = sb; - sb = c; - c = sa; - fa = fb; - fb = fc; - fc = fa; - } - - tol = 2.0 * macheps * r8_abs ( sb ) + t; - m = 0.5 * ( c - sb ); - - if ( r8_abs ( m ) <= tol || fb == 0.0 ) - { - status = 0; - arg = sb; - return; - } - - if ( r8_abs ( e ) < tol || r8_abs ( fa ) <= r8_abs ( fb ) ) - { - e = m; - d = e; - } - else - { - s = fb / fa; - - if ( sa == c ) - { - p = 2.0 * m * s; - q = 1.0 - s; - } - else - { - q = fa / fc; - r = fb / fc; - p = s * ( 2.0 * m * a * ( q - r ) - ( sb - sa ) * ( r - 1.0 ) ); - q = ( q - 1.0 ) * ( r - 1.0 ) * ( s - 1.0 ); - } - - if ( 0.0 < p ) - { - q = - q; - } - else - { - p = - p; - } - s = e; - e = d; - - if ( 2.0 * p < 3.0 * m * q - r8_abs ( tol * q ) && - p < r8_abs ( 0.5 * s * q ) ) - { - d = p / q; - } - else - { - e = m; - d = e; - } - } - - sa = sb; - fa = fb; - - if ( tol < r8_abs ( d ) ) - { - sb = sb + d; - } - else if ( 0.0 < m ) - { - sb = sb + tol; - } - else - { - sb = sb - tol; - } - - arg = sb; - status = status + 1; - - return; -} - -// ====================================================================== -// === Simple wrapper functions -// === for convenience and/or compatibility. -// -// === The three functions are the same as above, -// === except that they take a plain function F -// === instead of a c++ functor. In all cases, the -// === input and output of F() are of type double. 
- -typedef double DoubleOfDouble (double); - -class func_wrapper : public func_base { - DoubleOfDouble* func; -public: - func_wrapper(DoubleOfDouble* f) { - func = f; - } - virtual double operator() (double x){ - return func(x); - } -}; - -//****************************************************************************80 - -double glomin ( double a, double b, double c, double m, double e, - double t, double f ( double x ), double &x ){ - func_wrapper foo(f); - return glomin(a, b, c, m, e, t, foo, x); -} - -//****************************************************************************80 - -double local_min ( double a, double b, double t, double f ( double x ), - double &x ){ - func_wrapper foo(f); - return local_min(a, b, t, foo, x); -} - -//****************************************************************************80 - -double zero ( double a, double b, double t, double f ( double x ) ){ - func_wrapper foo(f); - return zero(a, b, t, foo); -} - -// ====================================================================== -// Generally useful functor to evaluate a monic polynomial. 
-// For details, see class definition in brent.hpp - -double monicPoly::operator()(double x){ - double rslt(1); - for (int ii = coeff.size()-1; ii >= 0; ii--){ - rslt *= x; - rslt += coeff[ii]; - } - return rslt; -} - -// Similarly, evaluate a general polynomial (not necessarily monic): -double Poly::operator()(double x){ - double rslt(0); - for (int ii = coeff.size()-1; ii >= 0; ii--){ - rslt *= x; - rslt += coeff[ii]; - } - return rslt; -} - -} diff --git a/src/shogun/lib/external/brent.h b/src/shogun/lib/external/brent.h deleted file mode 100644 index 4568561c848..00000000000 --- a/src/shogun/lib/external/brent.h +++ /dev/null @@ -1,58 +0,0 @@ -#include -namespace shogun -{ -#ifndef DOXYGEN_SHOULD_SKIP_THIS -class func_base{ -public: - virtual double operator() (double) = 0; -}; - -class monicPoly : public func_base { -public: - std::vector coeff; - virtual double operator() (double x); -// constructors: - monicPoly(const size_t degree) - : coeff(degree) {} - monicPoly(const std::vector& v) - : coeff(v) {} - monicPoly(const double* c, size_t degree) - : coeff(std::vector(c, c+degree)) {} -}; - -class Poly : public func_base { -public: - std::vector coeff; // a vector of size nterms i.e. 
1+degree - virtual double operator() (double x); -// constructors: - Poly(const size_t degree) - : coeff(1+degree) {} - Poly(const std::vector& v) - : coeff(v) {} - Poly(const double* c, size_t degree) - : coeff(std::vector(c, 1+c+degree)) {} -}; - -double glomin ( double a, double b, double c, double m, double e, double t, - func_base& f, double &x ); -double local_min ( double a, double b, double t, func_base& f, - double &x ); -double local_min_rc ( double &a, double &b, int &status, double value ); -double r8_abs ( double x ); -double r8_epsilon ( ); -double r8_max ( double x, double y ); -double r8_sign ( double x ); -void timestamp ( ); -double zero ( double a, double b, double t, func_base& f ); -void zero_rc ( double a, double b, double t, double &arg, int &status, - double value ); - -// === simple wrapper functions -// === for convenience and/or compatibility -double glomin ( double a, double b, double c, double m, double e, double t, - double f ( double x ), double &x ); -double local_min ( double a, double b, double t, double f ( double x ), - double &x ); -double zero ( double a, double b, double t, double f ( double x ) ); -#endif -} diff --git a/src/shogun/lib/external/cdflib.cpp b/src/shogun/lib/external/cdflib.cpp index a89515ffc99..3983ddace6c 100644 --- a/src/shogun/lib/external/cdflib.cpp +++ b/src/shogun/lib/external/cdflib.cpp @@ -10049,7 +10049,7 @@ void negative_binomial_cdf_values ( int *n_data, int *f, int *s, double *p, 1, 2, 3, 0, 1, 2 }; - if ( n_data < 0 ) + if ( *n_data < 0 ) { *n_data = 0; } diff --git a/src/shogun/lib/external/gpdt.cpp b/src/shogun/lib/external/gpdt.cpp deleted file mode 100644 index d140b3e9a2f..00000000000 --- a/src/shogun/lib/external/gpdt.cpp +++ /dev/null @@ -1,322 +0,0 @@ -/****************************************************************************** - *** GPDT - Gradient Projection Decomposition Technique *** - ****************************************************************************** - *** *** - *** 
GPDT is a C++ software designed to train large-scale Support Vector *** - *** Machines for binary classification in both scalar and distributed *** - *** memory parallel environments. It uses the Joachims' problem *** - *** decomposition technique to split the whole quadratic programming (QP) *** - *** problem into a sequence of smaller QP subproblems, each one being *** - *** solved by a suitable gradient projection method (GPM). The presently *** - *** implemented GPMs are the Generalized Variable Projection Method *** - *** GVPM (T. Serafini, G. Zanghirati, L. Zanni, "Gradient Projection *** - *** Methods for Quadratic Programs and Applications in Training Support *** - *** Vector Machines"; Optim. Meth. Soft. 20, 2005, 353-378) and the *** - *** Dai-Fletcher Method DFGPM (Y. Dai and R. Fletcher,"New Algorithms for *** - *** Singly Linear Constrained Quadratic Programs Subject to Lower and *** - *** Upper Bounds"; Math. Prog. to appear). *** - *** *** - *** Authors: *** - *** Thomas Serafini, Luca Zanni *** - *** Dept. of Mathematics, University of Modena and Reggio Emilia - ITALY *** - *** serafini.thomas@unimo.it, zanni.luca@unimo.it *** - *** Gaetano Zanghirati *** - *** Dept. of Mathematics, University of Ferrara - ITALY *** - *** g.zanghirati@unife.it *** - *** *** - *** Software homepage: http://dm.unife.it/gpdt *** - *** *** - *** This work is supported by the Italian FIRB Projects *** - *** 'Statistical Learning: Theory, Algorithms and Applications' *** - *** (grant RBAU01877P), http://slipguru.disi.unige.it/ASTA *** - *** and *** - *** 'Parallel Algorithms and Numerical Nonlinear Optimization' *** - *** (grant RBAU01JYPN), http://dm.unife.it/pn2o *** - *** *** - *** Copyright (C) 2004-2008 by T. Serafini, G. Zanghirati, L. Zanni. 
*** - *** *** - *** COPYRIGHT NOTIFICATION *** - *** *** - *** Permission to copy and modify this software and its documentation *** - *** for internal research use is granted, provided that this notice is *** - *** retained thereon and on all copies or modifications. The authors and *** - *** their respective Universities makes no representations as to the *** - *** suitability and operability of this software for any purpose. It is *** - *** provided "as is" without express or implied warranty. *** - *** Use of this software for commercial purposes is expressly prohibited *** - *** without contacting the authors. *** - *** *** - *** This program is free software; you can redistribute it and/or modify *** - *** it under the terms of the GNU General Public License as published by *** - *** the Free Software Foundation; either version 3 of the License, or *** - *** (at your option) any later version. *** - *** *** - *** This program is distributed in the hope that it will be useful, *** - *** but WITHOUT ANY WARRANTY; without even the implied warranty of *** - *** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *** - *** GNU General Public License for more details. *** - *** *** - *** You should have received a copy of the GNU General Public License *** - *** along with this program; if not, write to the Free Software *** - *** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*** - *** *** - *** File: gpdt.cpp *** - *** Type: scalar *** - *** Version: 1.0 *** - *** Date: October, 2005 *** - *** Revision: 1 *** - *** *** - *** SHOGUN adaptions Written (W) 2006-2008 Soeren Sonnenburg *** - ******************************************************************************/ -#include -#ifdef USE_GPL_SHOGUN -#include -#include -#include -#include -#include -#include -#include - -using namespace shogun; - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -void fatalError(const char *msg1, const char *msg2); - -/******************************************************************************/ -/*** Class constructor ***/ -/******************************************************************************/ -QPproblem::QPproblem() -{ - /*** set problem defaults ***/ - maxmw = 40; - c_const = 10.0; - projection_solver = SOLVER_FLETCHER; - projection_projector = 1; - PreprocessMode = 0; - delta = 1.0e-3; - DELTAsv = EPS_SV; - ker_type = 2; - chunk_size = 400; - q = -1; - y = NULL; - tau_proximal = 0.0; - dim = 1; -} - -/******************************************************************************/ -/*** Class destructor ***/ -/******************************************************************************/ -QPproblem::~QPproblem() -{ - //if (y != NULL) free(y); -} - -/******************************************************************************/ -/*** Setter method for the subproblem features ***/ -/******************************************************************************/ -void QPproblem::copy_subproblem(QPproblem* dst, QPproblem* p, int32_t len, int32_t *perm) -{ - int32_t k; - - *dst=*p; - dst->ell = len; - - dst->KER->SetSubproblem(p->KER, len, perm); - dst->y = SG_MALLOC(int32_t, len); - for (k = 0; k < len; k++) - dst->y[k] = p->y[perm[k]]; -} - -namespace shogun -{ -/******************************************************************************/ -/*** Extract the samples information from an SVMlight-compliant data file ***/ 
-/******************************************************************************/ -int32_t prescan_document(char *file, int32_t *lines, int32_t *vlen, int32_t *ll) -{ - FILE *fl; - int32_t ic; - char c; - int64_t current_length, current_vlen; - - if ((fl = fopen (file, "r")) == NULL) - return(-1); - current_length = 0; - current_vlen = 0; - - *ll = 0; /* length of the longest input line (the read buffer should - be allocated with this size) */ - *lines = 1; /* number of lines in the file */ - *vlen = 0; /* max number of nonzero components in a vector */ - - while ((ic = getc(fl)) != EOF) - { - c = (char)ic; - current_length++; - - if (c == ' ') - current_vlen++; - - if (c == '\n') - { - (*lines)++; - if (current_length > (*ll)) - *ll = current_length; - if (current_vlen > (*vlen)) - *vlen = current_vlen; - current_length = 0; - current_vlen = 0; - } - } - fclose(fl); - return(0); -} -} -/******************************************************************************/ -/*** return 1 if problem is single class, 0 if two-class ***/ -/******************************************************************************/ -int32_t QPproblem::Check2Class() -{ - int32_t i; - - for (i = 1; i < ell; i++) - if (y[i] != y[0]) - return 0; - return 1; -} - -namespace shogun -{ -/******************************************************************************/ -/*** Compute the size of data splitting for preprocessing ***/ -/******************************************************************************/ -void SplitParts( - int32_t n, int32_t part, int32_t parts, int32_t *dim, int32_t *off) -{ - int32_t r; - - r = n % parts; - *dim = n / parts; - - if (part < r) - { - (*dim)++; - *off = *dim * part; - } - else - *off = *dim * part + r; -} -} -/******************************************************************************/ -/*** Kernel class constructor ***/ -/******************************************************************************/ -sKernel::sKernel (CKernel* k, int32_t l) -{ - 
kernel=k; - ell=l; - nor = NULL; - vaux = NULL; - lx = NULL; - ix = NULL; - x = NULL; - IsSubproblem = 0; - KernelEvaluations = 0.0; -} - -/******************************************************************************/ -/*** Set the problem data for kernel evaluation ***/ -/******************************************************************************/ -void sKernel::SetData( - float32_t **x_, int32_t **ix_, int32_t *lx_, int32_t _ell, int32_t _dim) -{ - int32_t i, j, k; - - dim = _dim; - ell = _ell; - nor = SG_MALLOC(float64_t, ell); - vaux = SG_CALLOC(float32_t, dim); - - IsSubproblem = 0; - x = x_; - ix = ix_; - lx = lx_; - - // unroll one (sparse) vector - vauxRow = 0; - i = vauxRow; - for (k = 0; k < lx[i]; k++) - vaux[ix[i][k]] = x[i][k]; - - // compute the squared Euclidean norm of each vector - for (i = 0; i < ell; i++) - { - nor[i] = 0.0; - for (j = 0; j < lx[i]; j++) - nor[i] += (float64_t)(x[i][j]*x[i][j]); - } -} - -/******************************************************************************/ -/*** Set the subproblem data ***/ -/******************************************************************************/ -void sKernel::SetSubproblem(sKernel* ker, int32_t len, int32_t *perm) -{ - int32_t k; - - /* arrays allocations */ - nor = SG_MALLOC(float64_t, len); - vaux = SG_CALLOC(float32_t, ker->dim); - - lx = SG_MALLOC(int32_t, len); - ix = SG_MALLOC(int32_t*, len); - x = SG_MALLOC(float32_t*, len); - IsSubproblem = 1; - - for (k = 0; k < len; k++) - { - x[k] = ker->x[perm[k]]; - ix[k] = ker->ix[perm[k]]; - lx[k] = ker->lx[perm[k]]; - nor[k] = ker->nor[perm[k]]; - } - - // unroll one (sparse) vector - vauxRow = 0; - for (k = 0; k < lx[vauxRow]; k++) - vaux[ix[vauxRow][k]] = x[vauxRow][k]; -} - -/******************************************************************************/ -/*** Kernel class destructor ***/ -/******************************************************************************/ -sKernel::~sKernel() -{ - int32_t i; - - SG_FREE(nor); - 
SG_FREE(vaux); - - SG_FREE(lx); - if (ix != NULL) - { - if (!IsSubproblem) - for (i = 0; i < ell; i++) - SG_FREE(ix[i]); - SG_FREE(ix); - } - if (x != NULL) - { - if (!IsSubproblem) - for (i = 0; i < ell; i++) - SG_FREE(x[i]); - SG_FREE(x); - } -} - -#endif // DOXYGEN_SHOULD_SKIP_THIS - -/******************************************************************************/ -/*** End of gpdt.cpp file ***/ -/******************************************************************************/ -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/external/gpdt.h b/src/shogun/lib/external/gpdt.h deleted file mode 100644 index 992eaefc246..00000000000 --- a/src/shogun/lib/external/gpdt.h +++ /dev/null @@ -1,203 +0,0 @@ -/****************************************************************************** - *** GPDT - Gradient Projection Decomposition Technique *** - ****************************************************************************** - *** *** - *** GPDT is a C++ software designed to train large-scale Support Vector *** - *** Machines for binary classification in both scalar and distributed *** - *** memory parallel environments. It uses the Joachims' problem *** - *** decomposition technique to split the whole quadratic programming (QP) *** - *** problem into a sequence of smaller QP subproblems, each one being *** - *** solved by a suitable gradient projection method (GPM). The presently *** - *** implemented GPMs are the Generalized Variable Projection Method *** - *** GVPM (T. Serafini, G. Zanghirati, L. Zanni, "Gradient Projection *** - *** Methods for Quadratic Programs and Applications in Training Support *** - *** Vector Machines"; Optim. Meth. Soft. 20, 2005, 353-378) and the *** - *** Dai-Fletcher Method DFGPM (Y. Dai and R. Fletcher,"New Algorithms for *** - *** Singly Linear Constrained Quadratic Programs Subject to Lower and *** - *** Upper Bounds"; Math. Prog. to appear). *** - *** *** - *** Authors: *** - *** Thomas Serafini, Luca Zanni *** - *** Dept. 
of Mathematics, University of Modena and Reggio Emilia - ITALY *** - *** serafini.thomas@unimo.it, zanni.luca@unimo.it *** - *** Gaetano Zanghirati *** - *** Dept. of Mathematics, University of Ferrara - ITALY *** - *** g.zanghirati@unife.it *** - *** *** - *** Software homepage: http://dm.unife.it/gpdt *** - *** *** - *** This work is supported by the Italian FIRB Projects *** - *** 'Statistical Learning: Theory, Algorithms and Applications' *** - *** (grant RBAU01877P), http://slipguru.disi.unige.it/ASTA *** - *** and *** - *** 'Parallel Algorithms and Numerical Nonlinear Optimization' *** - *** (grant RBAU01JYPN), http://dm.unife.it/pn2o *** - *** *** - *** Copyright (C) 2004 by T. Serafini, G. Zanghirati, L. Zanni. *** - *** *** - *** COPYRIGHT NOTIFICATION *** - *** *** - *** Permission to copy and modify this software and its documentation *** - *** for internal research use is granted, provided that this notice is *** - *** retained thereon and on all copies or modifications. The authors and *** - *** their respective Universities makes no representations as to the *** - *** suitability and operability of this software for any purpose. It is *** - *** provided "as is" without express or implied warranty. *** - *** Use of this software for commercial purposes is expressly prohibited *** - *** without contacting the authors. *** - *** *** - *** This program is free software; you can redistribute it and/or modify *** - *** it under the terms of the GNU General Public License as published by *** - *** the Free Software Foundation; either version 3 of the License, or *** - *** (at your option) any later version. *** - *** *** - *** This program is distributed in the hope that it will be useful, *** - *** but WITHOUT ANY WARRANTY; without even the implied warranty of *** - *** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *** - *** GNU General Public License for more details. 
*** - *** *** - *** You should have received a copy of the GNU General Public License *** - *** along with this program; if not, write to the Free Software *** - *** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *** - *** *** - *** File: gpdt.h *** - *** Type: scalar *** - *** Version: 1.0 *** - *** Date: October, 2005 *** - *** Revision: 1 *** - *** *** - ******************************************************************************/ -#include -#ifdef USE_GPL_SHOGUN -#include - -#ifndef DOXYGEN_SHOULD_SKIP_THIS - -namespace shogun -{ -#define MAXLENGTH 256 -#define cachetype KERNELCACHE_ELEM -#define EPS_SV 1.0e-9 /* precision for multipliers */ - -enum { - SOLVER_VPM = 0, - SOLVER_FLETCHER = 1 -}; - -/** s kernel */ -class sKernel -{ -public: - /** kernel type */ - int32_t ker_type; - /** lx */ - int32_t *lx; - /** ix */ - int32_t **ix; - /** x */ - float32_t **x; - /** nor */ - float64_t *nor; - /** sigma */ - float64_t sigma; - /** degree */ - float64_t degree; - /** normalization factor */ - float64_t norm; - /** c poly */ - float64_t c_poly; - /** kernel evaluations */ - float64_t KernelEvaluations; - - /** call kernel fun - * - * @param i - * @param j - * @return something floaty - */ - float64_t (sKernel::*kernel_fun)(int32_t i, int32_t j); - - /** constructor - * - * @param k kernel - * @param ell ell - */ - sKernel (shogun::CKernel* k, int32_t ell); - ~sKernel(); - - /** set data - * - * @param x_ new x - * @param ix_ new ix - * @param lx_ new lx - * @param ell new ell - * @param dim dim - */ - void SetData( - float32_t **x_, int32_t **ix_, int32_t *lx_, int32_t ell, int32_t dim); - - /** set subproblem - * - * @param ker kernel - * @param len len - * @param perm perm - */ - void SetSubproblem (sKernel* ker, int32_t len, int32_t *perm); - - /** get an item from the kernel - * - * @param i index i - * @param j index j - * @return item from kernel at index i, j - */ - float64_t Get(int32_t i, int32_t j) - { - KernelEvaluations += 1.0F; - return 
kernel->kernel(i, j); - } - - /** add something - * - * @param v v - * @param j j - * @param mul mul - */ - void Add (float64_t *v, int32_t j, float64_t mul); - - /** prod something - * - * @param v v - * @param j j - * @return something floaty - */ - float64_t Prod (float64_t *v, int32_t j); - - /** get kernel - * - * @return kernel - */ - inline CKernel* get_kernel() - { - return kernel; - } - -private: - CKernel* kernel; - int32_t vauxRow; - int32_t IsSubproblem; - int32_t ell, dim; - float32_t *vaux; - - float64_t dot (int32_t i, int32_t j); -}; - -void SplitParts ( - int32_t n, int32_t part, int32_t parts, int32_t *dim, int32_t *off); -void SplitNum (int32_t n, int32_t *nloc, int32_t *noff); -} -#endif // DOXYGEN_SHOULD_SKIP_THIS - -/******************************************************************************/ -/*** End of gpdt.h file ***/ -/******************************************************************************/ -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/external/gpdtsolve.cpp b/src/shogun/lib/external/gpdtsolve.cpp deleted file mode 100644 index 031670a78e8..00000000000 --- a/src/shogun/lib/external/gpdtsolve.cpp +++ /dev/null @@ -1,1580 +0,0 @@ -/****************************************************************************** - *** GPDT - Gradient Projection Decomposition Technique *** - ****************************************************************************** - *** *** - *** GPDT is a C++ software designed to train large-scale Support Vector *** - *** Machines for binary classification in both scalar and distributed *** - *** memory parallel environments. It uses the Joachims' problem *** - *** decomposition technique to split the whole quadratic programming (QP) *** - *** problem into a sequence of smaller QP subproblems, each one being *** - *** solved by a suitable gradient projection method (GPM). The presently *** - *** implemented GPMs are the Generalized Variable Projection Method *** - *** GVPM (T. Serafini, G. Zanghirati, L. 
Zanni, "Gradient Projection *** - *** Methods for Quadratic Programs and Applications in Training Support *** - *** Vector Machines"; Optim. Meth. Soft. 20, 2005, 353-378) and the *** - *** Dai-Fletcher Method DFGPM (Y. Dai and R. Fletcher,"New Algorithms for *** - *** Singly Linear Constrained Quadratic Programs Subject to Lower and *** - *** Upper Bounds"; Math. Prog. to appear). *** - *** *** - *** Authors: *** - *** Thomas Serafini, Luca Zanni *** - *** Dept. of Mathematics, University of Modena and Reggio Emilia - ITALY *** - *** serafini.thomas@unimo.it, zanni.luca@unimo.it *** - *** Gaetano Zanghirati *** - *** Dept. of Mathematics, University of Ferrara - ITALY *** - *** g.zanghirati@unife.it *** - *** *** - *** Software homepage: http://dm.unife.it/gpdt *** - *** *** - *** This work is supported by the Italian FIRB Projects *** - *** 'Statistical Learning: Theory, Algorithms and Applications' *** - *** (grant RBAU01877P), http://slipguru.disi.unige.it/ASTA *** - *** and *** - *** 'Parallel Algorithms and Numerical Nonlinear Optimization' *** - *** (grant RBAU01JYPN), http://dm.unife.it/pn2o *** - *** *** - *** Copyright (C) 2004-2008 by T. Serafini, G. Zanghirati, L. Zanni. *** - *** *** - *** COPYRIGHT NOTIFICATION *** - *** *** - *** Permission to copy and modify this software and its documentation *** - *** for internal research use is granted, provided that this notice is *** - *** retained thereon and on all copies or modifications. The authors and *** - *** their respective Universities makes no representations as to the *** - *** suitability and operability of this software for any purpose. It is *** - *** provided "as is" without express or implied warranty. *** - *** Use of this software for commercial purposes is expressly prohibited *** - *** without contacting the authors. 
*** - *** *** - *** This program is free software; you can redistribute it and/or modify *** - *** it under the terms of the GNU General Public License as published by *** - *** the Free Software Foundation; either version 3 of the License, or *** - *** (at your option) any later version. *** - *** *** - *** This program is distributed in the hope that it will be useful, *** - *** but WITHOUT ANY WARRANTY; without even the implied warranty of *** - *** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *** - *** GNU General Public License for more details. *** - *** *** - *** You should have received a copy of the GNU General Public License *** - *** along with this program; if not, write to the Free Software *** - *** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *** - *** *** - *** File: gpdtsolve.cpp *** - *** Type: scalar *** - *** Version: 1.0 *** - *** Date: November, 2006 *** - *** Revision: 2 *** - *** *** - *** SHOGUN adaptions Written (W) 2006-2009 Soeren Sonnenburg *** - ******************************************************************************/ - -#include -#ifdef USE_GPL_SHOGUN -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -using namespace shogun; - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -namespace shogun -{ -#define y_in(i) y[index_in[(i)]] -#define y_out(i) y[index_out[(i)]] -#define alpha_in(i) alpha[index_in[(i)]] -#define alpha_out(i) alpha[index_out[(i)]] -#define minfty (-1.0e+30) // minus infinity - -uint32_t Randnext = 1; - -#define ThRand (Randnext = Randnext * 1103515245L + 12345L) -#define ThRandPos ((Randnext = Randnext * 1103515245L + 12345L) & 0x7fffffff) - -FILE *fp; - -/* utility routines prototyping */ -void quick_si (int32_t a[], int32_t k); -void quick_s3 (int32_t a[], int32_t k, int32_t ia[]); -void quick_s2 (float64_t a[], int32_t k, int32_t ia[]); - -/******************************************************************************/ -/*** Class for 
caching strategy implementation ***/ -/******************************************************************************/ -class sCache -{ - -public: - sCache (sKernel* sk, int32_t Mbyte, int32_t ell); - ~sCache (); - - cachetype *FillRow (int32_t row, int32_t IsC = 0); - cachetype *GetRow (int32_t row); - - int32_t DivideMP (int32_t *out, int32_t *in, int32_t n); - - /*** Itarations counter ***/ - void Iteration() { nit++; } - - /*** Cache size control ***/ - int32_t CheckCycle() - { - int32_t us; - cache_entry *pt = first_free->next; - - for (us = 0; pt != first_free; us++, pt = pt->next); - if (us != maxmw-1) - return 1; - else - return 0; - } - -private: - - struct cache_entry - { - int32_t row; // unused row - int32_t last_access_it; - cache_entry *prev, *next; - cachetype *data; - }; - - sKernel* KER; - int32_t maxmw, ell; - int32_t nit; - - cache_entry *mw; - cache_entry *first_free; - cache_entry **pindmw; // 0 if unused row - cachetype *onerow; - - cachetype *FindFree(int32_t row, int32_t IsC); -}; - - -/******************************************************************************/ -/*** Cache class constructor ***/ -/******************************************************************************/ -sCache::sCache(sKernel* sk, int32_t Mbyte, int32_t _ell) : KER(sk), ell(_ell) -{ - int32_t i; - - // size in dwords of one cache row - maxmw = (sizeof(cache_entry) + sizeof(cache_entry *) - + ell*sizeof(cachetype)) / 4; - // number of cache rows - maxmw = Mbyte*1024*(1024/4) / maxmw; - - /* arrays allocation */ - mw = SG_MALLOC(cache_entry, maxmw); - pindmw = SG_MALLOC(cache_entry*, ell); - onerow = SG_MALLOC(cachetype, ell); - - /* arrays initialization */ - for (i = 0; i < maxmw; i++) - { - mw[i].prev = (i == 0 ? &mw[maxmw-1] : &mw[i-1]); - mw[i].next = (i == maxmw-1 ? 
&mw[0] : &mw[i+1]); - mw[i].data = SG_MALLOC(cachetype, ell); - mw[i].row = -1; // unused row - mw[i].last_access_it = -1; - } - for (i = 0; i < ell; i++) - pindmw[i] = 0; - - first_free = &mw[0]; - nit = 0; -} - -/******************************************************************************/ -/*** Cache class destructor ***/ -/******************************************************************************/ -sCache::~sCache() -{ - int32_t i; - - for (i = maxmw-1; i >= 0; i--) - SG_FREE(mw[i].data); - - SG_FREE(onerow); - SG_FREE(pindmw); - SG_FREE(mw); -} - - -/******************************************************************************/ -/*** Retrieve a cached row ***/ -/******************************************************************************/ -cachetype *sCache::GetRow(int32_t row) -{ - cache_entry *c; - - c = pindmw[row]; - if (c == NULL) - return NULL; - - c->last_access_it = nit; - if (c == first_free) - { - first_free = first_free->next; - } - else - { - // move "row" as the most recently used. - c->prev->next = c->next; - c->next->prev = c->prev; - c->next = first_free; - c->prev = first_free->prev; - first_free->prev = c; - c->prev->next = c; - } - - return c->data; -} - -/****************************************************************************** - *** Look for a free cache row *** - *** IMPORTANT: call this method only if you are sure that "row" *** - *** is not already in the cache ( i.e. 
after calling GetRow() ) *** - ******************************************************************************/ -cachetype *sCache::FindFree(int32_t row, int32_t IsC) -{ - cachetype *pt; - - if (first_free->row != -1) // cache row already contains data - { - if (first_free->last_access_it == nit || IsC) - return 0; - else - pindmw[first_free->row] = 0; - } - first_free->row = row; - first_free->last_access_it = nit; - pindmw[row] = first_free; - - pt = first_free->data; - first_free = first_free->next; - - return pt; -} - - -/******************************************************************************/ -/*** Enter data in a cache row ***/ -/******************************************************************************/ -cachetype *sCache::FillRow(int32_t row, int32_t IsC) -{ - int32_t j; - cachetype *pt; - - pt = GetRow(row); - if (pt != NULL) - return pt; - - pt = FindFree(row, IsC); - if (pt == 0) - pt = onerow; - - // Compute all the row elements - for (j = 0; j < ell; j++) - pt[j] = (cachetype)KER->Get(row, j); - return pt; -} - - -/******************************************************************************/ -/*** Expand a sparse row in a full cache row ***/ -/******************************************************************************/ -int32_t sCache::DivideMP(int32_t *out, int32_t *in, int32_t n) -{ - /******************************************************************** - * Input meaning: * - * in = vector containing row to be extracted in the cache * - * n = size of in * - * out = the indexes of "in" of the components to be computed * - * by this processor (first those in the cache, then the * - * ones not yet computed) * - * Returns: the number of components of this processor * - ********************************************************************/ - - int32_t *remained, nremained, k; - int32_t i; - - remained = SG_MALLOC(int32_t, n); - - nremained = 0; - k = 0; - for (i = 0; i < n; i++) - { - if (pindmw[in[i]] != NULL) - out[k++] = i; - else - 
remained[nremained++] = i; - } - for (i = 0; i < nremained; i++) - out[k++] = remained[i]; - - SG_FREE(remained); - return n; -} - -/******************************************************************************/ -/*** Check solution optimality ***/ -/******************************************************************************/ -int32_t QPproblem::optimal() -{ - /*********************************************************************** - * Returns 1 if the computed solution is optimal, otherwise returns 0. * - * To verify the optimality it checks the KKT optimality conditions. * - ***********************************************************************/ - register int32_t i, j, margin_sv_number, z, k, s, kin, z1, znew=0, nnew; - - float64_t gx_i, aux, s1, s2; - - /* sort -y*grad and store in ing the swaps done */ - for (j = 0; j < ell; j++) - { - grad[j] = y[j] - st[j]; - ing[j] = j; - } - - quick_s2(grad,ell,ing); - - /* compute bee */ - margin_sv_number = 0; - - for (i = chunk_size - 1; i >= 0; i--) - if (is_free(index_in[i])) - { - margin_sv_number++; - bee = y_in(i) - st[index_in[i]]; - break; - } - - if (margin_sv_number > 0) - { - aux=-1.0; - for (i = nb-1; i >= 0; i--) - { - gx_i = bee + st[index_out[i]]; - if ((is_zero(index_out[i]) && (gx_i*y_out(i) < (1.0-delta))) || - (is_bound(index_out[i]) && (gx_i*y_out(i) > (1.0+delta))) || - (is_free(index_out[i]) && - ((gx_i*y_out(i) < 1.0-delta) || (gx_i*y_out(i) > 1.0+delta)))) - { - if (fabs(gx_i*y_out(i) - 1.0) > aux) - aux = fabs(gx_i*y_out(i) - 1.0); - } - } - } - else - { - for (i = ell - 1; i >= 0; i--) - if (is_free(i)) - { - margin_sv_number++; - bee = y[i] - st[i]; - break; - } - if (margin_sv_number == 0) - { - s1 = -minfty; - s2 = -minfty; - for (j = 0; j < ell; j++) - if ( (alpha[ing[j]] > DELTAsv) && (y[ing[j]] >= 0) ) - { - s1 = grad[j]; - break; - } - for (j = 0; j < ell; j++) - if ( (alpha[ing[j]] < c_const-DELTAsv) && (y[ing[j]] <= 0) ) - { - s2 = grad[j]; - break; - } - if (s1 < s2) - aux = s1; - 
else - aux = s2; - - s1 = minfty; - s2 = minfty; - for (j = ell-1; j >=0; j--) - if ( (alpha[ing[j]] > DELTAsv) && (y[ing[j]] <= 0) ) - { - s1 = grad[j]; - break; - } - for (j = ell-1; j >=0; j--) - if ( (alpha[ing[j]] < c_const-DELTAsv) && (y[ing[j]] >= 0) ) - { - s2 = grad[j]; - break; - } - if (s2 > s1) s1 = s2; - - bee = 0.5 * (s1+aux); - } - - aux = -1.0; - for (i = ell-1; i >= 0; i--) - { - gx_i = bee + st[i]; - if ((is_zero(i) && (gx_i*y[i] < (1.0-delta))) || - (is_bound(i) && (gx_i*y[i] > (1.0+delta))) || - (is_free(i) && - ((gx_i*y[i] < 1.0-delta) || (gx_i*y[i] > 1.0+delta)))) - { - if (fabs(gx_i*y[i] - 1.0) > aux) - aux = fabs(gx_i*y[i] - 1.0); - } - } - } - - if (aux < 0.0) - return 1; - else - { - if (verbosity > 1) - SG_SINFO(" Max KKT violation: %lf\n", aux) - else if (verbosity > 0) - SG_SINFO(" %lf\n", aux) - - if (fabs(kktold-aux) < delta*0.01 && aux < delta*2.0) - { - if (DELTAvpm > InitialDELTAvpm*0.1) - { - DELTAvpm = (DELTAvpm*0.5 > InitialDELTAvpm*0.1 ? - DELTAvpm*0.5 : InitialDELTAvpm*0.1); - SG_SINFO("Inner tolerance changed to: %lf\n", DELTAvpm) - } - } - - kktold = aux; - - /***************************************************************************** - *** Update the working set (T. Serafini, L. Zanni, "On the Working Set *** - *** Selection in Gradient Projection-based Decomposition Techniques for *** - *** Support Vector Machines"; Optim. Meth. Soft. 20, 2005). 
*** - *****************************************************************************/ - for (j = 0; j < chunk_size; j++) - inold[j] = index_in[j]; - - z = -1; /* index of the last entered component from the top */ - z1 = ell; /* index of the last entered component from the bottom */ - k = 0; - j = 0; - while (k < q) - { - i = z + 1; /* index of the candidate components from the top */ - while (i < z1) - { - if ( is_free(ing[i]) || - (-y[ing[i]]>=0 && is_zero(ing[i])) || - (-y[ing[i]]<=0 && is_bound(ing[i])) - ) - { - znew = i; /* index of the component to select from the top */ - break; - } - i++; - } - if (i == z1) break; - - s = z1 - 1; - while (znew < s) - { - if ( is_free(ing[s]) || - (y[ing[s]]>=0 && is_zero(ing[s])) || - (y[ing[s]]<=0 && is_bound(ing[s])) - ) - { - z1 = s; - z = znew; - break; - } - s--; - } - if (znew == s) break; - - index_in[k++] = ing[z]; - index_in[k++] = ing[z1]; - } - - if (k < q) - { - if (verbosity > 1) - SG_SINFO(" New q: %i\n", k) - q = k; - } - - quick_si(index_in, q); - - s = 0; - j = 0; - for (k = 0; k < chunk_size; k++) - { - z = inold[k]; - for (i = j; i < q; i++) - if (z <= index_in[i]) - break; - - if (i == q) - { - for (i = k; i < chunk_size; i++) - { - ing[s] = inold[i]; /* older components not in the new basis */ - s = s+1; - } - break; - } - - if (z == index_in[i]) - j = i + 1; /* the component is still in the basis */ - else - { - ing[s] = z; /* older component not in the new basis */ - s = s + 1; - j = i; - } - } - - for (i = 0; i < s; i++) - { - bmemrid[i] = bmem[ing[i]]; - pbmr[i] = i; - } - - quick_s3(bmemrid, s, pbmr); - - /* check if support vectors not at bound enter the basis */ - j = q; - i = 0; - while (j < chunk_size && i < s) - { - if (is_free(ing[pbmr[i]])) - { - index_in[j] = ing[pbmr[i]]; - j++; - } - i++; - } - - /* choose which bound variables keep in basis (alpha = 0 or alpha = C) */ - if (j < chunk_size) - { - i = 0; - while (j < chunk_size && i < s) - { - if (is_zero(ing[pbmr[i]])) - { - index_in[j] = 
ing[pbmr[i]]; - j++; - } - i++; - } - if (j < chunk_size) - { - i = 0; - while (j < chunk_size && i < s) - { - if (is_bound(ing[pbmr[i]])) - { - index_in[j] = ing[pbmr[i]]; - j++; - } - i++; - } - } - } - - quick_si(index_in, chunk_size); - - for (i = 0; i < chunk_size; i++) - bmem[index_in[i]]++; - - j = 0; - k = 0; - for (i = 0; i < chunk_size; i++) - { - for (z = j; z < index_in[i]; z++) - { - index_out[k] = z; - k++; - } - j = index_in[i]+1; - } - for (z = j; z < ell; z++) - { - index_out[k] = z; - k++; - } - - for (i = 0; i < nb; i++) - bmem[index_out[i]] = 0; - - kin = 0; - j = 0; - for (k = 0; k < chunk_size; k++) - { - z = index_in[k]; - for (i = j; i < chunk_size; i++) - if (z <= inold[i]) - break; - if (i == chunk_size) - { - for (s = k; s < chunk_size; s++) - { - incom[s] = -1; - cec[index_in[s]]++; - } - kin = kin + chunk_size - k ; - break; - } - - if (z == inold[i]) - { - incom[k] = i; - j = i+1; - } - else - { - incom[k] = -1; - j = i; - kin = kin + 1; - cec[index_in[k]]++; - } - } - - nnew = kin & (~1); - if (nnew < 10) - nnew = 10; - if (nnew < chunk_size/10) - nnew = chunk_size/10; - if (nnew < q) - { - q = nnew; - q = q & (~1); - } - - if (kin == 0) - { - DELTAkin *= 0.1; - if (DELTAkin < 1.0e-6) - { - SG_SINFO("\n***ERROR***: GPDT stops with tolerance") - SG_SINFO( - " %lf because it is unable to change the working set.\n", kktold); - return 1; - } - else - { - SG_SINFO("Inner tolerance temporary changed to:") - SG_SINFO(" %e\n", DELTAvpm*DELTAkin) - } - } - else - DELTAkin = 1.0; - - if (verbosity > 1) - { - SG_SINFO(" Working set: new components: %i", kin) - SG_SINFO(", new parameter n: %i\n", q) - } - - return 0; - } -} - -/******************************************************************************/ -/*** Optional preprocessing: random distribution ***/ -/******************************************************************************/ -int32_t QPproblem::Preprocess0(int32_t *aux, int32_t *sv) -{ - int32_t i, j; - - Randnext = 1; - 
memset(sv, 0, ell*sizeof(int32_t)); - for (i = 0; i < chunk_size; i++) - { - do - { - j = ThRandPos % ell; - } while (sv[j] != 0); - sv[j] = 1; - } - return(chunk_size); -} - -/******************************************************************************/ -/*** Optional preprocessing: block parallel distribution ***/ -/******************************************************************************/ -int32_t QPproblem::Preprocess1(sKernel* kernel, int32_t *aux, int32_t *sv) -{ - int32_t s; // elements owned by the processor - int32_t sl; // elements of the n-1 subproblems - int32_t n, i, off, j, k, ll; - int32_t nsv, nbsv; - int32_t *sv_loc, *bsv_loc, *sp_y; - float32_t *sp_D=NULL; - float64_t *sp_alpha, *sp_h; - - s = ell; - /* divide the s elements into n blocks smaller than preprocess_size */ - n = (s + preprocess_size - 1) / preprocess_size; - sl = 1 + s / n; - - if (verbosity > 0) - { - SG_SINFO(" Preprocessing: examples = %d", s) - SG_SINFO(", subp. = %d", n) - SG_SINFO(", size = %d\n",sl) - } - - sv_loc = SG_MALLOC(int32_t, s); - bsv_loc = SG_MALLOC(int32_t, s); - sp_alpha = SG_MALLOC(float64_t, sl); - sp_h = SG_MALLOC(float64_t, sl); - sp_y = SG_MALLOC(int32_t, sl); - - if (sl < 500) - sp_D = SG_MALLOC(float32_t, sl*sl); - - for (i = 0; i < sl; i++) - sp_h[i] = -1.0; - memset(alpha, 0, ell*sizeof(float64_t)); - - /* randomly reorder the component to select */ - for (i = 0; i < ell; i++) - aux[i] = i; - Randnext = 1; - for (i = 0; i < ell; i++) - { - j = ThRandPos % ell; - k = ThRandPos % ell; - ll = aux[j]; aux[j] = aux[k]; aux[k] = ll; - } - - nbsv = nsv = 0; - for (i = 0; i < n; i++) - { - if (verbosity > 0) - SG_SINFO("%d...", i) - SplitParts(s, i, n, &ll, &off); - - if (sl < 500) - { - for (j = 0; j < ll; j++) - { - sp_y[j] = y[aux[j+off]]; - for (k = j; k < ll; k++) - sp_D[k*sl + j] = sp_D[j*sl + k] - = y[aux[j+off]] * y[aux[k+off]] - * (float32_t)kernel->Get(aux[j+off], aux[k+off]); - } - - memset(sp_alpha, 0, sl*sizeof(float64_t)); - - /* call the 
gradient projection QP solver */ - gpm_solver(projection_solver, projection_projector, ll, sp_D, sp_h, - c_const, 0.0, sp_y, sp_alpha, delta*10, NULL); - } - else - { - QPproblem p2; - QPproblem::copy_subproblem(&p2, this, ll, aux + off); - p2.chunk_size = (int32_t) ((float64_t)chunk_size / sqrt((float64_t)n)); - p2.q = (int32_t) ((float64_t)q / sqrt((float64_t)n)); - p2.maxmw = ll*ll*4 / (1024 * 1024); - if (p2.maxmw > maxmw / 2) - p2.maxmw = maxmw / 2; - p2.verbosity = 0; - p2.delta = delta * 10.0; - p2.PreprocessMode = 0; - kernel->KernelEvaluations += p2.gpdtsolve(sp_alpha); - } - - for (j = 0; j < ll; j++) - { - /* modify bound support vector approximation */ - if (sp_alpha[j] < (c_const-DELTAsv)) - sp_alpha[j] = 0.0; - else - sp_alpha[j] = c_const; - if (sp_alpha[j] > DELTAsv) - { - if (sp_alpha[j] < (c_const-DELTAsv)) - sv_loc[nsv++] = aux[j+off]; - else - bsv_loc[nbsv++] = aux[j+off]; - alpha[aux[j+off]] = sp_alpha[j]; - } - } - } - - Randnext = 1; - - /* add the known support vectors to the working set */ - memset(sv, 0, ell*sizeof(int32_t)); - ll = (nsv < chunk_size ? nsv : chunk_size); - for (i = 0; i < ll; i++) - { - do { - j = sv_loc[ThRandPos % nsv]; - } while (sv[j] != 0); - sv[j] = 1; - } - - /* add the known bound support vectors to the working set */ - ll = ((nsv+nbsv) < chunk_size ? 
(nsv+nbsv) : chunk_size); - for (; i < ll; i++) - { - do { - j = bsv_loc[ThRandPos % nbsv]; - } while (sv[j] != 0); - sv[j] = 1; - } - - /* eventually fill up the working set with other components - randomly chosen */ - for (; i < chunk_size; i++) - { - do { - j = ThRandPos % ell; - } while (sv[j] != 0); - sv[j] = 1; - } - - - /* dealloc temporary arrays */ - if (sl < 500) SG_FREE(sp_D); - SG_FREE(sp_y ); - SG_FREE(sp_h ); - SG_FREE(sv_loc ); - SG_FREE(bsv_loc ); - SG_FREE(sp_alpha); - - if (verbosity > 0) - { - SG_SINFO("\n Preprocessing: SV = %d", nsv) - SG_SINFO(", BSV = %d\n", nbsv) - } - - return(nsv); -} - -/******************************************************************************/ -/*** Compute the QP problem solution ***/ -/******************************************************************************/ -float64_t QPproblem::gpdtsolve(float64_t *solution) -{ - int32_t i, j, k, z, iin, jin, nit, tot_vpm_iter, lsCount; - int32_t tot_vpm_secant, projCount, proximal_count; - int32_t vpmWarningThreshold; - int32_t nzin, nzout; - int32_t *sp_y; /* labels vector */ - int32_t *indnzin, *indnzout; /* nonzero components indices vectors */ - float32_t *sp_D; /* quadratic part of the objective function */ - float64_t *sp_h, *sp_hloc, /* linear part of the objective function */ - *sp_alpha,*stloc; /* variables and gradient updating vectors */ - float64_t sp_e, aux, fval, tau_proximal_this, dfval; - float64_t *vau; - float64_t *weight; - float64_t tot_prep_time, tot_vpm_time, tot_st_time, total_time; - sCache *Cache; - cachetype *ptmw; - clock_t t, ti; - - Cache = new sCache(KER, maxmw, ell); - if (chunk_size > ell) chunk_size = ell; - - if (chunk_size <= 20) - vpmWarningThreshold = 30*chunk_size; - else if (chunk_size <= 200) - vpmWarningThreshold = 20*chunk_size + 200; - else - vpmWarningThreshold = 10*chunk_size + 2200; - - kktold = 10000.0; - if (delta <= 5e-3) - { - if ( (chunk_size <= 20) | ((float64_t)chunk_size/ell <= 0.001) ) - DELTAvpm = delta * 0.1; - else 
if ( (chunk_size <= 200) | ((float64_t)chunk_size/ell <= 0.005) ) - DELTAvpm = delta * 0.5; - else - DELTAvpm = delta; - } - else - { - if ( (chunk_size <= 200) | ((float64_t)chunk_size/ell <= 0.005) ) - DELTAvpm = (1e-3 < delta*0.1) ? 1e-3 : delta*0.1; - else - DELTAvpm = 5e-3; - } - - InitialDELTAvpm = DELTAvpm; - DELTAsv = EPS_SV * c_const; - DELTAkin = 1.0; - - q = q & (~1); - nb = ell - chunk_size; - tot_vpm_iter = 0; - tot_vpm_secant = 0; - - tot_prep_time = tot_vpm_time = tot_st_time = total_time = 0.0; - - ti = clock(); - - /* arrays allocation */ - SG_SDEBUG("ell:%d, chunk_size:%d, nb:%d dim:%d\n", ell, chunk_size,nb, dim) - ing = SG_MALLOC(int32_t, ell); - inaux = SG_MALLOC(int32_t, ell); - index_in = SG_MALLOC(int32_t, chunk_size); - index_out = SG_MALLOC(int32_t, ell); - indnzout = SG_MALLOC(int32_t, nb); - alpha = SG_MALLOC(float64_t, ell); - - memset(alpha, 0, ell*sizeof(float64_t)); - memset(ing, 0, ell*sizeof(int32_t)); - - if (verbosity > 0 && PreprocessMode != 0) - SG_SINFO("\n*********** Begin setup step...\n") - t = clock(); - - switch(PreprocessMode) - { - case 1: Preprocess1(KER, inaux, ing); break; - case 0: - default: - Preprocess0(inaux, ing); break; - } - - for (j = k = i = 0; i < ell; i++) - if (ing[i] == 0) - index_out[j++] = i; - else - index_in[k++] = i; - - t = clock() - t; - if (verbosity > 0 && PreprocessMode != 0) - { - SG_SINFO(" Time for setup: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC) - SG_SINFO( - "\n\n*********** Begin decomposition technique...\n"); - } - - /* arrays allocation */ - bmem = SG_MALLOC(int32_t, ell); - bmemrid = SG_MALLOC(int32_t, chunk_size); - pbmr = SG_MALLOC(int32_t, chunk_size); - cec = SG_MALLOC(int32_t, ell); - indnzin = SG_MALLOC(int32_t, chunk_size); - inold = SG_MALLOC(int32_t, chunk_size); - incom = SG_MALLOC(int32_t, chunk_size); - vau = SG_MALLOC(float64_t, ell); - grad = SG_MALLOC(float64_t, ell); - weight = SG_MALLOC(float64_t, dim); - st = SG_MALLOC(float64_t, ell); - stloc = SG_MALLOC(float64_t, 
ell); - - for (i = 0; i < ell; i++) - { - bmem[i] = 0; - cec[i] = 0; - st[i] = 0; - } - - sp_y = SG_MALLOC(int32_t, chunk_size); - sp_D = SG_MALLOC(float32_t, chunk_size*chunk_size); - sp_alpha = SG_MALLOC(float64_t, chunk_size); - sp_h = SG_MALLOC(float64_t, chunk_size); - sp_hloc = SG_MALLOC(float64_t, chunk_size); - - for (i = 0; i < chunk_size; i++) - cec[index_in[i]] = cec[index_in[i]]+1; - - for (i = chunk_size-1; i >= 0; i--) - { - incom[i] = -1; - sp_alpha[i] = 0.0; - bmem[index_in[i]] = 1; - } - - if (verbosity == 1) - { - SG_SINFO(" IT | Prep Time | Solver IT | Solver Time |") - SG_SINFO(" Grad Time | KKT violation\n") - SG_SINFO("------+-----------+-----------+-------------+") - SG_SINFO("-----------+--------------\n") - } - - /***************************************************************************/ - /* Begin the problem resolution loop */ - nit = 0; - do - { - t = clock(); - if ((nit % 10) == 9) - { - if ((t-ti) > 0) - total_time += (float64_t)(t-ti) / CLOCKS_PER_SEC; - else - total_time += (float64_t)(ti-t) / CLOCKS_PER_SEC; - ti = t; - } - - if (verbosity > 1) - SG_SINFO("\n*********** ITERATION: %d\n", nit + 1) - else if (verbosity > 0) - SG_SINFO("%5d |", nit + 1) - else - SG_SINFO(".") - fflush(stdout); - - nzout = 0; - for (k = 0; k < nb; k++) - if (alpha_out(k)>DELTAsv) - { - indnzout[nzout] = index_out[k]; - nzout++; - } - - sp_e = 0.; - for (j = 0; j < nzout; j++) - { - vau[j] = y[indnzout[j]]*alpha[indnzout[j]]; - sp_e += vau[j]; - } - - if (verbosity > 1) - SG_SINFO(" spe: %e ", sp_e) - - for (i = 0; i < chunk_size; i++) - sp_y[i] = y_in(i); - - /* Construct the objective function Hessian */ - for (i = 0; i < chunk_size; i++) - { - iin = index_in[i]; - ptmw = Cache->GetRow(iin); - if (ptmw != 0) - { - for (j = 0; j <= i; j++) - sp_D[i*chunk_size + j] = sp_y[i]*sp_y[j] * ptmw[index_in[j]]; - } - else if (incom[i] == -1) - for (j = 0; j <= i; j++) - sp_D[i*chunk_size + j] = sp_y[i]*sp_y[j] - * (float32_t)KER->Get(iin, index_in[j]); - else 
- { - for (j = 0; j < i; j++) - if (incom[j] == -1) - sp_D[i*chunk_size + j] - = sp_y[i]*sp_y[j] * (float32_t)KER->Get(iin, index_in[j]); - else - sp_D[i*chunk_size + j] - = sp_D[incom[j]*chunk_size + incom[i]]; - sp_D[i*chunk_size + i] - = sp_y[i]*sp_y[i] * (float32_t)KER->Get(iin, index_in[i]); - } - } - for (i = 0; i < chunk_size; i++) - { - for (j = 0; j < i; j++) - sp_D[j*chunk_size + i] = sp_D[i*chunk_size + j]; - } - - if (nit == 0 && PreprocessMode > 0) - { - for (i = 0; i < chunk_size; i++) - { - iin = index_in[i]; - aux = 0.; - ptmw = Cache->GetRow(iin); - if (ptmw == NULL) - for (j = 0; j < nzout; j++) - aux += vau[j] * KER->Get(iin, indnzout[j]); - else - for (j = 0; j < nzout; j++) - aux += vau[j] * ptmw[indnzout[j]]; - sp_h[i] = y_in(i) * aux - 1.0; - } - } - else - { - for (i = 0; i < chunk_size; i++) - vau[i] = alpha_in(i); - for (i = 0; i < chunk_size; i++) - { - aux = 0.0; - for (j = 0; j < chunk_size; j++) - aux += sp_D[i*chunk_size + j] * vau[j]; - sp_h[i] = st[index_in[i]] * y_in(i) - 1.0 - aux; - } - } - - t = clock() - t; - if (verbosity > 1) - SG_SINFO(" Preparation Time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC) - else if (verbosity > 0) - SG_SINFO(" %8.2lf |", (float64_t)t/CLOCKS_PER_SEC) - tot_prep_time += (float64_t)t/CLOCKS_PER_SEC; - - /*** Proximal point modification: first type ***/ - - if (tau_proximal < 0.0) - tau_proximal_this = 0.0; - else - tau_proximal_this = tau_proximal; - proximal_count = 0; - do { - t = clock(); - for (i = 0; i < chunk_size; i++) - { - vau[i] = sp_D[i*chunk_size + i]; - sp_h[i] -= tau_proximal_this * alpha_in(i); - sp_D[i*chunk_size + i] += (float32_t)tau_proximal_this; - } - - if (kktold < delta*100) - for (i = 0; i < chunk_size; i++) - sp_alpha[i] = alpha_in(i); - else - for (i = 0; i < chunk_size; i++) - sp_alpha[i] = 0.0; - - /*** call the chosen inner gradient projection QP solver ***/ - i = gpm_solver(projection_solver, projection_projector, chunk_size, - sp_D, sp_h, c_const, sp_e, sp_y, sp_alpha, - 
DELTAvpm*DELTAkin, &lsCount, &projCount); - - if (i > vpmWarningThreshold) - { - if (ker_type == 2) - { - SG_SINFO("\n WARNING: inner subproblem hard to solve;") - SG_SINFO(" setting a smaller -q or") - SG_SINFO(" tuning -c and -g options might help.\n") - } - else - { - SG_SINFO("\n WARNING: inner subproblem hard to solve;") - SG_SINFO(" set a smaller -q or") - SG_SINFO(" try a better data scaling.\n") - } - } - - t = clock() - t; - tot_vpm_iter += i; - tot_vpm_secant += projCount; - tot_vpm_time += (float64_t)t/CLOCKS_PER_SEC; - if (verbosity > 1) - { - SG_SINFO(" Solver it: %d", i) - SG_SINFO(", ls: %d", lsCount) - SG_SINFO(", time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC) - } - else if (verbosity > 0) - { - SG_SINFO(" %6d", i) - SG_SINFO(" | %8.2lf |", (float64_t)t/CLOCKS_PER_SEC) - } - - /*** Proximal point modification: second type ***/ - - for (i = 0; i < chunk_size; i++) - sp_D[i*chunk_size + i] = (float32_t)vau[i]; - tau_proximal_this = 0.0; - if (tau_proximal < 0.0) - { - dfval = 0.0; - for (i = 0; i < chunk_size; i++) - { - aux = 0.0; - for (j = 0; j < chunk_size; j++) - aux += sp_D[i*chunk_size + j]*(alpha_in(j) - sp_alpha[j]); - dfval += (0.5*aux - st[index_in[i]]*y_in(i) + 1.0) * (alpha_in(i) - sp_alpha[i]); - } - - aux=0.0; - for (i = 0; i < chunk_size; i++) - aux += (alpha_in(i) - sp_alpha[i])*(alpha_in(i) - sp_alpha[i]); - - if ((-dfval/aux) < -0.5*tau_proximal) - { - tau_proximal_this = -tau_proximal; - if (verbosity > 0) - SG_SDEBUG("tau threshold: %lf ", -dfval/aux) - } - } - proximal_count++; - } while (tau_proximal_this != 0.0 && proximal_count < 2); // Proximal point loop - - t = clock(); - - nzin = 0; - for (j = 0; j < chunk_size; j++) - { - if (nit == 0) - aux = sp_alpha[j]; - else - aux = sp_alpha[j] - alpha_in(j); - if (fabs(aux) > DELTAsv) - { - indnzin[nzin] = index_in[j]; - grad[nzin] = aux * y_in(j); - nzin++; - } - } - - // in case of LINADD enabled use faster linadd variant - if (KER->get_kernel()->has_property(KP_LINADD) && 
get_linadd_enabled()) - { - KER->get_kernel()->clear_normal() ; - - for (j = 0; j < nzin; j++) - KER->get_kernel()->add_to_normal(indnzin[j], grad[j]); - - if (nit == 0 && PreprocessMode > 0) - { - for (j = 0; j < nzout; j++) - { - jin = indnzout[j]; - KER->get_kernel()->add_to_normal(jin, alpha[jin] * y[jin]); - } - } - - for (i = 0; i < ell; i++) - st[i] += KER->get_kernel()->compute_optimized(i); - } - else // nonlinear kernel - { - k = Cache->DivideMP(ing, indnzin, nzin); - for (j = 0; j < k; j++) - { - ptmw = Cache->FillRow(indnzin[ing[j]]); - for (i = 0; i < ell; i++) - st[i] += grad[ing[j]] * ptmw[i]; - } - - if (PreprocessMode > 0 && nit == 0) - { - clock_t ti2; - - ti2 = clock(); - for (j = 0; j < nzout; j++) - { - jin = indnzout[j]; - ptmw = Cache->FillRow(jin); - for (i = 0; i < ell; i++) - st[i] += alpha[jin] * y[jin] * ptmw[i]; - } - if (verbosity > 1) - SG_SINFO( - " G*x0 time: %.2lf\n", (float64_t)(clock()-ti2)/CLOCKS_PER_SEC); - } - } - - /*** sort the vectors for cache managing ***/ - - t = clock() - t; - if (verbosity > 1) - SG_SINFO(" Gradient updating time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC) - else if (verbosity > 0) - SG_SINFO(" %8.2lf |", (float64_t)t/CLOCKS_PER_SEC) - tot_st_time += (float64_t)t/CLOCKS_PER_SEC; - - /*** global updating of the solution vector ***/ - for (i = 0; i < chunk_size; i++) - alpha_in(i) = sp_alpha[i]; - - if (verbosity > 1) - { - j = k = 0; - for (i = 0; i < ell; i++) - { - if (is_free(i)) j++; - if (is_bound(i)) k++; - } - SG_SINFO(" SV: %d", j+k) - SG_SINFO(", BSV: %d\n", k) - } - Cache->Iteration(); - nit = nit+1; - } while (!optimal() && !(CSignal::cancel_computations())); - /* End of the problem resolution loop */ - /***************************************************************************/ - - t = clock(); - if ((t-ti) > 0) - total_time += (float64_t)(t-ti) / CLOCKS_PER_SEC; - else - total_time += (float64_t)(ti-t) / CLOCKS_PER_SEC; - ti = t; - - sg_memcpy(solution, alpha, ell * sizeof(float64_t)); - - /* 
objective function evaluation */ - fval = 0.0; - for (i = 0; i < ell; i++) - fval += alpha[i]*(y[i]*st[i]*0.5 - 1.0); - - SG_SINFO("\n------+-----------+-----------+-------------+") - SG_SINFO("-----------+--------------\n") - SG_SINFO( - "\n- TOTAL ITERATIONS: %i\n", nit); - - if (verbosity > 1) - { - j = 0; - k = 0; - z = 0; - for (i = ell - 1; i >= 0; i--) - { - if (cec[i] > 0) j++; - if (cec[i] > 1) k++; - if (cec[i] > 2) z++; - } - SG_SINFO( - "- Variables entering the working set at least one time: %i\n", j); - SG_SINFO( - "- Variables entering the working set at least two times: %i\n", k); - SG_SINFO( - "- Variables entering the working set at least three times: %i\n", z); - } - - - SG_FREE(bmem); - SG_FREE(bmemrid); - SG_FREE(pbmr); - SG_FREE(cec); - SG_FREE(ing); - SG_FREE(inaux); - SG_FREE(indnzin); - SG_FREE(index_in); - SG_FREE(inold); - SG_FREE(incom); - SG_FREE(indnzout); - SG_FREE(index_out); - SG_FREE(vau); - SG_FREE(alpha); - SG_FREE(weight); - SG_FREE(grad); - SG_FREE(stloc); - SG_FREE(st); - SG_FREE(sp_h); - SG_FREE(sp_hloc); - SG_FREE(sp_y); - SG_FREE(sp_D); - SG_FREE(sp_alpha); - delete Cache; - - aux = KER->KernelEvaluations; - SG_SINFO("- Total CPU time: %lf\n", total_time) - if (verbosity > 0) - { - SG_SINFO( - "- Total kernel evaluations: %.0lf\n", aux); - SG_SINFO( - "- Total inner solver iterations: %i\n", tot_vpm_iter); - if (projection_projector == 1) - SG_SINFO( - "- Total projector iterations: %i\n", tot_vpm_secant); - SG_SINFO( - "- Total preparation time: %lf\n", tot_prep_time); - SG_SINFO( - "- Total inner solver time: %lf\n", tot_vpm_time); - SG_SINFO( - "- Total gradient updating time: %lf\n", tot_st_time); - } - SG_SINFO("- Objective function value: %lf\n", fval) - objective_value=fval; - return aux; -} - -/******************************************************************************/ -/*** Quick sort for integer vectors ***/ -/******************************************************************************/ -void 
quick_si(int32_t a[], int32_t n) -{ - int32_t i, j, s, d, l, x, w, ps[20], pd[20]; - - l = 0; - ps[0] = 0; - pd[0] = n-1; - do - { - s = ps[l]; - d = pd[l]; - l--; - do - { - i = s; - j = d; - x = a[(s+d)/2]; - do - { - while (a[i] < x) i++; - while (a[j] > x) j--; - if (i <= j) - { - w = a[i]; - a[i] = a[j]; - i++; - a[j] = w; - j--; - } - } while(i<=j); - if (j-s > d-i) - { - l++; - ps[l] = s; - pd[l] = j; - s = i; - } - else - { - if (i < d) - { - l++; - ps[l] = i; - pd[l] = d; - } - d = j; - } - } while (s < d); - } while (l >= 0); -} - -/******************************************************************************/ -/*** Quick sort for real vectors returning also the exchanges ***/ -/******************************************************************************/ -void quick_s2(float64_t a[], int32_t n, int32_t ia[]) -{ - int32_t i, j, s, d, l, iw, ps[20], pd[20]; - float64_t x, w; - - l = 0; - ps[0] = 0; - pd[0] = n-1; - do - { - s = ps[l]; - d = pd[l]; - l--; - do - { - i = s; - j = d; - x = a[(s+d)/2]; - do - { - while (a[i] < x) i++; - while (a[j] > x) j--; - if (i <= j) - { - iw = ia[i]; - w = a[i]; - ia[i] = ia[j]; - a[i] = a[j]; - i++; - ia[j] = iw; - a[j] = w; - j--; - } - } while (i <= j); - if (j-s > d-i) - { - l++; - ps[l] = s; - pd[l] = j; - s = i; - } - else - { - if (i < d) - { - l++; - ps[l] = i; - pd[l] = d; - } - d = j; - } - } while (s < d); - } while(l>=0); -} - -/******************************************************************************/ -/*** Quick sort for integer vectors returning also the exchanges ***/ -/******************************************************************************/ -void quick_s3(int32_t a[], int32_t n, int32_t ia[]) -{ - int32_t i, j, s, d, l, iw, w, x, ps[20], pd[20]; - - l = 0; - ps[0] = 0; - pd[0] = n-1; - do - { - s = ps[l]; - d = pd[l]; - l--; - do - { - i = s; - j = d; - x = a[(s+d)/2]; - do - { - while (a[i] < x) i++; - while (a[j] > x) j--; - if (i <= j) - { - iw = ia[i]; - w = a[i]; - ia[i] = ia[j]; - 
a[i] = a[j]; - i++; - ia[j] = iw; - a[j] = w; - j--; - } - } while (i <= j); - if (j-s > d-i) - { - l++; - ps[l] = s; - pd[l] = j; - s = i; - } - else - { - if (i < d) - { - l++; - ps[l] = i; - pd[l] = d; - } - d = j; - } - } while (s < d); - } while (l >= 0); -} -} - -#endif // DOXYGEN_SHOULD_SKIP_THIS - -/******************************************************************************/ -/*** End of gpdtsolve.cpp file ***/ -/******************************************************************************/ -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/external/gpdtsolve.h b/src/shogun/lib/external/gpdtsolve.h deleted file mode 100644 index fceba025b2a..00000000000 --- a/src/shogun/lib/external/gpdtsolve.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - *** Authors: *** - *** Thomas Serafini, Luca Zanni *** - *** Dept. of Mathematics, University of Modena and Reggio Emilia - ITALY *** - *** serafini.thomas@unimo.it, zanni.luca@unimo.it *** - *** Gaetano Zanghirati *** - *** Dept. of Mathematics, University of Ferrara - ITALY *** - *** g.zanghirati@unife.it *** - *** *** - *** Software homepage: http://dm.unife.it/gpdt *** - *** *** - *** This work is supported by the Italian FIRB Projects *** - *** 'Statistical Learning: Theory, Algorithms and Applications' *** - *** (grant RBAU01877P), http://slipguru.disi.unige.it/ASTA *** - *** and *** - *** 'Parallel Algorithms and Numerical Nonlinear Optimization' *** - *** (grant RBAU01JYPN), http://dm.unife.it/pn2o *** - *** *** - *** Copyright (C) 2004 by T. Serafini, G. Zanghirati, L. Zanni. 
*** - *** *** - *** SHOGUN adaptions Written (W) 2006-2008 Soeren Sonnenburg *** - */ - -#include -#ifdef USE_GPL_SHOGUN - -#include - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -namespace shogun -{ -/** class QProblem */ -class QPproblem -{ -// ----------------- Public Data --------------- -public: - /** chunk size */ - int32_t chunk_size; - /** ell */ - int32_t ell; - /** y */ - int32_t *y; - /** delta sv */ - float64_t DELTAsv; - /** q */ - int32_t q; - /** max mw */ - int32_t maxmw; - /** c const */ - float64_t c_const; - /** bee */ - float64_t bee; - /** delta */ - float64_t delta; - /** linadd */ - bool linadd; - - /** kernel */ - sKernel* KER; - /** kernel type */ - int32_t ker_type; - /** projection solver */ - int32_t projection_solver; - /** projection projector */ - int32_t projection_projector; - /** preprocess mode */ - int32_t PreprocessMode; - /** preprocess size */ - int32_t preprocess_size; - /** verbosity */ - int32_t verbosity; - /** tau proximal */ - float64_t tau_proximal; - /** objective value */ - float64_t objective_value; - -// ----------------- Public Methods --------------- - /** constructor */ - QPproblem (); - ~QPproblem(); - - /** read SVM file - * - * @param fInput input filename - * @return an int - */ - int32_t ReadSVMFile (char *fInput); - - /** read GPDT binary - * - * @param fName input filename - * @return an int - */ - int32_t ReadGPDTBinary(char *fName); - - /** check if 2-class - * - * @return an int - */ - int32_t Check2Class (); - - /** subproblem - * - * @param dst where to copy subproblem to - * @param ker problem kernel - * @param len length - * @param perm perm - */ - static void copy_subproblem(QPproblem* dst, QPproblem* ker, int32_t len, int32_t *perm); - - /** PrepMP */ - void PrepMP (); - - /** solve gpdt - * - * @param solution - * @return something floaty - */ - float64_t gpdtsolve (float64_t *solution); - - /** solve pgpdt - * - * @param solution - * @return something floaty - */ - float64_t pgpdtsolve (float64_t 
*solution); - - /** check if lineadd is enabled - * - * @return if lineadd is enabled - */ - inline bool get_linadd_enabled() - { - return linadd; - } - - /** @return object name */ - virtual const char* get_name() const { return "QPproblem"; } - -// ----------------- Private Data --------------- -private: - int32_t dim; - int32_t *index_in, *index_out; - int32_t *ing, *inaux, *inold, *incom; - int32_t *cec; - int32_t nb; - int32_t *bmem, *bmemrid, *pbmr; - int32_t my_chunk_size; // chunk_size for the current processor - int32_t my_spD_offset; // offset of the current processor into sp_D matrix - int32_t recvl[32], displ[32]; - float64_t kktold; - float64_t DELTAvpm, InitialDELTAvpm, DELTAkin; - float64_t *alpha; - float64_t *grad, *st; - -// ----------------- Private Methods --------------- -private: - int32_t Preprocess0 (int32_t *aux, int32_t *sv); - int32_t Preprocess1 (sKernel* KER, int32_t *aux, int32_t *sv); - int32_t optimal (); - - bool is_zero(int32_t i) { return (alpha[i] < DELTAsv); } - bool is_free(int32_t i) - { return (alpha[i] > DELTAsv && alpha[i] < (c_const - DELTAsv)); } - bool is_bound(int32_t i) { return (alpha[i] > (c_const - DELTAsv)); } - -}; -} -#endif // DOXYGEN_SHOULD_SKIP_THIS -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/external/gpm.cpp b/src/shogun/lib/external/gpm.cpp deleted file mode 100644 index f3108a16338..00000000000 --- a/src/shogun/lib/external/gpm.cpp +++ /dev/null @@ -1,1246 +0,0 @@ -/****************************************************************************** - *** GPDT - Gradient Projection Decomposition Technique *** - ****************************************************************************** - *** *** - *** GPDT is a C++ software designed to train large-scale Support Vector *** - *** Machines for binary classification in both scalar and distributed *** - *** memory parallel environments. 
It uses the Joachims' problem *** - *** decomposition technique to split the whole quadratic programming (QP) *** - *** problem into a sequence of smaller QP subproblems, each one being *** - *** solved by a suitable gradient projection method (GPM). The presently *** - *** implemented GPMs are the Generalized Variable Projection Method *** - *** GVPM (T. Serafini, G. Zanghirati, L. Zanni, "Gradient Projection *** - *** Methods for Quadratic Programs and Applications in Training Support *** - *** Vector Machines"; Optim. Meth. Soft. 20, 2005, 353-378) and the *** - *** Dai-Fletcher Method DFGPM (Y. Dai and R. Fletcher,"New Algorithms for *** - *** Singly Linear Constrained Quadratic Programs Subject to Lower and *** - *** Upper Bounds"; Math. Prog. to appear). *** - *** *** - *** Authors: *** - *** Thomas Serafini, Luca Zanni *** - *** Dept. of Mathematics, University of Modena and Reggio Emilia - ITALY *** - *** serafini.thomas@unimo.it, zanni.luca@unimo.it *** - *** Gaetano Zanghirati *** - *** Dept. of Mathematics, University of Ferrara - ITALY *** - *** g.zanghirati@unife.it *** - *** *** - *** Software homepage: http://dm.unife.it/gpdt *** - *** *** - *** This work is supported by the Italian FIRB Projects *** - *** 'Statistical Learning: Theory, Algorithms and Applications' *** - *** (grant RBAU01877P), http://slipguru.disi.unige.it/ASTA *** - *** and *** - *** 'Parallel Algorithms and Numerical Nonlinear Optimization' *** - *** (grant RBAU01JYPN), http://dm.unife.it/pn2o *** - *** *** - *** Copyright (C) 2004-2008 by T. Serafini, G. Zanghirati, L. Zanni. *** - *** *** - *** COPYRIGHT NOTIFICATION *** - *** *** - *** Permission to copy and modify this software and its documentation *** - *** for internal research use is granted, provided that this notice is *** - *** retained thereon and on all copies or modifications. 
The authors and *** - *** their respective Universities makes no representations as to the *** - *** suitability and operability of this software for any purpose. It is *** - *** provided "as is" without express or implied warranty. *** - *** Use of this software for commercial purposes is expressly prohibited *** - *** without contacting the authors. *** - *** *** - *** This program is free software; you can redistribute it and/or modify *** - *** it under the terms of the GNU General Public License as published by *** - *** the Free Software Foundation; either version 3 of the License, or *** - *** (at your option) any later version. *** - *** *** - *** This program is distributed in the hope that it will be useful, *** - *** but WITHOUT ANY WARRANTY; without even the implied warranty of *** - *** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *** - *** GNU General Public License for more details. *** - *** *** - *** You should have received a copy of the GNU General Public License *** - *** along with this program; if not, write to the Free Software *** - *** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*** - *** *** - *** File: gpm.cpp *** - *** Type: scalar *** - *** Version: 1.0 *** - *** Date: October, 2005 *** - *** Revision: 1 *** - *** *** - *** SHOGUN adaptions Written (W) 2006-2008 Soeren Sonnenburg *** - ******************************************************************************/ -#include -#ifdef USE_GPL_SHOGUN -#include -#include -#include -#include -#include -#include - -namespace shogun -{ -#define maxvpm 30000 /* max number of method iterations allowed */ -#define maxprojections 200 -#define in 8000 /* max size of the QP problem to solve */ -#define alpha_max 1e10 -#define alpha_min 1e-10 - -extern uint32_t Randnext; -#define ThRand (Randnext = Randnext * 1103515245L + 12345L) -#define ThRandPos ((Randnext = Randnext * 1103515245L + 12345L) & 0x7fffffff) - -int32_t InnerProjector( - int32_t method, int32_t n, int32_t *iy, float64_t e, float64_t *qk, float64_t l, - float64_t u, float64_t *x, float64_t &lambda); - -/* Uncomment if you want to allocate vectors on the stack * - * instead of the heap. On some architectures this helps * - * improving speed, but may generate a stack overflow */ -// #define VARIABLES_ON_STACK - -/* Uncomment if you want to use the adaptive steplength * - in the GVPM solver */ -#define VPM_ADA - - -/****************************************************************************** - *** Generalized Variable Projection Method (T. Serafini, G. Zanghirati, *** - *** L. Zanni, "Gradient Projection Methods for Quadratic Programs and *** - *** Applications in Training Support Vector Machines"; *** - *** Optim. Meth. Soft. 
20, 2005, 353-378) *** - ******************************************************************************/ -int32_t gvpm( - int32_t Projector, int32_t n, float32_t *vecA, float64_t *b, float64_t c, - float64_t e, int32_t *iy, float64_t *x, float64_t tol, int32_t *ls, - int32_t *proj) -{ - int32_t i, j, iter, it, it2, luv, info; - float64_t gd, max, normd, dAd, lam, lamnew, alpha, kktlam, ak, bk; - - int32_t lscount = 0, projcount = 0; - float64_t eps = 1.0e-16; - float64_t DELTAsv, ProdDELTAsv; - float64_t lam_ext; - - /* solver-specific settings */ -#ifdef VPM_ADA - int32_t nc = 1, ia1 = -1; - float64_t alpha1, alpha2; -#endif - - /* allocation-dependant settings */ -#ifdef VARIABLES_ON_STACK - - int32_t ipt[in], ipt2[in], uv[in]; - float64_t g[in], y[in], tempv[in], d[in], Ad[in], t[in]; - -#else - - int32_t *ipt, *ipt2, *uv; - float64_t *g, *y, *tempv, *d, *Ad, *t; - - /*** array allocations ***/ - ipt = SG_MALLOC(int32_t, n); - ipt2 = SG_MALLOC(int32_t, n); - uv = SG_MALLOC(int32_t, n); - g = SG_MALLOC(float64_t, n); - y = SG_MALLOC(float64_t, n); - d = SG_MALLOC(float64_t, n); - Ad = SG_MALLOC(float64_t, n); - t = SG_MALLOC(float64_t, n); - tempv = SG_MALLOC(float64_t, n); - -#endif - - DELTAsv = EPS_SV * c; - if (tol <= 1.0e-5 || n <= 20) - ProdDELTAsv = 0.0F; - else - ProdDELTAsv = EPS_SV * c; - - for (i = 0; i < n; i++) - tempv[i] = -x[i]; - lam_ext = 0.0; - projcount += InnerProjector(Projector, n, iy, e, tempv, 0, c, x, lam_ext); - - /* compute g = A*x + b in sparse form * - * (inline computation for better perfomrance) */ - { - float32_t *tempA; - - it = 0; - for (i = 0; i < n; i++) - if (fabs(x[i]) > ProdDELTAsv*1e-2) - ipt[it++] = i; - - memset(t, 0, n*sizeof(float64_t)); - for (i = 0; i < it; i++) - { - tempA = vecA + ipt[i]*n; - for (j = 0; j < n; j++) - t[j] += (tempA[j] * x[ipt[i]]); - } - } - - for (i = 0; i < n; i++) - { - g[i] = t[i] + b[i], - y[i] = g[i] - x[i]; - } - - projcount += InnerProjector(Projector, n, iy, e, y, 0, c, tempv, lam_ext); - 
- max = alpha_min; - for (i = 0; i < n; i++) - { - y[i] = tempv[i] - x[i]; - if (fabs(y[i]) > max) - max = fabs(y[i]); - } - - if (max < c*tol*1e-3) - { - lscount = 0; - iter = 0; - goto Clean; - } - - alpha = 1.0 / max; - - for (iter = 1; iter <= maxvpm; iter++) - { - for (i = 0; i < n; i++) - tempv[i] = alpha*g[i] - x[i]; - - projcount += InnerProjector(Projector, n, iy, e, tempv, 0, c, y, lam_ext); - - gd = 0.0; - for (i = 0; i < n; i++) - { - d[i] = y[i] - x[i]; - gd += d[i] * g[i]; - } - - /* compute Ad = A*d or Ad = Ay-t depending on their sparsity * - * (inline computation for better perfomrance) */ - { - float32_t *tempA; - - it = it2 = 0; - for (i = 0; i < n; i++) - if (fabs(d[i]) > (ProdDELTAsv*1.0e-2)) - ipt[it++] = i; - for (i = 0; i < n; i++) - if (fabs(y[i]) > ProdDELTAsv) - ipt2[it2++] = i; - - memset(Ad, 0, n*sizeof(float64_t)); - if (it < it2) // Ad = A*d - { - for (i = 0; i < it; i++) - { - tempA = vecA + ipt[i]*n; - for (j = 0; j < n; j++) - Ad[j] += (tempA[j] * d[ipt[i]]); - } - } - else // Ad = A*y - t - { - for (i = 0; i < it2; i++) - { - tempA = vecA + ipt2[i]*n; - for (j = 0; j < n; j++) - Ad[j] += (tempA[j] * y[ipt2[i]]); - } - for (j = 0; j < n; j++) - Ad[j] -= t[j]; - } - } - - normd = 0.0; - for (i = 0; i < n; i++) - normd += d[i] * d[i]; - - dAd = 0.0; - for (i = 0; i < n; i++) - dAd += d[i]*Ad[i]; - - if (dAd > eps*normd && gd < 0.0) - { - lam = lamnew = -gd/dAd; - if (lam > 1.0 || lam < 0.0) - lam = 1.0; - else - lscount++; - -#ifdef VPM_ADA - - /*** use the adaptive switching rule for steplength selection ***/ - - // compute alpha1 = (d'* (d.*diaga)) / (d'*Ad); - alpha1 = normd / dAd; - - // alpha2 = d'*Ad / (Ad' * (Ad./diaga)); - alpha2 = 0.0; - for (i = 0; i < n; i++) - alpha2 += Ad[i] * Ad[i]; - alpha2 = dAd / alpha2; - - if ( (nc > 2 - && ( - (ia1 == 1 - && ( - lamnew < 0.1 || (alpha1 > alpha && alpha2 < alpha) - ) - ) - || - (ia1 == -1 - && ( - lamnew > 5.0 || (alpha1 > alpha && alpha2 < alpha) - ) - ) - ) - ) - || nc > 9 ) - { 
- ia1 = -ia1; - nc = 0; - } - - if (ia1 == 1) - alpha = alpha1; - else - alpha = alpha2; - - if (alpha < alpha_min) - alpha = alpha_min; - else if (alpha > alpha_max) - alpha = alpha_max; - - nc++; - -#else - - /*** use the fixed switching rule for steplength selection ***/ - - if ((iter % 6) < 3) // alpha = d'*Ad / (Ad' * (Ad./diaga)); - { - alpha = 0.0; - for (i = 0; i < n; i++) - alpha += Ad[i] * Ad[i]; - alpha = dAd / alpha; - } - else // alpha = (d'* (d.*diaga)) / (d'*Ad); - { - alpha = 0.0; - for (i = 0; i < n; i++) - alpha += d[i] * d[i]; - alpha = alpha / dAd; - } - -#endif - - } - else - { - lam = 1.0; - alpha = alpha_max; - } - - for (i = 0; i < n; i++) - { - x[i] = x[i] + lam*d[i]; - t[i] = t[i] + lam*Ad[i]; - g[i] = b[i] + t[i]; - } - - /*** stopping criterion based on KKT conditions ***/ - bk = 0.0; - ak = 0.0; - for (i = 0; i < n; i++) - { - bk += x[i] * x[i]; - ak += d[i] * d[i]; - } - - if (lam*sqrt(ak) < tol*10 * sqrt(bk)) - { - it = 0; - luv = 0; - kktlam = 0.0; - for (i = 0; i < n; i++) - { - if (x[i] > DELTAsv && x[i] < c-DELTAsv) - { - ipt[it++] = i; - kktlam = kktlam - iy[i]*g[i]; - } - else - uv[luv++] = i; - } - - if (it == 0) - { - if (lam*sqrt(ak) < tol*0.5 * sqrt(bk)) - goto Clean; - } - else - { - kktlam = kktlam/it; - info = 1; - for (i = 0; i < it; i++) - { - if (fabs(iy[ipt[i]]*g[ipt[i]]+kktlam) > tol) - { - info = 0; - break; - } - } - - if (info == 1) - { - for (i = 0; i < luv; i++) - { - if (x[uv[i]] <= DELTAsv) - { - if (g[uv[i]] + kktlam*iy[uv[i]] < -tol) - { - info = 0; - break; - } - } - else - { - if (g[uv[i]] + kktlam*iy[uv[i]] > tol) - { - info = 0; - break; - } - } - } - } - - if (info == 1) - goto Clean; - } - } // stopping rule based on the norm of d_k - } - - SG_SWARNING("GVPM exits after maxvpm = %d iterations.\n", maxvpm) - -Clean: - - /*** allocation-dependant freeing ***/ -#ifndef VARIABLES_ON_STACK - SG_FREE(t); - SG_FREE(uv); - SG_FREE(ipt2); - SG_FREE(ipt); - SG_FREE(g); - SG_FREE(y); - SG_FREE(tempv); - 
SG_FREE(d); - SG_FREE(Ad); -#endif - - if (ls != NULL) *ls = lscount; - if (proj != NULL) *proj = projcount; - return(iter); -} - -/****************************************************************************** - *** Dai-Fletcher QP solver (Y. Dai and R. Fletcher,"New Algorithms for *** - *** Singly Linear Constrained Quadratic Programs Subject to Lower and *** - *** Upper Bounds"; Math. Prog. to appear) *** - ******************************************************************************/ -int32_t FletcherAlg2A( - int32_t Projector, int32_t n, float32_t *vecA, float64_t *b, float64_t c, - float64_t e, int32_t *iy, float64_t *x, float64_t tol, int32_t *ls, - int32_t *proj) -{ - int32_t i, j, iter, it, it2, luv, info, lscount = 0, projcount = 0; - float64_t gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam, lam_ext; - float64_t eps = 1.0e-16; - float64_t DELTAsv, ProdDELTAsv; - - /*** variables for the adaptive nonmonotone linesearch ***/ - int32_t L, llast; - float64_t fr, fbest, fv, fc, fv0; - - /*** allocation-dependant settings ***/ -#ifdef VARIABLES_ON_STACK - - int32_t ipt[in], ipt2[in], uv[in]; - float64_t g[in], y[in], tempv[in], d[in], Ad[in], t[in], - xplus[in], tplus[in], sk[in], yk[in]; -#else - - int32_t *ipt, *ipt2, *uv; - float64_t *g, *y, *tempv, *d, *Ad, *t, *xplus, *tplus, *sk, *yk; - - /*** arrays allocation ***/ - ipt = SG_MALLOC(int32_t, n); - ipt2 = SG_MALLOC(int32_t, n); - uv = SG_MALLOC(int32_t, n); - g = SG_MALLOC(float64_t, n); - y = SG_MALLOC(float64_t, n); - tempv = SG_MALLOC(float64_t, n); - d = SG_MALLOC(float64_t, n); - Ad = SG_MALLOC(float64_t, n); - t = SG_MALLOC(float64_t, n); - xplus = SG_MALLOC(float64_t, n); - tplus = SG_MALLOC(float64_t, n); - sk = SG_MALLOC(float64_t, n); - yk = SG_MALLOC(float64_t, n); - -#endif - - DELTAsv = EPS_SV * c; - if (tol <= 1.0e-5 || n <= 20) - ProdDELTAsv = 0.0F; - else - ProdDELTAsv = EPS_SV * c; - - for (i = 0; i < n; i++) - tempv[i] = -x[i]; - - lam_ext = 0.0; - - projcount += 
InnerProjector(Projector, n, iy, e, tempv, 0, c, x, lam_ext); - - // g = A*x + b; - // SparseProd(n, t, A, x, ipt); - { - float32_t *tempA; - - it = 0; - for (i = 0; i < n; i++) - if (fabs(x[i]) > ProdDELTAsv) - ipt[it++] = i; - - memset(t, 0, n*sizeof(float64_t)); - for (i = 0; i < it; i++) - { - tempA = vecA + ipt[i] * n; - for (j = 0; j < n; j++) - t[j] += (tempA[j]*x[ipt[i]]); - } - } - - for (i = 0; i < n; i++) - { - g[i] = t[i] + b[i], - y[i] = g[i] - x[i]; - } - - projcount += InnerProjector(Projector, n, iy, e, y, 0, c, tempv, lam_ext); - - max = alpha_min; - for (i = 0; i < n; i++) - { - y[i] = tempv[i] - x[i]; - if (fabs(y[i]) > max) - max = fabs(y[i]); - } - - if (max < c*tol*1e-3) - { - lscount = 0; - iter = 0; - goto Clean; - } - - alpha = 1.0 / max; - - fv0 = 0.0; - for (i = 0; i < n; i++) - fv0 += x[i] * (0.5*t[i] + b[i]); - - /*** adaptive nonmonotone linesearch ***/ - L = 2; - fr = alpha_max; - fbest = fv0; - fc = fv0; - llast = 0; - akold = bkold = 0.0; - - for (iter = 1; iter <= maxvpm; iter++) - { - for (i = 0; i < n; i++) - tempv[i] = alpha*g[i] - x[i]; - - projcount += InnerProjector(Projector, n, iy, e, tempv, 0, c, y, lam_ext); - - gd = 0.0; - for (i = 0; i < n; i++) - { - d[i] = y[i] - x[i]; - gd += d[i] * g[i]; - } - - /* compute Ad = A*d or Ad = A*y - t depending on their sparsity */ - { - float32_t *tempA; - - it = it2 = 0; - for (i = 0; i < n; i++) - if (fabs(d[i]) > (ProdDELTAsv*1.0e-2)) - ipt[it++] = i; - for (i = 0; i < n; i++) - if (fabs(y[i]) > ProdDELTAsv) - ipt2[it2++] = i; - - memset(Ad, 0, n*sizeof(float64_t)); - if (it < it2) // compute Ad = A*d - { - for (i = 0; i < it; i++) - { - tempA = vecA + ipt[i]*n; - for (j = 0; j < n; j++) - Ad[j] += (tempA[j] * d[ipt[i]]); - } - } - else // compute Ad = A*y-t - { - for (i = 0; i < it2; i++) - { - tempA = vecA + ipt2[i]*n; - for (j = 0; j < n; j++) - Ad[j] += (tempA[j] * y[ipt2[i]]); - } - for (j = 0; j < n; j++) - Ad[j] -= t[j]; - } - } - - ak = 0.0; - for (i = 0; i < n; i++) - ak += 
d[i] * d[i]; - - bk = 0.0; - for (i = 0; i < n; i++) - bk += d[i]*Ad[i]; - - if (bk > eps*ak && gd < 0.0) // ak is normd - lamnew = -gd/bk; - else - lamnew = 1.0; - - fv = 0.0; - for (i = 0; i < n; i++) - { - xplus[i] = x[i] + d[i]; - tplus[i] = t[i] + Ad[i]; - fv += xplus[i] * (0.5*tplus[i] + b[i]); - } - - if ((iter == 1 && fv >= fv0) || (iter > 1 && fv >= fr)) - { - lscount++; - fv = 0.0; - for (i = 0; i < n; i++) - { - xplus[i] = x[i] + lamnew*d[i]; - tplus[i] = t[i] + lamnew*Ad[i]; - fv += xplus[i] * (0.5*tplus[i] + b[i]); - } - } - - for (i = 0; i < n; i++) - { - sk[i] = xplus[i] - x[i]; - yk[i] = tplus[i] - t[i]; - x[i] = xplus[i]; - t[i] = tplus[i]; - g[i] = t[i] + b[i]; - } - - // update the line search control parameters - - if (fv < fbest) - { - fbest = fv; - fc = fv; - llast = 0; - } - else - { - fc = (fc > fv ? fc : fv); - llast++; - if (llast == L) - { - fr = fc; - fc = fv; - llast = 0; - } - } - - ak = bk = 0.0; - for (i = 0; i < n; i++) - { - ak += sk[i] * sk[i]; - bk += sk[i] * yk[i]; - } - - if (bk < eps*ak) - alpha = alpha_max; - else - { - if (bkold < eps*akold) - alpha = ak/bk; - else - alpha = (akold+ak)/(bkold+bk); - - if (alpha > alpha_max) - alpha = alpha_max; - else if (alpha < alpha_min) - alpha = alpha_min; - } - - akold = ak; - bkold = bk; - - /*** stopping criterion based on KKT conditions ***/ - - bk = 0.0; - for (i = 0; i < n; i++) - bk += x[i] * x[i]; - - if (sqrt(ak) < tol*10 * sqrt(bk)) - { - it = 0; - luv = 0; - kktlam = 0.0; - for (i = 0; i < n; i++) - { - if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)) - { - ipt[it++] = i; - kktlam = kktlam - iy[i]*g[i]; - } - else - uv[luv++] = i; - } - - if (it == 0) - { - if (sqrt(ak) < tol*0.5 * sqrt(bk)) - goto Clean; - } - else - { - - kktlam = kktlam/it; - info = 1; - for (i = 0; i < it; i++) - if ( fabs(iy[ipt[i]] * g[ipt[i]] + kktlam) > tol ) - { - info = 0; - break; - } - - if (info == 1) - { - for (i = 0; i < luv; i++) - { - if (x[uv[i]] <= DELTAsv) - { - if (g[uv[i]] + kktlam*iy[uv[i]] 
< -tol) - { - info = 0; - break; - } - } - else - { - if (g[uv[i]] + kktlam*iy[uv[i]] > tol) - { - info = 0; - break; - } - } - } - } - - if (info == 1) - goto Clean; - } - } - } - - SG_SWARNING("Dai-Fletcher method exits after maxvpm = %d iterations.\n", maxvpm) - -Clean: - -#ifndef VARIABLES_ON_STACK - SG_FREE(sk); - SG_FREE(yk); - SG_FREE(tplus); - SG_FREE(xplus); - SG_FREE(t); - SG_FREE(uv); - SG_FREE(ipt2); - SG_FREE(ipt); - SG_FREE(g); - SG_FREE(y); - SG_FREE(tempv); - SG_FREE(d); - SG_FREE(Ad); -#endif - - if (ls != NULL) *ls = lscount; - if (proj != NULL) *proj = projcount; - return(iter); - -} - -/******************************************************************************/ -/*** Encapsulating method to call the chosen Gradient Projection Method ***/ -/******************************************************************************/ -int32_t gpm_solver( - int32_t Solver, int32_t Projector, int32_t n, float32_t *A, float64_t *b, - float64_t c, float64_t e, int32_t *iy, float64_t *x, float64_t tol, - int32_t *ls, int32_t *proj) -{ - /*** Uncomment the following if you need to scale the QP Hessian matrix - *** before calling the chosen solver - int32_t i, j; - float32_t *ptrA; - float64_t max, s; - - max = fabs(A[0][0]); - for (i = 1; i < n; i++) - if (fabs(A[i][i]) > max) - max = fabs(A[i][i]); - - s = 1.0 / max; - ptrA = vecA; - for (i = 0; i < n; i++) - for (j = 0;j < n; j++) - *ptrA++ = (float32_t)(A[i][j]*s); - - if (Solver == SOLVER_FLETCHER) - j = FletcherAlg2A(n, vecA, b, c/s, e/s, iy, x, tol, ls); - else - j = gvpm(n, vecA, b, c/s, e/s, iy, x, tol, ls); - - for (i = 0; i < n; i++) - x[i] *= s; - ***/ - - /*** calling the chosen solver with unscaled data ***/ - if (Solver == SOLVER_FLETCHER) - return FletcherAlg2A(Projector, n, A, b, c, e, iy, x, tol, ls, proj); - else - return gvpm(Projector, n, A, b, c, e, iy, x, tol, ls, proj); -} - -/****************************************************************************** - *** Piecewise linear monotone 
target function for the Dai-Fletcher *** - *** projector (Y. Dai and R. Fletcher, "New Algorithms for Singly Linear *** - *** Constrained Quadratic Programs Subject to Lower and Upper Bounds"; *** - *** Math. Prog. to appear) *** - ******************************************************************************/ -float64_t ProjectR( - float64_t *x, int32_t n, float64_t lambda, int32_t *a, float64_t b, - float64_t *c, float64_t l, float64_t u) -{ - int32_t i; - float64_t r = 0.0; - - for (i = 0; i < n; i++) - { - x[i] = -c[i] + lambda*(float64_t)a[i]; - if (x[i] >= u) x[i] = u; - else if (x[i] < l) x[i] = l; - r += (float64_t)a[i]*x[i]; - } - - return (r - b); -} - -/****************************************************************************** - *** Dai-Fletcher QP projector (Y. Dai and R. Fletcher, "New Algorithms for *** - *** Singly Linear Constrained Quadratic Programs Subject to Lower and *** - *** Upper Bounds"; Math. Prog. to appear) *** - ******************************************************************************/ -/*** *** - *** Solves the problem min x'*x/2 + c'*x *** - *** subj to a'*x - b = 0 *** - *** l <= x <= u *** - ******************************************************************************/ -int32_t ProjectDai( - int32_t n, int32_t *a, float64_t b, float64_t *c, float64_t l, float64_t u, - float64_t *x, float64_t &lam_ext) -{ - float64_t lambda, lambdal, lambdau, dlambda, lambda_new, tol_lam; - float64_t r, rl, ru, s, tol_r; - int32_t iter; - - tol_lam = 1.0e-11; - tol_r = 1.0e-10 * sqrt((u-l)*(float64_t)n); - lambda = lam_ext; - dlambda = 0.5; - iter = 1; - b = -b; - - // Bracketing Phase - r = ProjectR(x, n, lambda, a, b, c, l, u); - if (fabs(r) < tol_r) - return 0; - - if (r < 0.0) - { - lambdal = lambda; - rl = r; - lambda = lambda + dlambda; - r = ProjectR(x, n, lambda, a, b, c, l, u); - while (r < 0.0) - { - lambdal = lambda; - s = rl/r - 1.0; - if (s < 0.1) s = 0.1; - dlambda = dlambda + dlambda/s; - lambda = lambda + dlambda; - rl = r; 
- r = ProjectR(x, n, lambda, a, b, c, l, u); - } - lambdau = lambda; - ru = r; - } - else - { - lambdau = lambda; - ru = r; - lambda = lambda - dlambda; - r = ProjectR(x, n, lambda, a, b, c, l, u); - while (r > 0.0) - { - lambdau = lambda; - s = ru/r - 1.0; - if (s < 0.1) s = 0.1; - dlambda = dlambda + dlambda/s; - lambda = lambda - dlambda; - ru = r; - r = ProjectR(x, n, lambda, a, b, c, l, u); - } - lambdal = lambda; - rl = r; - } - - - // Secant Phase - s = 1.0 - rl/ru; - dlambda = dlambda/s; - lambda = lambdau - dlambda; - r = ProjectR(x, n, lambda, a, b, c, l, u); - - while ( fabs(r) > tol_r - && dlambda > tol_lam * (1.0 + fabs(lambda)) - && iter < maxprojections ) - { - iter++; - if (r > 0.0) - { - if (s <= 2.0) - { - lambdau = lambda; - ru = r; - s = 1.0 - rl/ru; - dlambda = (lambdau - lambdal) / s; - lambda = lambdau - dlambda; - } - else - { - s = ru/r-1.0; - if (s < 0.1) s = 0.1; - dlambda = (lambdau - lambda) / s; - lambda_new = 0.75*lambdal + 0.25*lambda; - if (lambda_new < (lambda - dlambda)) - lambda_new = lambda - dlambda; - lambdau = lambda; - ru = r; - lambda = lambda_new; - s = (lambdau - lambdal) / (lambdau - lambda); - } - } - else - { - if (s >= 2.0) - { - lambdal = lambda; - rl = r; - s = 1.0 - rl/ru; - dlambda = (lambdau - lambdal) / s; - lambda = lambdau - dlambda; - } - else - { - s = rl/r - 1.0; - if (s < 0.1) s = 0.1; - dlambda = (lambda-lambdal) / s; - lambda_new = 0.75*lambdau + 0.25*lambda; - if (lambda_new > (lambda + dlambda)) - lambda_new = lambda + dlambda; - lambdal = lambda; - rl = r; - lambda = lambda_new; - s = (lambdau - lambdal) / (lambdau-lambda); - } - } - r = ProjectR(x, n, lambda, a, b, c, l, u); - } - - lam_ext = lambda; - if (iter >= maxprojections) - SG_SERROR("Projector exits after max iterations: %d\n", iter) - - return (iter); -} - -#define SWAP(a,b) { register float64_t t=(a);(a)=(b);(b)=t; } - -/*** Median computation using Quick Select ***/ -float64_t quick_select(float64_t *arr, int32_t n) -{ - int32_t low, 
high; - int32_t median; - int32_t middle, l, h; - - low = 0; - high = n-1; - median = (low + high) / 2; - - for (;;) - { - if (high <= low) - return arr[median]; - - if (high == low + 1) - { - if (arr[low] > arr[high]) - SWAP(arr[low], arr[high]); - return arr[median]; - } - - middle = (low + high) / 2; - if (arr[middle] > arr[high]) SWAP(arr[middle], arr[high]); - if (arr[low] > arr[high]) SWAP(arr[low], arr[high]); - if (arr[middle] > arr[low]) SWAP(arr[middle], arr[low]); - - SWAP(arr[middle], arr[low+1]); - - l = low + 1; - h = high; - for (;;) - { - do l++; while (arr[low] > arr[l]); - do h--; while (arr[h] > arr[low]); - if (h < l) - break; - SWAP(arr[l], arr[h]); - } - - SWAP(arr[low], arr[h]); - if (h <= median) - low = l; - if (h >= median) - high = h - 1; - } -} - -/****************************************************************************** - *** Pardalos-Kovoor projector (P.M. Pardalos and N. Kovoor, "An Algorithm *** - *** for a Singly Constrained Class of Quadratic Programs Subject to Upper *** - *** and Lower Bounds"; Math. Prog. 46, 1990, 321-328). 
*** - ****************************************************************************** - *** Solves the problem *** - *** min x'*x/2 + qk'*x *** - *** subj to iy'*x + e = 0 *** - *** l <= x <= u *** - *** iy(i) ~= 0 *** - ******************************************************************************/ - -int32_t Pardalos( - int32_t n, int32_t *iy, float64_t e, float64_t *qk, float64_t low, - float64_t up, float64_t *x) -{ - int32_t i, l, iter; /* conters */ - int32_t luv, lxint; /* dimensions */ - float64_t d, xmin, xmax, xmold, xmid, xx, ts, sw, s, s1, testsum; - - /*** allocation-dependant settings ***/ -#ifdef VARIABLES_ON_STACK - int32_t uv[in], uvt[in]; - float64_t xint[2*in+2], xint2[2*in+2], a[in], b[in], at[in], bt[in]; - float64_t newdia[in], newdt[in]; -#else - - int32_t *uv, *uvt; - float64_t *xint, *xint2, *a, *b, *at, *bt, *newdia, *newdt; - - /*** arrays allocation ***/ - uv = SG_MALLOC(int32_t, n); - uvt = SG_MALLOC(int32_t, n); - a = SG_MALLOC(float64_t, n); - b = SG_MALLOC(float64_t, n); - at = SG_MALLOC(float64_t, n); - bt = SG_MALLOC(float64_t, n); - newdia = SG_MALLOC(float64_t, n); - newdt = SG_MALLOC(float64_t, n); - xint = SG_MALLOC(float64_t, (2*n + 2)); - xint2 = SG_MALLOC(float64_t, (2*n + 2)); - -#endif - - d = 0.0; - for (i = 0; i < n; i++) - d += iy[i] * qk[i]; - d = 0.5 * (d-e); - - for (i = 0; i < n; i++) - { - /* The following computations should divide by iy[i] instead * - * of multiply by iy[i], but this is correct for binary classification * - * with labels -1 and 1. 
*/ - if (iy[i] > 0) - { - a[i] = ((qk[i] + low) * iy[i]) * 0.5; - b[i] = ((up + qk[i]) * iy[i]) * 0.5; - } - else - { - b[i] = ((qk[i] + low) * iy[i]) * 0.5; - a[i] = ((up + qk[i]) * iy[i]) * 0.5; - } - newdia[i] = (iy[i]*iy[i]); - } - - xmin = -1e33; - xmax = 1e33; - - /* arrays initialization */ - for (i = 0; i < n; i++) - { - uv[i] = i; /* contains the unset variables */ - xint[i] = a[i]; - xint[n+i] = b[i]; - } - - xmid = xmin; - xint[2*n] = xmin; - xint[2*n+1] = xmax; - ts = 0.0; - sw = 0.0; - luv = n; - lxint = 2*n+2; - - iter = 0; - do { - for (i = 0; i < luv; i++) - { - at[i] = a[uv[i]]; - bt[i] = b[uv[i]]; - newdt[i] = newdia[uv[i]]; - } - - xmold = xmid; - xmid = quick_select(xint, lxint); - if (xmold == xmid) - xmid = xint[(int32_t)(ThRandPos % lxint)]; - - s = ts; - s1 = sw; - for (i = 0; i < luv; i++) - { - if (xmid > bt[i]) - s += newdt[i]*bt[i]; - else if (xmid < at[i]) - s += newdt[i]*at[i]; - else - s1 += newdt[i]; - } - - testsum = s + s1*xmid; - if (testsum <= (d+(1e-15))) - xmin = xmid; - if (testsum >= (d-(1e-15))) - xmax = xmid; - - l = 0; - for (i = 0; i < lxint; i++) - if((xint[i] >= xmin) && (xint[i] <= xmax)) - xint2[l++] = xint[i]; - lxint = l; - sg_memcpy(xint, xint2, lxint*sizeof(float64_t)); - - l = 0; - for (i = 0; i < luv; i++) - { - if (xmin >= bt[i]) - ts += newdt[i]*bt[i]; - else if (xmax <= at[i]) - ts += newdt[i]*at[i]; - else if ((at[i] <= xmin) && (bt[i] >= xmax)) - sw += newdt[i]; - else - uvt[l++] = uv[i]; - } - luv = l; - sg_memcpy(uv, uvt, luv*sizeof(int32_t)); - iter++; - } while(luv != 0 && iter < maxprojections); - - if (sw == 0) - xx = xmin; - else - xx = (d-ts) / sw; - - for (i = 0; i < n; i++) - { - if (b[i] <= xmin) - x[i] = b[i]; - else if (a[i] >= xmax) - x[i] = a[i]; - else if ((a[i]<=xmin) && (xmax<=b[i])) - x[i] = xx; - else - SG_SWARNING("Inner solver troubles...\n") - } - - for (i = 0; i < n; i++) - x[i] = (2.0*x[i]*iy[i]-qk[i]); - -#ifndef VARIABLES_ON_STACK - SG_FREE(newdt); - SG_FREE(newdia); - SG_FREE(a); 
- SG_FREE(b); - SG_FREE(uvt); - SG_FREE(uv); - SG_FREE(bt); - SG_FREE(at); - SG_FREE(xint2); - SG_FREE(xint); -#endif - - return(iter); -} - -/******************************************************************************/ -/*** Wrapper method to call the selected inner projector ***/ -/******************************************************************************/ -int32_t InnerProjector( - int32_t method, int32_t n, int32_t *iy, float64_t e, float64_t *qk, - float64_t l, float64_t u, float64_t *x, float64_t &lambda) -{ - if (method == 0) - return Pardalos(n, iy, e, qk, l, u, x); - else - return ProjectDai(n, iy, e, qk, l, u, x, lambda); -} -} -/******************************************************************************/ -/*** End of gpm.cpp file ***/ -/******************************************************************************/ -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/external/gpm.h b/src/shogun/lib/external/gpm.h deleted file mode 100644 index eba58bcc2b4..00000000000 --- a/src/shogun/lib/external/gpm.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - *** Authors: *** - *** Thomas Serafini, Luca Zanni *** - *** Dept. of Mathematics, University of Modena and Reggio Emilia - ITALY *** - *** serafini.thomas@unimo.it, zanni.luca@unimo.it *** - *** Gaetano Zanghirati *** - *** Dept. 
of Mathematics, University of Ferrara - ITALY *** - *** g.zanghirati@unife.it *** - *** *** - *** Software homepage: http://dm.unife.it/gpdt *** - *** *** - *** This work is supported by the Italian FIRB Projects *** - *** 'Statistical Learning: Theory, Algorithms and Applications' *** - *** (grant RBAU01877P), http://slipguru.disi.unige.it/ASTA *** - *** and *** - *** 'Parallel Algorithms and Numerical Nonlinear Optimization' *** - *** (grant RBAU01JYPN), http://dm.unife.it/pn2o *** - *** *** - *** Copyright (C) 2004-2008 by T. Serafini, G. Zanghirati, L. Zanni. *** - *** *** - *** SHOGUN adaptions Written (W) 2006-2008 Soeren Sonnenburg *** - */ - -#include -#ifdef USE_GPL_SHOGUN -#include - -namespace shogun -{ -/** gpm solver - * @param Solver - * @param Projector - * @param n - * @param A - * @param b - * @param c - * @param e - * @param iy - * @param x - * @param tol - * @param ls - * @param proj - */ -int32_t gpm_solver( - int32_t Solver, int32_t Projector, int32_t n, float32_t *A, float64_t *b, - float64_t c, float64_t e, int32_t *iy, float64_t *x, float64_t tol, - int32_t *ls = 0, int32_t *proj = 0); -} -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/external/libocas.cpp b/src/shogun/lib/external/libocas.cpp deleted file mode 100644 index e577fa157ce..00000000000 --- a/src/shogun/lib/external/libocas.cpp +++ /dev/null @@ -1,1977 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * libocas.c: Implementation of the OCAS solver for training - * linear SVM classifiers. 
- * - * Copyright (C) 2008 Vojtech Franc, xfrancv@cmp.felk.cvut.cz - * Soeren Sonnenburg, soeren.sonnenburg@first.fraunhofer.de - *-------------------------------------------------------------------- */ - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#ifndef _WIN32 -#include -#endif -#include -#include -#include - -#include -#include -#include -#include - -namespace shogun -{ -#define MU 0.1 /* must be from (0,1> 1..means that OCAS becomes equivalent to CPA */ - /* see paper Franc&Sonneburg JMLR 2009 */ - -static const uint32_t QPSolverMaxIter = 10000000; - -static float64_t *H; -static uint32_t BufSize; - -/*---------------------------------------------------------------------- - Returns pointer at i-th column of Hessian matrix. - ----------------------------------------------------------------------*/ -static const float64_t *get_col( uint32_t i) -{ - return( &H[ BufSize*i ] ); -} - -/*---------------------------------------------------------------------- - Returns time of the day in seconds. - ----------------------------------------------------------------------*/ -static float64_t get_time() -{ - struct timeval tv; - if (gettimeofday(&tv, NULL)==0) - return tv.tv_sec+((float64_t)(tv.tv_usec))/1e6; - else - return 0.0; -} - - -/*---------------------------------------------------------------------- - Linear binary Ocas-SVM solver with additinal contraint enforceing - a subset of weights (indices of the weights given by num_nnw/nnw_idx) - to be non-negative. 
- - ----------------------------------------------------------------------*/ -ocas_return_value_T svm_ocas_solver_nnw( - float64_t C, - uint32_t nData, - uint32_t num_nnw, - uint32_t* nnw_idx, - float64_t TolRel, - float64_t TolAbs, - float64_t QPBound, - float64_t MaxTime, - uint32_t _BufSize, - uint8_t Method, - int (*add_pw_constr)(uint32_t, uint32_t, void*), - void (*clip_neg_W)(uint32_t, uint32_t*, void*), - void (*compute_W)(float64_t*, float64_t*, float64_t*, uint32_t, void*), - float64_t (*update_W)(float64_t, void*), - int (*add_new_cut)(float64_t*, uint32_t*, uint32_t, uint32_t, void*), - int (*compute_output)(float64_t*, void* ), - int (*sort)(float64_t*, float64_t*, uint32_t), - void (*ocas_print)(ocas_return_value_T), - void* user_data) -{ - ocas_return_value_T ocas={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - float64_t *b, *alpha, *diag_H; - float64_t *output, *old_output; - float64_t xi, sq_norm_W, QPSolverTolRel, dot_prod_WoldW, sq_norm_oldW; - float64_t A0, B0, GradVal, t, t1, t2, *Ci, *Bi, *hpf, *hpb; - float64_t start_time, ocas_start_time; - uint32_t cut_length; - uint32_t i, *new_cut; - uint32_t *I; -/* uint8_t S = 1; */ - libqp_state_T qp_exitflag; - - float64_t max_cp_norm; - float64_t max_b; - float64_t Cs[2]; - uint8_t S[2]; - - ocas_start_time = get_time(); - ocas.qp_solver_time = 0; - ocas.output_time = 0; - ocas.sort_time = 0; - ocas.add_time = 0; - ocas.w_time = 0; - ocas.print_time = 0; - - BufSize = _BufSize; - - QPSolverTolRel = TolRel*0.5; - - H=NULL; - b=NULL; - alpha=NULL; - new_cut=NULL; - I=NULL; - diag_H=NULL; - output=NULL; - old_output=NULL; - hpf=NULL; - hpb = NULL; - Ci=NULL; - Bi=NULL; - - /* Hessian matrix contains dot product of normal vectors of selected cutting planes */ - H = (float64_t*)LIBOCAS_CALLOC(BufSize*BufSize, float64_t); - if(H == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - /* bias of cutting planes */ - b = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t); - if(b == NULL) - { - ocas.exitflag=-2; - goto 
cleanup; - } - - alpha = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t); - if(alpha == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - /* indices of examples which define a new cut */ - new_cut = (uint32_t*)LIBOCAS_CALLOC(nData, uint32_t); - if(new_cut == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - I = (uint32_t*)LIBOCAS_CALLOC(BufSize, uint32_t); - if(I == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - for(i=0; i< BufSize; i++) I[i] = 2; - - diag_H = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t); - if(diag_H == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - output = (float64_t*)LIBOCAS_CALLOC(nData, float64_t); - if(output == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - old_output = (float64_t*)LIBOCAS_CALLOC(nData, float64_t); - if(old_output == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - /* array of hinge points used in line-serach */ - hpf = (float64_t*) LIBOCAS_CALLOC(nData, float64_t); - if(hpf == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - hpb = (float64_t*) LIBOCAS_CALLOC(nData, float64_t); - if(hpb == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - /* vectors Ci, Bi are used in the line search procedure */ - Ci = (float64_t*)LIBOCAS_CALLOC(nData, float64_t); - if(Ci == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - Bi = (float64_t*)LIBOCAS_CALLOC(nData, float64_t); - if(Bi == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - /* initial cutting planes implementing the non-negativity constraints on W*/ - Cs[0]=10000000.0; - Cs[1]=C; - S[0]=1; - S[1]=1; - for(i=0; i < num_nnw; i++) - { - if(add_pw_constr(nnw_idx[i],i, user_data) != 0) - { - ocas.exitflag=-2; - goto cleanup; - } - diag_H[i] = 1.0; - H[LIBOCAS_INDEX(i,i,BufSize)] = 1.0; - I[i] = 1; - } - - max_cp_norm = 1; - max_b = 0; - - /* */ - ocas.nCutPlanes = num_nnw; - ocas.exitflag = 0; - ocas.nIter = 0; - - /* Compute initial value of Q_P assuming that W is zero vector.*/ - sq_norm_W = 0; - xi = nData; - ocas.Q_P = 
0.5*sq_norm_W + C*xi; - ocas.Q_D = 0; - - /* Compute the initial cutting plane */ - cut_length = nData; - for(i=0; i < nData; i++) - new_cut[i] = i; - - ocas.trn_err = nData; - ocas.ocas_time = get_time() - ocas_start_time; - /* ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f, Q_P-Q_D/abs(Q_P)=%f\n", - ocas.nIter,cur_time, ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P)); - */ - ocas_print(ocas); - - /* main loop */ - while( ocas.exitflag == 0 ) - { - ocas.nIter++; - - /* append a new cut to the buffer and update H */ - b[ocas.nCutPlanes] = -(float64_t)cut_length; - - max_b = LIBOCAS_MAX(max_b,(float64_t)cut_length); - - start_time = get_time(); - - if(add_new_cut( &H[LIBOCAS_INDEX(0,ocas.nCutPlanes,BufSize)], new_cut, cut_length, ocas.nCutPlanes, user_data ) != 0) - { - ocas.exitflag=-2; - goto cleanup; - } - - ocas.add_time += get_time() - start_time; - - /* copy new added row: H(ocas.nCutPlanes,ocas.nCutPlanes,1:ocas.nCutPlanes-1) = H(1:ocas.nCutPlanes-1:ocas.nCutPlanes)' */ - diag_H[ocas.nCutPlanes] = H[LIBOCAS_INDEX(ocas.nCutPlanes,ocas.nCutPlanes,BufSize)]; - for(i=0; i < ocas.nCutPlanes; i++) { - H[LIBOCAS_INDEX(ocas.nCutPlanes,i,BufSize)] = H[LIBOCAS_INDEX(i,ocas.nCutPlanes,BufSize)]; - } - - max_cp_norm = LIBOCAS_MAX(max_cp_norm, sqrt(diag_H[ocas.nCutPlanes])); - - - ocas.nCutPlanes++; - - /* call inner QP solver */ - start_time = get_time(); - - /* compute upper bound on sum of dual variables associated with the positivity constraints */ - Cs[0] = sqrt((float64_t)nData)*(sqrt(C)*sqrt(max_b) + C*max_cp_norm); - -/* qp_exitflag = libqp_splx_solver(&get_col, diag_H, b, &C, I, &S, alpha, - ocas.nCutPlanes, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBOCAS_PLUS_INF,0);*/ - qp_exitflag = libqp_splx_solver(&get_col, diag_H, b, Cs, I, S, alpha, - ocas.nCutPlanes, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBOCAS_PLUS_INF,0); - - ocas.qp_exitflag = qp_exitflag.exitflag; - - ocas.qp_solver_time += get_time() - start_time; - ocas.Q_D = 
-qp_exitflag.QP; - - ocas.nNZAlpha = 0; - for(i=0; i < ocas.nCutPlanes; i++) { - if( alpha[i] != 0) ocas.nNZAlpha++; - } - - sq_norm_oldW = sq_norm_W; - start_time = get_time(); - compute_W( &sq_norm_W, &dot_prod_WoldW, alpha, ocas.nCutPlanes, user_data ); - clip_neg_W(num_nnw, nnw_idx, user_data); - ocas.w_time += get_time() - start_time; - - /* select a new cut */ - switch( Method ) - { - /* cutting plane algorithm implemented in SVMperf and BMRM */ - case 0: - - start_time = get_time(); - if( compute_output( output, user_data ) != 0) - { - ocas.exitflag=-2; - goto cleanup; - } - ocas.output_time += get_time()-start_time; - - xi = 0; - cut_length = 0; - ocas.trn_err = 0; - for(i=0; i < nData; i++) - { - if(output[i] <= 0) ocas.trn_err++; - - if(output[i] <= 1) { - xi += 1 - output[i]; - new_cut[cut_length] = i; - cut_length++; - } - } - ocas.Q_P = 0.5*sq_norm_W + C*xi; - - ocas.ocas_time = get_time() - ocas_start_time; - - /* ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f, 1-Q_D/Q_P=%f, nza=%4d, err=%.2f%%, qpf=%d\n", - ocas.nIter,cur_time, ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P), - ocas.nNZAlpha, 100*(float64_t)ocas.trn_err/(float64_t)nData, ocas.qp_exitflag ); - */ - - start_time = get_time(); - ocas_print(ocas); - ocas.print_time += get_time() - start_time; - - break; - - - /* Ocas strategy */ - case 1: - - /* Linesearch */ - A0 = sq_norm_W -2*dot_prod_WoldW + sq_norm_oldW; - B0 = dot_prod_WoldW - sq_norm_oldW; - - sg_memcpy( old_output, output, sizeof(float64_t)*nData ); - - start_time = get_time(); - if( compute_output( output, user_data ) != 0) - { - ocas.exitflag=-2; - goto cleanup; - } - ocas.output_time += get_time()-start_time; - - uint32_t num_hp = 0; - GradVal = B0; - for(i=0; i< nData; i++) { - - Ci[i] = C*(1-old_output[i]); - Bi[i] = C*(old_output[i] - output[i]); - - float64_t val; - if(Bi[i] != 0) - val = -Ci[i]/Bi[i]; - else - val = -LIBOCAS_PLUS_INF; - - if (val>0) - { -/* hpi[num_hp] = i;*/ - 
hpb[num_hp] = Bi[i]; - hpf[num_hp] = val; - num_hp++; - } - - if( (Bi[i] < 0 && val > 0) || (Bi[i] > 0 && val <= 0)) - GradVal += Bi[i]; - - } - - t = 0; - if( GradVal < 0 ) - { - start_time = get_time(); -/* if( sort(hpf, hpi, num_hp) != 0)*/ - if( sort(hpf, hpb, num_hp) != 0 ) - { - ocas.exitflag=-2; - goto cleanup; - } - ocas.sort_time += get_time() - start_time; - - float64_t t_new, GradVal_new; - i = 0; - while( GradVal < 0 && i < num_hp ) - { - t_new = hpf[i]; - GradVal_new = GradVal + LIBOCAS_ABS(hpb[i]) + A0*(t_new-t); - - if( GradVal_new >= 0 ) - { - t = t + GradVal*(t-t_new)/(GradVal_new - GradVal); - } - else - { - t = t_new; - i++; - } - - GradVal = GradVal_new; - } - } - - /* - t = hpf[0] - 1; - i = 0; - GradVal = t*A0 + Bsum; - while( GradVal < 0 && i < num_hp && hpf[i] < LIBOCAS_PLUS_INF ) { - t = hpf[i]; - Bsum = Bsum + LIBOCAS_ABS(Bi[hpi[i]]); - GradVal = t*A0 + Bsum; - i++; - } - */ - t = LIBOCAS_MAX(t,0); /* just sanity check; t < 0 should not ocure */ - - /* this guarantees that the new solution will not violate the positivity constraints on W */ - t = LIBOCAS_MIN(t,1); - - t1 = t; /* new (best so far) W */ - t2 = t+MU*(1.0-t); /* new cutting plane */ - /* t2 = t+(1.0-t)/10.0; */ - - /* update W to be the best so far solution */ - sq_norm_W = update_W( t1, user_data ); - - /* select a new cut */ - xi = 0; - cut_length = 0; - ocas.trn_err = 0; - for(i=0; i < nData; i++ ) { - - if( (old_output[i]*(1-t2) + t2*output[i]) <= 1 ) - { - new_cut[cut_length] = i; - cut_length++; - } - - output[i] = old_output[i]*(1-t1) + t1*output[i]; - - if( output[i] <= 1) xi += 1-output[i]; - if( output[i] <= 0) ocas.trn_err++; - - } - - ocas.Q_P = 0.5*sq_norm_W + C*xi; - - ocas.ocas_time = get_time() - ocas_start_time; - - /* ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f, 1-Q_D/Q_P=%f, nza=%4d, err=%.2f%%, qpf=%d\n", - ocas.nIter, cur_time, ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P), - ocas.nNZAlpha, 
100*(float64_t)ocas.trn_err/(float64_t)nData, ocas.qp_exitflag ); - */ - - start_time = get_time(); - ocas_print(ocas); - ocas.print_time += get_time() - start_time; - - break; - } - - /* Stopping conditions */ - if( ocas.Q_P - ocas.Q_D <= TolRel*LIBOCAS_ABS(ocas.Q_P)) ocas.exitflag = 1; - if( ocas.Q_P - ocas.Q_D <= TolAbs) ocas.exitflag = 2; - if( ocas.Q_P <= QPBound) ocas.exitflag = 3; - if( MaxTime > 0 && ocas.ocas_time >= MaxTime) ocas.exitflag = 4; - if(ocas.nCutPlanes >= BufSize) ocas.exitflag = -1; - - } /* end of the main loop */ - -cleanup: - - LIBOCAS_FREE(H); - LIBOCAS_FREE(b); - LIBOCAS_FREE(alpha); - LIBOCAS_FREE(new_cut); - LIBOCAS_FREE(I); - LIBOCAS_FREE(diag_H); - LIBOCAS_FREE(output); - LIBOCAS_FREE(old_output); - LIBOCAS_FREE(hpf); -/* LIBOCAS_FREE(hpi);*/ - LIBOCAS_FREE(hpb); - LIBOCAS_FREE(Ci); - LIBOCAS_FREE(Bi); - - ocas.ocas_time = get_time() - ocas_start_time; - - return(ocas); -} - - - -/*---------------------------------------------------------------------- - Linear binary Ocas-SVM solver. 
- ----------------------------------------------------------------------*/ -ocas_return_value_T svm_ocas_solver( - float64_t C, - uint32_t nData, - float64_t TolRel, - float64_t TolAbs, - float64_t QPBound, - float64_t MaxTime, - uint32_t _BufSize, - uint8_t Method, - void (*compute_W)(float64_t*, float64_t*, float64_t*, uint32_t, void*), - float64_t (*update_W)(float64_t, void*), - int (*add_new_cut)(float64_t*, uint32_t*, uint32_t, uint32_t, void*), - int (*compute_output)(float64_t*, void* ), - int (*sort)(float64_t*, float64_t*, uint32_t), - void (*ocas_print)(ocas_return_value_T), - void* user_data) -{ - ocas_return_value_T ocas={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - float64_t *b, *alpha, *diag_H; - float64_t *output, *old_output; - float64_t xi, sq_norm_W, QPSolverTolRel, dot_prod_WoldW, sq_norm_oldW; - float64_t A0, B0, GradVal, t, t1, t2, *Ci, *Bi, *hpf, *hpb; - float64_t start_time, ocas_start_time; - uint32_t cut_length; - uint32_t i, *new_cut; - uint32_t *I; - uint8_t S = 1; - libqp_state_T qp_exitflag; - - ocas_start_time = get_time(); - ocas.qp_solver_time = 0; - ocas.output_time = 0; - ocas.sort_time = 0; - ocas.add_time = 0; - ocas.w_time = 0; - ocas.print_time = 0; - float64_t gap; - - BufSize = _BufSize; - - QPSolverTolRel = TolRel*0.5; - - H=NULL; - b=NULL; - alpha=NULL; - new_cut=NULL; - I=NULL; - diag_H=NULL; - output=NULL; - old_output=NULL; - hpf=NULL; - hpb = NULL; - Ci=NULL; - Bi=NULL; - - /* Hessian matrix contains dot product of normal vectors of selected cutting planes */ - H = (float64_t*)LIBOCAS_CALLOC(BufSize*BufSize, float64_t); - if(H == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - /* bias of cutting planes */ - b = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t); - if(b == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - alpha = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t); - if(alpha == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - /* indices of examples which define a new cut */ - new_cut = 
(uint32_t*)LIBOCAS_CALLOC(nData, uint32_t); - if(new_cut == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - I = (uint32_t*)LIBOCAS_CALLOC(BufSize, uint32_t); - if(I == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - for(i=0; i< BufSize; i++) I[i] = 1; - - diag_H = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t); - if(diag_H == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - output = (float64_t*)LIBOCAS_CALLOC(nData, float64_t); - if(output == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - old_output = (float64_t*)LIBOCAS_CALLOC(nData, float64_t); - if(old_output == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - /* array of hinge points used in line-serach */ - hpf = (float64_t*) LIBOCAS_CALLOC(nData, float64_t); - if(hpf == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - hpb = (float64_t*) LIBOCAS_CALLOC(nData, float64_t); - if(hpb == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - /* vectors Ci, Bi are used in the line search procedure */ - Ci = (float64_t*)LIBOCAS_CALLOC(nData, float64_t); - if(Ci == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - Bi = (float64_t*)LIBOCAS_CALLOC(nData, float64_t); - if(Bi == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - ocas.nCutPlanes = 0; - ocas.exitflag = 0; - ocas.nIter = 0; - - /* Compute initial value of Q_P assuming that W is zero vector.*/ - sq_norm_W = 0; - xi = nData; - ocas.Q_P = 0.5*sq_norm_W + C*xi; - ocas.Q_D = 0; - - /* Compute the initial cutting plane */ - cut_length = nData; - for(i=0; i < nData; i++) - new_cut[i] = i; - - gap=(ocas.Q_P-ocas.Q_D)/CMath::abs(ocas.Q_P); - SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(TolRel), 6) - - ocas.trn_err = nData; - ocas.ocas_time = get_time() - ocas_start_time; - /* ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f, Q_P-Q_D/abs(Q_P)=%f\n", - ocas.nIter,cur_time, ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P)); - */ - ocas_print(ocas); - - /* main loop */ - 
while( ocas.exitflag == 0 ) - { - ocas.nIter++; - - /* append a new cut to the buffer and update H */ - b[ocas.nCutPlanes] = -(float64_t)cut_length; - - start_time = get_time(); - - if(add_new_cut( &H[LIBOCAS_INDEX(0,ocas.nCutPlanes,BufSize)], new_cut, cut_length, ocas.nCutPlanes, user_data ) != 0) - { - ocas.exitflag=-2; - goto cleanup; - } - - ocas.add_time += get_time() - start_time; - - /* copy new added row: H(ocas.nCutPlanes,ocas.nCutPlanes,1:ocas.nCutPlanes-1) = H(1:ocas.nCutPlanes-1:ocas.nCutPlanes)' */ - diag_H[ocas.nCutPlanes] = H[LIBOCAS_INDEX(ocas.nCutPlanes,ocas.nCutPlanes,BufSize)]; - for(i=0; i < ocas.nCutPlanes; i++) { - H[LIBOCAS_INDEX(ocas.nCutPlanes,i,BufSize)] = H[LIBOCAS_INDEX(i,ocas.nCutPlanes,BufSize)]; - } - - ocas.nCutPlanes++; - - /* call inner QP solver */ - start_time = get_time(); - - qp_exitflag = libqp_splx_solver(&get_col, diag_H, b, &C, I, &S, alpha, - ocas.nCutPlanes, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBOCAS_PLUS_INF,0); - - ocas.qp_exitflag = qp_exitflag.exitflag; - - ocas.qp_solver_time += get_time() - start_time; - ocas.Q_D = -qp_exitflag.QP; - - ocas.nNZAlpha = 0; - for(i=0; i < ocas.nCutPlanes; i++) { - if( alpha[i] != 0) ocas.nNZAlpha++; - } - - sq_norm_oldW = sq_norm_W; - start_time = get_time(); - compute_W( &sq_norm_W, &dot_prod_WoldW, alpha, ocas.nCutPlanes, user_data ); - ocas.w_time += get_time() - start_time; - - /* select a new cut */ - switch( Method ) - { - /* cutting plane algorithm implemented in SVMperf and BMRM */ - case 0: - - start_time = get_time(); - if( compute_output( output, user_data ) != 0) - { - ocas.exitflag=-2; - goto cleanup; - } - ocas.output_time += get_time()-start_time; - gap=(ocas.Q_P-ocas.Q_D)/CMath::abs(ocas.Q_P); - SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(TolRel), 6) - - xi = 0; - cut_length = 0; - ocas.trn_err = 0; - for(i=0; i < nData; i++) - { - if(output[i] <= 0) ocas.trn_err++; - - if(output[i] <= 1) { - xi += 1 - output[i]; - new_cut[cut_length] = 
i; - cut_length++; - } - } - ocas.Q_P = 0.5*sq_norm_W + C*xi; - - ocas.ocas_time = get_time() - ocas_start_time; - - /* ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f, 1-Q_D/Q_P=%f, nza=%4d, err=%.2f%%, qpf=%d\n", - ocas.nIter,cur_time, ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P), - ocas.nNZAlpha, 100*(float64_t)ocas.trn_err/(float64_t)nData, ocas.qp_exitflag ); - */ - - start_time = get_time(); - ocas_print(ocas); - ocas.print_time += get_time() - start_time; - - break; - - - /* Ocas strategy */ - case 1: - - /* Linesearch */ - A0 = sq_norm_W -2*dot_prod_WoldW + sq_norm_oldW; - B0 = dot_prod_WoldW - sq_norm_oldW; - - sg_memcpy( old_output, output, sizeof(float64_t)*nData ); - - start_time = get_time(); - if( compute_output( output, user_data ) != 0) - { - ocas.exitflag=-2; - goto cleanup; - } - ocas.output_time += get_time()-start_time; - - uint32_t num_hp = 0; - GradVal = B0; - for(i=0; i< nData; i++) { - - Ci[i] = C*(1-old_output[i]); - Bi[i] = C*(old_output[i] - output[i]); - - float64_t val; - if(Bi[i] != 0) - val = -Ci[i]/Bi[i]; - else - val = -LIBOCAS_PLUS_INF; - - if (val>0) - { -/* hpi[num_hp] = i;*/ - hpb[num_hp] = Bi[i]; - hpf[num_hp] = val; - num_hp++; - } - - if( (Bi[i] < 0 && val > 0) || (Bi[i] > 0 && val <= 0)) - GradVal += Bi[i]; - - } - - t = 0; - if( GradVal < 0 ) - { - start_time = get_time(); -/* if( sort(hpf, hpi, num_hp) != 0)*/ - if( sort(hpf, hpb, num_hp) != 0 ) - { - ocas.exitflag=-2; - goto cleanup; - } - ocas.sort_time += get_time() - start_time; - - float64_t t_new, GradVal_new; - i = 0; - while( GradVal < 0 && i < num_hp ) - { - t_new = hpf[i]; - GradVal_new = GradVal + LIBOCAS_ABS(hpb[i]) + A0*(t_new-t); - - if( GradVal_new >= 0 ) - { - t = t + GradVal*(t-t_new)/(GradVal_new - GradVal); - } - else - { - t = t_new; - i++; - } - - GradVal = GradVal_new; - } - } - - /* - t = hpf[0] - 1; - i = 0; - GradVal = t*A0 + Bsum; - while( GradVal < 0 && i < num_hp && hpf[i] < LIBOCAS_PLUS_INF ) { - t = hpf[i]; 
- Bsum = Bsum + LIBOCAS_ABS(Bi[hpi[i]]); - GradVal = t*A0 + Bsum; - i++; - } - */ - t = LIBOCAS_MAX(t,0); /* just sanity check; t < 0 should not ocure */ - - t1 = t; /* new (best so far) W */ - t2 = t+MU*(1.0-t); /* new cutting plane */ - /* t2 = t+(1.0-t)/10.0; */ - - /* update W to be the best so far solution */ - sq_norm_W = update_W( t1, user_data ); - - /* select a new cut */ - xi = 0; - cut_length = 0; - ocas.trn_err = 0; - for(i=0; i < nData; i++ ) { - - if( (old_output[i]*(1-t2) + t2*output[i]) <= 1 ) - { - new_cut[cut_length] = i; - cut_length++; - } - - output[i] = old_output[i]*(1-t1) + t1*output[i]; - - if( output[i] <= 1) xi += 1-output[i]; - if( output[i] <= 0) ocas.trn_err++; - - } - - ocas.Q_P = 0.5*sq_norm_W + C*xi; - - ocas.ocas_time = get_time() - ocas_start_time; - - /* ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f, 1-Q_D/Q_P=%f, nza=%4d, err=%.2f%%, qpf=%d\n", - ocas.nIter, cur_time, ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P), - ocas.nNZAlpha, 100*(float64_t)ocas.trn_err/(float64_t)nData, ocas.qp_exitflag ); - */ - - start_time = get_time(); - ocas_print(ocas); - ocas.print_time += get_time() - start_time; - - break; - } - - /* Stopping conditions */ - if( ocas.Q_P - ocas.Q_D <= TolRel*LIBOCAS_ABS(ocas.Q_P)) ocas.exitflag = 1; - if( ocas.Q_P - ocas.Q_D <= TolAbs) ocas.exitflag = 2; - if( ocas.Q_P <= QPBound) ocas.exitflag = 3; - if( MaxTime > 0 && ocas.ocas_time >= MaxTime) ocas.exitflag = 4; - if(ocas.nCutPlanes >= BufSize) ocas.exitflag = -1; - - } /* end of the main loop */ - -cleanup: - - LIBOCAS_FREE(H); - LIBOCAS_FREE(b); - LIBOCAS_FREE(alpha); - LIBOCAS_FREE(new_cut); - LIBOCAS_FREE(I); - LIBOCAS_FREE(diag_H); - LIBOCAS_FREE(output); - LIBOCAS_FREE(old_output); - LIBOCAS_FREE(hpf); -/* LIBOCAS_FREE(hpi);*/ - LIBOCAS_FREE(hpb); - LIBOCAS_FREE(Ci); - LIBOCAS_FREE(Bi); - - ocas.ocas_time = get_time() - ocas_start_time; - - return(ocas); -} - - 
-/*---------------------------------------------------------------------- - Binary linear Ocas-SVM solver which allows using different C for each - training example. - ----------------------------------------------------------------------*/ -ocas_return_value_T svm_ocas_solver_difC( - float64_t *C, - uint32_t nData, - float64_t TolRel, - float64_t TolAbs, - float64_t QPBound, - float64_t MaxTime, - uint32_t _BufSize, - uint8_t Method, - void (*compute_W)(float64_t*, float64_t*, float64_t*, uint32_t, void*), - float64_t (*update_W)(float64_t, void*), - int (*add_new_cut)(float64_t*, uint32_t*, uint32_t, uint32_t, void*), - int (*compute_output)(float64_t*, void* ), - int (*sort)(float64_t*, float64_t*, uint32_t), - void (*ocas_print)(ocas_return_value_T), - void* user_data) -{ - ocas_return_value_T ocas={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - float64_t *b, *alpha, *diag_H; - float64_t *output, *old_output; - float64_t xi, sq_norm_W, QPSolverTolRel, dot_prod_WoldW, sq_norm_oldW; - float64_t A0, B0, GradVal, t, t1, t2, *Ci, *Bi, *hpf, *hpb; - float64_t start_time, ocas_start_time; - float64_t qp_b = 1.0; - float64_t new_b; - uint32_t cut_length; - uint32_t i, *new_cut; - uint32_t *I; - uint8_t S = 1; - libqp_state_T qp_exitflag; - - ocas_start_time = get_time(); - ocas.qp_solver_time = 0; - ocas.output_time = 0; - ocas.sort_time = 0; - ocas.add_time = 0; - ocas.w_time = 0; - ocas.print_time = 0; - - BufSize = _BufSize; - - QPSolverTolRel = TolRel*0.5; - - H=NULL; - b=NULL; - alpha=NULL; - new_cut=NULL; - I=NULL; - diag_H=NULL; - output=NULL; - old_output=NULL; - hpf=NULL; - hpb = NULL; - Ci=NULL; - Bi=NULL; - - /* Hessian matrix contains dot product of normal vectors of selected cutting planes */ - H = (float64_t*)LIBOCAS_CALLOC(BufSize*BufSize, float64_t); - if(H == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - /* bias of cutting planes */ - b = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t); - if(b == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - alpha = 
(float64_t*)LIBOCAS_CALLOC(BufSize, float64_t); - if(alpha == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - /* indices of examples which define a new cut */ - new_cut = (uint32_t*)LIBOCAS_CALLOC(nData, uint32_t); - if(new_cut == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - I = (uint32_t*)LIBOCAS_CALLOC(BufSize, uint32_t); - if(I == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - for(i=0; i< BufSize; i++) I[i] = 1; - - diag_H = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t); - if(diag_H == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - output = (float64_t*)LIBOCAS_CALLOC(nData, float64_t); - if(output == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - old_output = (float64_t*)LIBOCAS_CALLOC(nData, float64_t); - if(old_output == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - /* array of hinge points used in line-serach */ - hpf = (float64_t*) LIBOCAS_CALLOC(nData, float64_t); - if(hpf == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - hpb = (float64_t*) LIBOCAS_CALLOC(nData, float64_t); - if(hpb == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - /* vectors Ci, Bi are used in the line search procedure */ - Ci = (float64_t*)LIBOCAS_CALLOC(nData, float64_t); - if(Ci == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - Bi = (float64_t*)LIBOCAS_CALLOC(nData, float64_t); - if(Bi == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - ocas.nCutPlanes = 0; - ocas.exitflag = 0; - ocas.nIter = 0; - - /* Compute initial value of Q_P assuming that W is zero vector.*/ - sq_norm_W = 0; -/* - xi = nData; - ocas.Q_P = 0.5*sq_norm_W + C*xi; -*/ - ocas.Q_D = 0; - - /* Compute the initial cutting plane */ - cut_length = nData; - new_b = 0; - for(i=0; i < nData; i++) - { - new_cut[i] = i; - new_b += C[i]; - } - - ocas.Q_P = 0.5*sq_norm_W + new_b; - - - ocas.trn_err = nData; - ocas.ocas_time = get_time() - ocas_start_time; - /* ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f, Q_P-Q_D/abs(Q_P)=%f\n", - ocas.nIter,cur_time, 
ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P)); - */ - ocas_print(ocas); - - /* main loop */ - while( ocas.exitflag == 0 ) - { - ocas.nIter++; - - /* append a new cut to the buffer and update H */ -/* b[ocas.nCutPlanes] = -(float64_t)cut_length*C;*/ - b[ocas.nCutPlanes] = -new_b; - - start_time = get_time(); - - if(add_new_cut( &H[LIBOCAS_INDEX(0,ocas.nCutPlanes,BufSize)], new_cut, cut_length, ocas.nCutPlanes, user_data ) != 0) - { - ocas.exitflag=-2; - goto cleanup; - } - - ocas.add_time += get_time() - start_time; - - /* copy new added row: H(ocas.nCutPlanes,ocas.nCutPlanes,1:ocas.nCutPlanes-1) = H(1:ocas.nCutPlanes-1:ocas.nCutPlanes)' */ - diag_H[ocas.nCutPlanes] = H[LIBOCAS_INDEX(ocas.nCutPlanes,ocas.nCutPlanes,BufSize)]; - for(i=0; i < ocas.nCutPlanes; i++) { - H[LIBOCAS_INDEX(ocas.nCutPlanes,i,BufSize)] = H[LIBOCAS_INDEX(i,ocas.nCutPlanes,BufSize)]; - } - - ocas.nCutPlanes++; - - /* call inner QP solver */ - start_time = get_time(); - -/* qp_exitflag = libqp_splx_solver(&get_col, diag_H, b, &C, I, &S, alpha,*/ -/* ocas.nCutPlanes, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBOCAS_PLUS_INF,0);*/ - qp_exitflag = libqp_splx_solver(&get_col, diag_H, b, &qp_b, I, &S, alpha, - ocas.nCutPlanes, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBOCAS_PLUS_INF,0); - - ocas.qp_exitflag = qp_exitflag.exitflag; - - ocas.qp_solver_time += get_time() - start_time; - ocas.Q_D = -qp_exitflag.QP; - - ocas.nNZAlpha = 0; - for(i=0; i < ocas.nCutPlanes; i++) { - if( alpha[i] != 0) ocas.nNZAlpha++; - } - - sq_norm_oldW = sq_norm_W; - start_time = get_time(); - compute_W( &sq_norm_W, &dot_prod_WoldW, alpha, ocas.nCutPlanes, user_data ); - ocas.w_time += get_time() - start_time; - - /* select a new cut */ - switch( Method ) - { - /* cutting plane algorithm implemented in SVMperf and BMRM */ - case 0: - - start_time = get_time(); - if( compute_output( output, user_data ) != 0) - { - ocas.exitflag=-2; - goto cleanup; - } - ocas.output_time += 
get_time()-start_time; - - xi = 0; - cut_length = 0; - ocas.trn_err = 0; - new_b = 0; - for(i=0; i < nData; i++) - { - if(output[i] <= 0) ocas.trn_err++; - -/* if(output[i] <= 1) {*/ -/* xi += 1 - output[i];*/ - if(output[i] <= C[i]) { - xi += C[i] - output[i]; - new_cut[cut_length] = i; - cut_length++; - new_b += C[i]; - } - } -/* ocas.Q_P = 0.5*sq_norm_W + C*xi;*/ - ocas.Q_P = 0.5*sq_norm_W + xi; - - ocas.ocas_time = get_time() - ocas_start_time; - - /* ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f, 1-Q_D/Q_P=%f, nza=%4d, err=%.2f%%, qpf=%d\n", - ocas.nIter,cur_time, ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P), - ocas.nNZAlpha, 100*(float64_t)ocas.trn_err/(float64_t)nData, ocas.qp_exitflag ); - */ - - start_time = get_time(); - ocas_print(ocas); - ocas.print_time += get_time() - start_time; - - break; - - - /* Ocas strategy */ - case 1: - - /* Linesearch */ - A0 = sq_norm_W -2*dot_prod_WoldW + sq_norm_oldW; - B0 = dot_prod_WoldW - sq_norm_oldW; - - sg_memcpy( old_output, output, sizeof(float64_t)*nData ); - - start_time = get_time(); - if( compute_output( output, user_data ) != 0) - { - ocas.exitflag=-2; - goto cleanup; - } - ocas.output_time += get_time()-start_time; - - uint32_t num_hp = 0; - GradVal = B0; - for(i=0; i< nData; i++) { - -/* Ci[i] = C*(1-old_output[i]);*/ -/* Bi[i] = C*(old_output[i] - output[i]);*/ - Ci[i] = (C[i]-old_output[i]); - Bi[i] = old_output[i] - output[i]; - - float64_t val; - if(Bi[i] != 0) - val = -Ci[i]/Bi[i]; - else - val = -LIBOCAS_PLUS_INF; - - if (val>0) - { -/* hpi[num_hp] = i;*/ - hpb[num_hp] = Bi[i]; - hpf[num_hp] = val; - num_hp++; - } - - if( (Bi[i] < 0 && val > 0) || (Bi[i] > 0 && val <= 0)) - GradVal += Bi[i]; - - } - - t = 0; - if( GradVal < 0 ) - { - start_time = get_time(); -/* if( sort(hpf, hpi, num_hp) != 0)*/ - if( sort(hpf, hpb, num_hp) != 0 ) - { - ocas.exitflag=-2; - goto cleanup; - } - ocas.sort_time += get_time() - start_time; - - float64_t t_new, GradVal_new; - i = 0; - 
while( GradVal < 0 && i < num_hp ) - { - t_new = hpf[i]; - GradVal_new = GradVal + LIBOCAS_ABS(hpb[i]) + A0*(t_new-t); - - if( GradVal_new >= 0 ) - { - t = t + GradVal*(t-t_new)/(GradVal_new - GradVal); - } - else - { - t = t_new; - i++; - } - - GradVal = GradVal_new; - } - } - - /* - t = hpf[0] - 1; - i = 0; - GradVal = t*A0 + Bsum; - while( GradVal < 0 && i < num_hp && hpf[i] < LIBOCAS_PLUS_INF ) { - t = hpf[i]; - Bsum = Bsum + LIBOCAS_ABS(Bi[hpi[i]]); - GradVal = t*A0 + Bsum; - i++; - } - */ - t = LIBOCAS_MAX(t,0); /* just sanity check; t < 0 should not ocure */ - - t1 = t; /* new (best so far) W */ - t2 = t+(1.0-t)*MU; /* new cutting plane */ - /* t2 = t+(1.0-t)/10.0; new cutting plane */ - - /* update W to be the best so far solution */ - sq_norm_W = update_W( t1, user_data ); - - /* select a new cut */ - xi = 0; - cut_length = 0; - ocas.trn_err = 0; - new_b = 0; - for(i=0; i < nData; i++ ) { - -/* if( (old_output[i]*(1-t2) + t2*output[i]) <= 1 ) */ - if( (old_output[i]*(1-t2) + t2*output[i]) <= C[i] ) - { - new_cut[cut_length] = i; - cut_length++; - new_b += C[i]; - } - - output[i] = old_output[i]*(1-t1) + t1*output[i]; - -/* if( output[i] <= 1) xi += 1-output[i];*/ - if( output[i] <= C[i]) xi += C[i]-output[i]; - if( output[i] <= 0) ocas.trn_err++; - - } - -/* ocas.Q_P = 0.5*sq_norm_W + C*xi;*/ - ocas.Q_P = 0.5*sq_norm_W + xi; - - ocas.ocas_time = get_time() - ocas_start_time; - - /* ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f, 1-Q_D/Q_P=%f, nza=%4d, err=%.2f%%, qpf=%d\n", - ocas.nIter, cur_time, ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P), - ocas.nNZAlpha, 100*(float64_t)ocas.trn_err/(float64_t)nData, ocas.qp_exitflag ); - */ - - start_time = get_time(); - ocas_print(ocas); - ocas.print_time += get_time() - start_time; - - break; - } - - /* Stopping conditions */ - if( ocas.Q_P - ocas.Q_D <= TolRel*LIBOCAS_ABS(ocas.Q_P)) ocas.exitflag = 1; - if( ocas.Q_P - ocas.Q_D <= TolAbs) ocas.exitflag = 2; - if( ocas.Q_P <= 
QPBound) ocas.exitflag = 3; - if( MaxTime > 0 && ocas.ocas_time >= MaxTime) ocas.exitflag = 4; - if(ocas.nCutPlanes >= BufSize) ocas.exitflag = -1; - - } /* end of the main loop */ - -cleanup: - - LIBOCAS_FREE(H); - LIBOCAS_FREE(b); - LIBOCAS_FREE(alpha); - LIBOCAS_FREE(new_cut); - LIBOCAS_FREE(I); - LIBOCAS_FREE(diag_H); - LIBOCAS_FREE(output); - LIBOCAS_FREE(old_output); - LIBOCAS_FREE(hpf); -/* LIBOCAS_FREE(hpi);*/ - LIBOCAS_FREE(hpb); - LIBOCAS_FREE(Ci); - LIBOCAS_FREE(Bi); - - ocas.ocas_time = get_time() - ocas_start_time; - - return(ocas); -} - - - -/*---------------------------------------------------------------------- - Multiclass SVM-Ocas solver - ----------------------------------------------------------------------*/ - -/* Helper function needed by the multi-class SVM linesearch. - - - This function finds a simplified representation of a piece-wise linear function - by splitting the domain into intervals and fining active terms for these intevals */ -static void findactive(float64_t *Theta, float64_t *SortedA, uint32_t *nSortedA, float64_t *A, float64_t *B, int n, - int (*sort)(float64_t*, float64_t*, uint32_t)) -{ - float64_t tmp, theta; - uint32_t i, j, idx, idx2 = 0, start; - - sort(A,B,n); - - idx = 0; - i = 0; - while( i < (uint32_t)n-1 && A[i] == A[i+1]) - { - if( B[i+1] > B[idx] ) - { - idx = i+1; - } - i++; - } - - (*nSortedA) = 1; - SortedA[0] = A[idx]; - - while(1) - { - start = idx + 1; - while( start < (uint32_t)n && A[idx] == A[start]) - start++; - - theta = LIBOCAS_PLUS_INF; - for(j=start; j < (uint32_t)n; j++) - { - tmp = (B[j] - B[idx])/(A[idx]-A[j]); - if( tmp < theta) - { - theta = tmp; - idx2 = j; - } - } - - if( theta < LIBOCAS_PLUS_INF) - { - Theta[(*nSortedA) - 1] = theta; - SortedA[(*nSortedA)] = A[idx2]; - (*nSortedA)++; - idx = idx2; - } - else - return; - } -} - - -/*---------------------------------------------------------------------- - Multiclass linear OCAS-SVM solver. 
- ----------------------------------------------------------------------*/ -ocas_return_value_T msvm_ocas_solver( - float64_t C, - float64_t *data_y, - uint32_t nY, - uint32_t nData, - float64_t TolRel, - float64_t TolAbs, - float64_t QPBound, - float64_t MaxTime, - uint32_t _BufSize, - uint8_t Method, - void (*compute_W)(float64_t*, float64_t*, float64_t*, uint32_t, void*), - float64_t (*update_W)(float64_t, void*), - int (*add_new_cut)(float64_t*, uint32_t*, uint32_t, void*), - int (*compute_output)(float64_t*, void* ), - int (*sort)(float64_t*, float64_t*, uint32_t), - void (*ocas_print)(ocas_return_value_T), - void* user_data) -{ - ocas_return_value_T ocas={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - float64_t *b, *alpha, *diag_H; - float64_t *output, *old_output; - float64_t xi, sq_norm_W, QPSolverTolRel, QPSolverTolAbs, dot_prod_WoldW, sq_norm_oldW; - float64_t A0, B0, t, t1, t2, R, tmp, element_b, x; - float64_t *A, *B, *theta, *Theta, *sortedA, *Add; - float64_t start_time, ocas_start_time, grad_sum, grad, min_x = 0, old_x, old_grad; - uint32_t i, y, y2, ypred = 0, *new_cut, cnt1, cnt2, j, nSortedA, idx; - uint32_t *I; - uint8_t S = 1; - libqp_state_T qp_exitflag; - - ocas_start_time = get_time(); - ocas.qp_solver_time = 0; - ocas.output_time = 0; - ocas.sort_time = 0; - ocas.add_time = 0; - ocas.w_time = 0; - ocas.print_time = 0; - - BufSize = _BufSize; - - QPSolverTolRel = TolRel*0.5; - QPSolverTolAbs = TolAbs*0.5; - - H=NULL; - b=NULL; - alpha=NULL; - new_cut=NULL; - I=NULL; - diag_H=NULL; - output=NULL; - old_output=NULL; - A = NULL; - B = NULL; - theta = NULL; - Theta = NULL; - sortedA = NULL; - Add = NULL; - - /* Hessian matrix contains dot product of normal vectors of selected cutting planes */ - H = (float64_t*)LIBOCAS_CALLOC(BufSize*BufSize, float64_t); - if(H == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - /* bias of cutting planes */ - b = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t); - if(b == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - 
- alpha = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t); - if(alpha == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - /* indices of examples which define a new cut */ - new_cut = (uint32_t*)LIBOCAS_CALLOC(nData, uint32_t); - if(new_cut == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - I = (uint32_t*)LIBOCAS_CALLOC(BufSize, uint32_t); - if(I == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - for(i=0; i< BufSize; i++) - I[i] = 1; - - diag_H = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t); - if(diag_H == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - output = (float64_t*)LIBOCAS_CALLOC(nData*nY, float64_t); - if(output == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - old_output = (float64_t*)LIBOCAS_CALLOC(nData*nY, float64_t); - if(old_output == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - /* auxciliary variables used in the linesearch */ - A = (float64_t*)LIBOCAS_CALLOC(nData*nY, float64_t); - if(A == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - B = (float64_t*)LIBOCAS_CALLOC(nData*nY, float64_t); - if(B == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - theta = (float64_t*)LIBOCAS_CALLOC(nY, float64_t); - if(theta == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - sortedA = (float64_t*)LIBOCAS_CALLOC(nY, float64_t); - if(sortedA == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - Theta = (float64_t*)LIBOCAS_CALLOC(nData*nY, float64_t); - if(Theta == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - Add = (float64_t*)LIBOCAS_CALLOC(nData*nY, float64_t); - if(Add == NULL) - { - ocas.exitflag=-2; - goto cleanup; - } - - /* Set initial values*/ - ocas.nCutPlanes = 0; - ocas.exitflag = 0; - ocas.nIter = 0; - ocas.Q_D = 0; - ocas.trn_err = nData; - R = (float64_t)nData; - sq_norm_W = 0; - element_b = (float64_t)nData; - ocas.Q_P = 0.5*sq_norm_W + C*R; - - /* initial cutting plane */ - for(i=0; i < nData; i++) - { - y2 = (uint32_t)data_y[i]; - - if(y2 > 0) - new_cut[i] = 0; - else - new_cut[i] = 
1; - - } - - ocas.ocas_time = get_time() - ocas_start_time; - - start_time = get_time(); - ocas_print(ocas); - ocas.print_time += get_time() - start_time; - - /* main loop of the OCAS */ - while( ocas.exitflag == 0 ) - { - ocas.nIter++; - - /* append a new cut to the buffer and update H */ - b[ocas.nCutPlanes] = -(float64_t)element_b; - - start_time = get_time(); - - if(add_new_cut( &H[LIBOCAS_INDEX(0,ocas.nCutPlanes,BufSize)], new_cut, ocas.nCutPlanes, user_data ) != 0) - { - ocas.exitflag=-2; - goto cleanup; - } - - ocas.add_time += get_time() - start_time; - - /* copy newly appended row: H(ocas.nCutPlanes,ocas.nCutPlanes,1:ocas.nCutPlanes-1) = H(1:ocas.nCutPlanes-1:ocas.nCutPlanes)' */ - diag_H[ocas.nCutPlanes] = H[LIBOCAS_INDEX(ocas.nCutPlanes,ocas.nCutPlanes,BufSize)]; - for(i=0; i < ocas.nCutPlanes; i++) - { - H[LIBOCAS_INDEX(ocas.nCutPlanes,i,BufSize)] = H[LIBOCAS_INDEX(i,ocas.nCutPlanes,BufSize)]; - } - - ocas.nCutPlanes++; - - /* call inner QP solver */ - start_time = get_time(); - - qp_exitflag = libqp_splx_solver(&get_col, diag_H, b, &C, I, &S, alpha, - ocas.nCutPlanes, QPSolverMaxIter, QPSolverTolAbs, QPSolverTolRel, -LIBOCAS_PLUS_INF,0); - - ocas.qp_exitflag = qp_exitflag.exitflag; - - ocas.qp_solver_time += get_time() - start_time; - ocas.Q_D = -qp_exitflag.QP; - - ocas.nNZAlpha = 0; - for(i=0; i < ocas.nCutPlanes; i++) - if( alpha[i] != 0) ocas.nNZAlpha++; - - sq_norm_oldW = sq_norm_W; - start_time = get_time(); - compute_W( &sq_norm_W, &dot_prod_WoldW, alpha, ocas.nCutPlanes, user_data ); - ocas.w_time += get_time() - start_time; - - /* select a new cut */ - switch( Method ) - { - /* cutting plane algorithm implemented in SVMperf and BMRM */ - case 0: - - start_time = get_time(); - if( compute_output( output, user_data ) != 0) - { - ocas.exitflag=-2; - goto cleanup; - } - ocas.output_time += get_time()-start_time; - - /* the following loop computes: */ - element_b = 0.0; /* element_b = R(old_W) - g'*old_W */ - R = 0; /* R(W) = sum_i max_y ( [[y != 
y_i]] + (w_y- w_y_i)'*x_i ) */ - ocas.trn_err = 0; /* trn_err = sum_i [[y != y_i ]] */ - /* new_cut[i] = argmax_i ( [[y != y_i]] + (w_y- w_y_i)'*x_i ) */ - for(i=0; i < nData; i++) - { - y2 = (uint32_t)data_y[i]; - - for(xi=-LIBOCAS_PLUS_INF, y=0; y < nY; y++) - { - if(y2 != y && xi < output[LIBOCAS_INDEX(y,i,nY)]) - { - xi = output[LIBOCAS_INDEX(y,i,nY)]; - ypred = y; - } - } - - if(xi >= output[LIBOCAS_INDEX(y2,i,nY)]) - ocas.trn_err ++; - - xi = LIBOCAS_MAX(0,xi+1-output[LIBOCAS_INDEX(y2,i,nY)]); - R += xi; - if(xi > 0) - { - element_b++; - new_cut[i] = ypred; - } - else - new_cut[i] = y2; - } - - ocas.Q_P = 0.5*sq_norm_W + C*R; - - ocas.ocas_time = get_time() - ocas_start_time; - - start_time = get_time(); - ocas_print(ocas); - ocas.print_time += get_time() - start_time; - - break; - - /* The OCAS solver */ - case 1: - sg_memcpy( old_output, output, sizeof(float64_t)*nData*nY ); - - start_time = get_time(); - if( compute_output( output, user_data ) != 0) - { - ocas.exitflag=-2; - goto cleanup; - } - ocas.output_time += get_time()-start_time; - - A0 = sq_norm_W - 2*dot_prod_WoldW + sq_norm_oldW; - B0 = dot_prod_WoldW - sq_norm_oldW; - - for(i=0; i < nData; i++) - { - y2 = (uint32_t)data_y[i]; - - for(y=0; y < nY; y++) - { - A[LIBOCAS_INDEX(y,i,nY)] = C*(output[LIBOCAS_INDEX(y,i,nY)] - old_output[LIBOCAS_INDEX(y,i,nY)] - + old_output[LIBOCAS_INDEX(y2,i,nY)] - output[LIBOCAS_INDEX(y2,i,nY)]); - B[LIBOCAS_INDEX(y,i,nY)] = C*(old_output[LIBOCAS_INDEX(y,i,nY)] - old_output[LIBOCAS_INDEX(y2,i,nY)] - + (float64_t)(y != y2)); - } - } - - /* linesearch */ -/* new_x = msvm_linesearch_mex(A0,B0,AA*C,BB*C);*/ - - grad_sum = B0; - cnt1 = 0; - cnt2 = 0; - for(i=0; i < nData; i++) - { - findactive(theta,sortedA,&nSortedA,&A[i*nY],&B[i*nY],nY,sort); - - idx = 0; - while( idx < nSortedA-1 && theta[idx] < 0 ) - idx++; - - grad_sum += sortedA[idx]; - - for(j=idx; j < nSortedA-1; j++) - { - Theta[cnt1] = theta[j]; - cnt1++; - } - - for(j=idx+1; j < nSortedA; j++) - { - Add[cnt2] = 
-sortedA[j-1]+sortedA[j]; - cnt2++; - } - } - - start_time = get_time(); - sort(Theta,Add,cnt1); - ocas.sort_time += get_time() - start_time; - - grad = grad_sum; - if(grad >= 0) - { - min_x = 0; - } - else - { - old_x = 0; - old_grad = grad; - - for(i=0; i < cnt1; i++) - { - x = Theta[i]; - - grad = x*A0 + grad_sum; - - if(grad >=0) - { - - min_x = (grad*old_x - old_grad*x)/(grad - old_grad); - - break; - } - else - { - grad_sum = grad_sum + Add[i]; - - grad = x*A0 + grad_sum; - if( grad >= 0) - { - min_x = x; - break; - } - } - - old_grad = grad; - old_x = x; - } - } - /* end of the linesearch which outputs min_x */ - - t = min_x; - t1 = t; /* new (best so far) W */ - t2 = t+(1.0-t)*MU; /* new cutting plane */ - /* t2 = t+(1.0-t)/10.0; */ - - /* update W to be the best so far solution */ - sq_norm_W = update_W( t1, user_data ); - - /* the following code computes a new cutting plane: */ - element_b = 0.0; /* element_b = R(old_W) - g'*old_W */ - /* new_cut[i] = argmax_i ( [[y != y_i]] + (w_y- w_y_i)'*x_i ) */ - for(i=0; i < nData; i++) - { - y2 = (uint32_t)data_y[i]; - - for(xi=-LIBOCAS_PLUS_INF, y=0; y < nY; y++) - { - tmp = old_output[LIBOCAS_INDEX(y,i,nY)]*(1-t2) + t2*output[LIBOCAS_INDEX(y,i,nY)]; - if(y2 != y && xi < tmp) - { - xi = tmp; - ypred = y; - } - } - - tmp = old_output[LIBOCAS_INDEX(y2,i,nY)]*(1-t2) + t2*output[LIBOCAS_INDEX(y2,i,nY)]; - xi = LIBOCAS_MAX(0,xi+1-tmp); - if(xi > 0) - { - element_b++; - new_cut[i] = ypred; - } - else - new_cut[i] = y2; - } - - /* compute Risk, class. 
error and update outputs to correspond to the new W */ - ocas.trn_err = 0; /* trn_err = sum_i [[y != y_i ]] */ - R = 0; - for(i=0; i < nData; i++) - { - y2 = (uint32_t)data_y[i]; - - for(tmp=-LIBOCAS_PLUS_INF, y=0; y < nY; y++) - { - output[LIBOCAS_INDEX(y,i,nY)] = old_output[LIBOCAS_INDEX(y,i,nY)]*(1-t1) + t1*output[LIBOCAS_INDEX(y,i,nY)]; - - if(y2 != y && tmp < output[LIBOCAS_INDEX(y,i,nY)]) - { - ypred = y; - tmp = output[LIBOCAS_INDEX(y,i,nY)]; - } - } - - R += LIBOCAS_MAX(0,1+tmp - output[LIBOCAS_INDEX(y2,i,nY)]); - if( tmp >= output[LIBOCAS_INDEX(y2,i,nY)]) - ocas.trn_err ++; - } - - ocas.Q_P = 0.5*sq_norm_W + C*R; - - - /* get time and print status */ - ocas.ocas_time = get_time() - ocas_start_time; - - start_time = get_time(); - ocas_print(ocas); - ocas.print_time += get_time() - start_time; - - break; - - } - - /* Stopping conditions */ - if( ocas.Q_P - ocas.Q_D <= TolRel*LIBOCAS_ABS(ocas.Q_P)) ocas.exitflag = 1; - if( ocas.Q_P - ocas.Q_D <= TolAbs) ocas.exitflag = 2; - if( ocas.Q_P <= QPBound) ocas.exitflag = 3; - if( MaxTime > 0 && ocas.ocas_time >= MaxTime) ocas.exitflag = 4; - if(ocas.nCutPlanes >= BufSize) ocas.exitflag = -1; - - } /* end of the main loop */ - -cleanup: - - LIBOCAS_FREE(H); - LIBOCAS_FREE(b); - LIBOCAS_FREE(alpha); - LIBOCAS_FREE(new_cut); - LIBOCAS_FREE(I); - LIBOCAS_FREE(diag_H); - LIBOCAS_FREE(output); - LIBOCAS_FREE(old_output); - LIBOCAS_FREE(A); - LIBOCAS_FREE(B); - LIBOCAS_FREE(theta); - LIBOCAS_FREE(Theta); - LIBOCAS_FREE(sortedA); - LIBOCAS_FREE(Add); - - ocas.ocas_time = get_time() - ocas_start_time; - - return(ocas); -} -} - -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/external/libocas.h b/src/shogun/lib/external/libocas.h deleted file mode 100644 index 1fb247a0849..00000000000 --- a/src/shogun/lib/external/libocas.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free 
Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * libocas.h: Implementation of the OCAS solver for training - * linear SVM classifiers. - * - * Copyright (C) 2008, 2009 Vojtech Franc, xfrancv@cmp.felk.cvut.cz - * Soeren Sonnenburg, soeren.sonnenburg@first.fraunhofer.de - * Implementation of SVM-Ocas solver. - *-------------------------------------------------------------------- */ - -#include -#ifdef USE_GPL_SHOGUN - -#include - -#ifndef libocas_h -#define libocas_h -#ifndef DOXYGEN_SHOULD_SKIP_THIS -namespace shogun -{ -#define LIBOCAS_PLUS_INF (-log(0.0)) -#define LIBOCAS_CALLOC(x,y) SG_CALLOC(y,x) -#define LIBOCAS_FREE(x) SG_FREE(x) -#define LIBOCAS_INDEX(ROW,COL,NUM_ROWS) ((COL)*(NUM_ROWS)+(ROW)) -#define LIBOCAS_MIN(A,B) ((A) > (B) ? (B) : (A)) -#define LIBOCAS_MAX(A,B) ((A) < (B) ? (B) : (A)) -#define LIBOCAS_ABS(A) ((A) < 0 ? -(A) : (A)) - -typedef struct { - uint32_t nIter; /* number of iterations */ - uint32_t nCutPlanes; /* number of cutitng buffered planes */ - uint32_t nNZAlpha; /* number of non-zero Lagrangeans (effective number of CPs) */ - uint32_t trn_err; /* number of training errors */ - float64_t Q_P; /* primal objective value */ - float64_t Q_D; /* dual objective value */ - float64_t output_time; /* time spent in computing outputs */ - float64_t sort_time; /* time spent in sorting */ - float64_t add_time; /* time spent in adding examples to compute cutting planes */ - float64_t w_time; /* time spent in computing parameter vector */ - float64_t qp_solver_time; /* time spent in inner QP solver */ - float64_t ocas_time; /* total time spent in svm_ocas_solver */ - float64_t print_time; /* time spent in ocas_print function */ - int8_t qp_exitflag; /* exitflag from the last call of the inner QP solver */ - int8_t exitflag; /* 1 .. ocas.Q_P - ocas.Q_D <= TolRel*ABS(ocas.Q_P) - 2 .. ocas.Q_P - ocas.Q_D <= TolAbs - 3 .. ocas.Q_P <= QPBound - 4 .. optimization time >= MaxTime - -1 .. 
ocas.nCutPlanes >= BufSize - -2 .. not enough memory for the solver */ -} ocas_return_value_T; - -/* binary linear SVM solver */ -ocas_return_value_T svm_ocas_solver( - float64_t C, /* regularizarion constant */ - uint32_t nData, /* number of exmaples */ - float64_t TolRel, /* halts if 1-Q_P/Q_D <= TolRel */ - float64_t TolAbs, /* halts if Q_P-Q_D <= TolRel */ - float64_t QPBound, /* halts if QP <= QPBound */ - float64_t MaxTime, /* maximal time in seconds spent in optmization */ - uint32_t BufSize, /* maximal number of buffered cutting planes */ - uint8_t Method, /* 0..standard CP (SVM-Perf,BMRM), 1..OCAS */ - void (*compute_W)(float64_t*, float64_t*, float64_t*, uint32_t, void*), - float64_t (*update_W)(float64_t, void*), - int (*add_new_cut)(float64_t*, uint32_t*, uint32_t, uint32_t, void*), - int (*compute_output)( float64_t*, void* ), - int (*sort)(float64_t*, float64_t*, uint32_t), - void (*ocas_print)(ocas_return_value_T), - void* user_data); - -/* binary linear SVM solver which allows using different C for each example*/ -ocas_return_value_T svm_ocas_solver_difC( - float64_t *C, /* regularizarion constants for each example */ - uint32_t nData, /* number of exmaples */ - float64_t TolRel, /* halts if 1-Q_P/Q_D <= TolRel */ - float64_t TolAbs, /* halts if Q_P-Q_D <= TolRel */ - float64_t QPBound, /* halts if QP <= QPBound */ - float64_t MaxTime, /* maximal time in seconds spent in optmization */ - uint32_t BufSize, /* maximal number of buffered cutting planes */ - uint8_t Method, /* 0..standard CP (SVM-Perf,BMRM), 1..OCAS */ - void (*compute_W)(float64_t*, float64_t*, float64_t*, uint32_t, void*), - float64_t (*update_W)(float64_t, void*), - int (*add_new_cut)(float64_t*, uint32_t*, uint32_t, uint32_t, void*), - int (*compute_output)( float64_t*, void* ), - int (*sort)(float64_t*, float64_t*, uint32_t), - void (*ocas_print)(ocas_return_value_T), - void* user_data); - -/* multi-class (Singer-Crammer formulation) linear SVM solver */ -ocas_return_value_T 
msvm_ocas_solver( - float64_t C, - float64_t *data_y, - uint32_t nY, - uint32_t nData, - float64_t TolRel, - float64_t TolAbs, - float64_t QPBound, - float64_t MaxTime, - uint32_t _BufSize, - uint8_t Method, - void (*compute_W)(float64_t*, float64_t*, float64_t*, uint32_t, void*), - float64_t (*update_W)(float64_t, void*), - int (*add_new_cut)(float64_t*, uint32_t*, uint32_t, void*), - int (*compute_output)(float64_t*, void* ), - int (*sort)(float64_t*, float64_t*, uint32_t), - void (*ocas_print)(ocas_return_value_T), - void* user_data); - - -/* binary linear SVM solver */ -ocas_return_value_T svm_ocas_solver_nnw( - float64_t C, /* regularizarion constant */ - uint32_t nData, /* number of exmaples */ - uint32_t num_nnw, /* number of components of W which must non-negative*/ - uint32_t* nnw_idx, /* indices of W which must be non-negative */ - float64_t TolRel, /* halts if 1-Q_P/Q_D <= TolRel */ - float64_t TolAbs, /* halts if Q_P-Q_D <= TolRel */ - float64_t QPBound, /* halts if QP <= QPBound */ - float64_t MaxTime, /* maximal time in seconds spent in optmization */ - uint32_t BufSize, /* maximal number of buffered cutting planes */ - uint8_t Method, /* 0..standard CP (SVM-Perf,BMRM), 1..OCAS */ - int (*add_pw_constr)(uint32_t, uint32_t, void*), - void (*clip_neg_w)(uint32_t, uint32_t*, void*), - void (*compute_W)(float64_t*, float64_t*, float64_t*, uint32_t, void*), - float64_t (*update_W)(float64_t, void*), - int (*add_new_cut)(float64_t*, uint32_t*, uint32_t, uint32_t, void*), - int (*compute_output)( float64_t*, void* ), - int (*sort)(float64_t*, float64_t*, uint32_t), - void (*ocas_print)(ocas_return_value_T), - void* user_data); - -} -#endif // DOXYGEN_SHOULD_SKIP_THIS -#endif /* libocas_h */ -#endif // USE_GPL_SHOGUN - diff --git a/src/shogun/lib/external/libocas_common.h b/src/shogun/lib/external/libocas_common.h deleted file mode 100644 index 4e481fd4c4d..00000000000 --- a/src/shogun/lib/external/libocas_common.h +++ /dev/null @@ -1,16 +0,0 @@ -#include 
-#ifdef USE_GPL_SHOGUN -#include -#include - - -namespace shogun -{ -#define OCAS_PLUS_INF CMath::INFTY -#define OCAS_CALLOC(...) calloc(__VA_ARGS__) -#define OCAS_FREE(...) SG_FREE(__VA_ARGS__) - -#define INDEX2(ROW,COL,NUM_ROWS) ((COL)*(NUM_ROWS)+(ROW)) -} - -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/external/libqp.h b/src/shogun/lib/external/libqp.h deleted file mode 100644 index cd249236d41..00000000000 --- a/src/shogun/lib/external/libqp.h +++ /dev/null @@ -1,90 +0,0 @@ -/*----------------------------------------------------------------------- - * libqp.h: Library for Quadratic Programming optimization. - * - * The library provides two solvers: - * 1. Solver for QP task with simplex constraints. - * See function ./lib/libqp_splx.c for definition of the QP task. - * - * 2. Solver for QP task with box constraints and a single linear - * equality constraint. - * See function ./lib/libqp_gsmo.c for definiton of the QP task. - * - * Copyright (C) 2006-2008 Vojtech Franc, xfrancv@cmp.felk.cvut.cz - * Center for Machine Perception, CTU FEL Prague - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; - * Version 3, 29 June 2007 - *-------------------------------------------------------------------- */ - -#ifndef libqp_h -#define libqp_h - -#include -#ifdef USE_GPL_SHOGUN - -#include - -#include -namespace shogun -{ -#define LIBQP_PLUS_INF (-log(0.0)) -#define LIBQP_CALLOC(x,y) SG_CALLOC(y,x) -#define LIBQP_FREE(x) SG_FREE(x) -#define LIBQP_INDEX(ROW,COL,NUM_ROWS) ((COL)*(NUM_ROWS)+(ROW)) -#define LIBQP_MIN(A,B) ((A) > (B) ? (B) : (A)) -#define LIBQP_MAX(A,B) ((A) < (B) ? (B) : (A)) -#define LIBQP_ABS(A) ((A) < 0 ? 
-(A) : (A)) - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -/** QP solver return value */ -typedef struct { - /** number of iterations */ - uint32_t nIter; - /** primal objective value */ - float64_t QP; - /** dual objective value */ - float64_t QD; - /** exit flag */ - int8_t exitflag; /* -1 ... not enough memory - 0 ... nIter >= MaxIter - 1 ... QP - QD <= TolRel*ABS(QP) - 2 ... QP - QD <= TolAbs - 3 ... QP <= QP_TH - 4 ... eps-KKT conditions satisfied */ -} libqp_state_T; -#endif - -/** QP solver for tasks with simplex constraints */ -libqp_state_T libqp_splx_solver(const float64_t* (*get_col)(uint32_t), - float64_t *diag_H, - float64_t *f, - float64_t *b, - uint32_t *I, - uint8_t *S, - float64_t *x, - uint32_t n, - uint32_t MaxIter, - float64_t TolAbs, - float64_t TolRel, - float64_t QP_TH, - void (*print_state)(libqp_state_T state)); - -/** Generalized SMO algorithm */ -libqp_state_T libqp_gsmo_solver(const float64_t* (*get_col)(uint32_t), - float64_t *diag_H, - float64_t *f, - float64_t *a, - float64_t b, - float64_t *LB, - float64_t *UB, - float64_t *x, - uint32_t n, - uint32_t MaxIter, - float64_t TolKKT, - void (*print_state)(libqp_state_T state)); - -} -#endif //USE_GPL_SHOGUN -#endif /* libqp_h */ diff --git a/src/shogun/lib/external/libqp_gsmo.cpp b/src/shogun/lib/external/libqp_gsmo.cpp deleted file mode 100644 index aa8cc8f49f9..00000000000 --- a/src/shogun/lib/external/libqp_gsmo.cpp +++ /dev/null @@ -1,257 +0,0 @@ -/*----------------------------------------------------------------------- - * libqp_gsmo.c: implementation of the Generalized SMO algorithm. - * - * DESCRIPTION - * The library provides function which solves the following instance of - * a convex Quadratic Programming task: - * - * min QP(x) := 0.5*x'*H*x + f'*x - * x - * - * s.t. a'*x = b - * LB[i] <= x[i] <= UB[i] for all i=1..n - * - * A precision of the found solution is controlled by the input argument - * TolKKT which defines tightness of the relaxed Karush-Kuhn-Tucker - * stopping conditions. 
- * - * INPUT ARGUMENTS - * get_col function which returns pointer to the i-th column of H. - * diag_H [float64_t n x 1] vector containing values on the diagonal of H. - * f [float64_t n x 1] vector. - * a [float64_t n x 1] Vector which must not contain zero entries. - * b [float64_t 1 x 1] Scalar. - * LB [float64_t n x 1] Lower bound; -inf is allowed. - * UB [float64_t n x 1] Upper bound; inf is allowed. - * x [float64_t n x 1] solution vector; must be feasible. - * n [uint32_t 1 x 1] dimension of H. - * MaxIter [uint32_t 1 x 1] max number of iterations. - * TolKKT [float64_t 1 x 1] Tightness of KKT stopping conditions. - * print_state print function; if == NULL it is not called. - * - * RETURN VALUE - * structure [libqp_state_T] - * .QP [1x1] Primal objective value. - * .exitflag [1 x 1] Indicates which stopping condition was used: - * -3 ... initial solution vector does not satisfy equality constraint - * -2 ... initial solution vector does not satisfy bounds - * -1 ... not enough memory - * 0 ... Maximal number of iterations reached: nIter >= MaxIter. - * 4 ... Relaxed KKT conditions satisfied. - * .nIter [1x1] Number of iterations. - * - * REFERENCE - * S.S. Keerthi, E.G. Gilbert. Convergence of a generalized SMO algorithm - * for SVM classier design. Technical Report CD-00-01, Control Division, - * Dept. of Mechanical and Production Engineering, National University - * of Singapore, 2000. 
- * http://citeseer.ist.psu.edu/keerthi00convergence.html - * - * - * Copyright (C) 2006-2008 Vojtech Franc, xfrancv@cmp.felk.cvut.cz - * Center for Machine Perception, CTU FEL Prague - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; - * Version 3, 29 June 2007 - *-------------------------------------------------------------------- */ - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -namespace shogun -{ - -libqp_state_T libqp_gsmo_solver(const float64_t* (*get_col)(uint32_t), - float64_t *diag_H, - float64_t *f, - float64_t *a, - float64_t b, - float64_t *LB, - float64_t *UB, - float64_t *x, - uint32_t n, - uint32_t MaxIter, - float64_t TolKKT, - void (*print_state)(libqp_state_T state)) -{ - float64_t *col_u; - float64_t *col_v; - float64_t *Nabla; - float64_t minF_up; - float64_t maxF_low; - float64_t tau; - float64_t F_i; - float64_t tau_ub, tau_lb; - uint32_t i, j; - uint32_t u=0, v=0; - libqp_state_T state; - float64_t atx = 0.0; - - Nabla = NULL; - - /* ------------------------------------------------------------ */ - /* Initialization */ - /* ------------------------------------------------------------ */ - - // check bounds of initial guess - for (i=0; iUB[i]) - { - state.exitflag = -2; - goto cleanup; - } - if (x[i]1e-9) - { - printf("%f \ne %f\n",b,atx); - state.exitflag = -3; - goto cleanup; - } - - /* Nabla = H*x + f is gradient*/ - Nabla = (float64_t*)LIBQP_CALLOC(n, float64_t); - if( Nabla == NULL ) - { - state.exitflag=-1; - goto cleanup; - } - - /* compute gradient */ - for( i=0; i < n; i++ ) - { - Nabla[i] += f[i]; - if( x[i] != 0 ) { - col_u = (float64_t*)get_col(i); - for( j=0; j < n; j++ ) { - Nabla[j] += col_u[j]*x[i]; - } - } - } - - if( print_state != NULL) - { - state.QP = 0; - for(i = 0; i < n; i++ ) - state.QP += 
0.5*(x[i]*Nabla[i]+x[i]*f[i]); - - print_state( state ); - } - - - /* ------------------------------------------------------------ */ - /* Main optimization loop */ - /* ------------------------------------------------------------ */ - - state.nIter = 0; - state.exitflag = 100; - while( state.exitflag == 100 ) - { - state.nIter ++; - - /* find the most violating pair of variables */ - minF_up = LIBQP_PLUS_INF; - maxF_low = -LIBQP_PLUS_INF; - for(i = 0; i < n; i++ ) - { - - F_i = Nabla[i]/a[i]; - - if(LB[i] < x[i] && x[i] < UB[i]) - { /* i is from I_0 */ - if( minF_up > F_i) { minF_up = F_i; u = i; } - if( maxF_low < F_i) { maxF_low = F_i; v = i; } - } - else if((a[i] > 0 && x[i] == LB[i]) || (a[i] < 0 && x[i] == UB[i])) - { /* i is from I_1 or I_2 */ - if( minF_up > F_i) { minF_up = F_i; u = i; } - } - else if((a[i] > 0 && x[i] == UB[i]) || (a[i] < 0 && x[i] == LB[i])) - { /* i is from I_3 or I_4 */ - if( maxF_low < F_i) { maxF_low = F_i; v = i; } - } - } - - /* check KKT conditions */ - if( maxF_low - minF_up <= TolKKT ) - state.exitflag = 4; - else - { - /* SMO update of the most violating pair */ - col_u = (float64_t*)get_col(u); - col_v = (float64_t*)get_col(v); - - if( a[u] > 0 ) - { tau_lb = (LB[u]-x[u])*a[u]; tau_ub = (UB[u]-x[u])*a[u]; } - else - { tau_ub = (LB[u]-x[u])*a[u]; tau_lb = (UB[u]-x[u])*a[u]; } - - if( a[v] > 0 ) - { tau_lb = LIBQP_MAX(tau_lb,(x[v]-UB[v])*a[v]); tau_ub = LIBQP_MIN(tau_ub,(x[v]-LB[v])*a[v]); } - else - { tau_lb = LIBQP_MAX(tau_lb,(x[v]-LB[v])*a[v]); tau_ub = LIBQP_MIN(tau_ub,(x[v]-UB[v])*a[v]); } - - tau = (Nabla[v]/a[v]-Nabla[u]/a[u])/ - (diag_H[u]/(a[u]*a[u]) + diag_H[v]/(a[v]*a[v]) - 2*col_u[v]/(a[u]*a[v])); - - tau = LIBQP_MIN(LIBQP_MAX(tau,tau_lb),tau_ub); - - x[u] += tau/a[u]; - x[v] -= tau/a[v]; - - /* update Nabla */ - for(i = 0; i < n; i++ ) - Nabla[i] += col_u[i]*tau/a[u] - col_v[i]*tau/a[v]; - - } - - if( state.nIter >= MaxIter ) - state.exitflag = 0; - - if( print_state != NULL) - { - state.QP = 0; - for(i = 0; i < n; 
i++ ) - state.QP += 0.5*(x[i]*Nabla[i]+x[i]*f[i]); - - print_state( state ); - } - - } - - /* compute primal objective value */ - state.QP = 0; - for(i = 0; i < n; i++ ) - state.QP += 0.5*(x[i]*Nabla[i]+x[i]*f[i]); - -cleanup: - - LIBQP_FREE(Nabla); - - return( state ); -} - -} /* shogun namespace */ -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/external/libqp_splx.cpp b/src/shogun/lib/external/libqp_splx.cpp deleted file mode 100644 index 06680512fde..00000000000 --- a/src/shogun/lib/external/libqp_splx.cpp +++ /dev/null @@ -1,413 +0,0 @@ -/*----------------------------------------------------------------------- - * libqp_splx.c: solver for Quadratic Programming task with - * simplex constraints. - * - * DESCRIPTION - * The library provides function which solves the following instance of - * a convex Quadratic Programmin task: - * - * min QP(x):= 0.5*x'*H*x + f'*x - * x - * - * subject to: - * sum_{i in I_k} x[i] == b[k] for all k such that S[k] == 0 - * sum_{i in I_k} x[i] <= b[k] for all k such that S[k] == 1 - * x(i) >= 0 for all i=1:n - * - * where I_k = { i | I[i] == k}, k={1,...,m}. - * - * A precision of the found solution is controled by the input argumens - * MaxIter, TolAbs, QP_TH and MaxIter which define the stopping conditions: - * - * nIter >= MaxIter -> exitflag = 0 Number of iterations - * QP-QD <= TolAbs -> exitflag = 1 Abs. tolerance (duality gap) - * QP-QD <= QP*TolRel -> exitflag = 2 Relative tolerance - * QP <= QP_TH -> exitflag = 3 Threshold on objective value - * - * where QP and QD are primal respectively dual objective values. - * - * INPUT ARGUMENTS - * get_col function which returns pointer to the i-th column of H. - * diag_H [float64_t n x 1] vector containing values on the diagonal of H. - * f [float64_t n x 1] vector. - * b [float64_t n x 1] vector of positive numbers. - * I [uint16_T n x 1] vector containing numbers 1...m. - * S [uint8_T n x 1] vector containing numbers 0 and 1. 
- * x [float64_t n x 1] solution vector; must be feasible. - * n [uint32_t 1 x 1] dimension of H. - * MaxIter [uint32_t 1 x 1] max number of iterations. - * TolAbs [float64_t 1 x 1] Absolute tolerance. - * TolRel [float64_t 1 x 1] Relative tolerance. - * QP_TH [float64_t 1 x 1] Threshold on the primal value. - * print_state print function; if == NULL it is not called. - * - * RETURN VALUE - * structure [libqp_state_T] - * .QP [1 x 1] Primal objective value. - * .QD [1 x 1] Dual objective value. - * .nIter [1 x 1] Number of iterations. - * .exitflag [1 x 1] Indicates which stopping condition was used: - * -1 ... Not enough memory. - * 0 ... Maximal number of iteations reached: nIter >= MaxIter. - * 1 ... Relarive tolerance reached: QP-QD <= abs(QP)*TolRel - * 2 ... Absolute tolerance reached: QP-QD <= TolAbs - * 3 ... Objective value reached threshold: QP <= QP_TH. - * - * REFERENCE - * The algorithm is described in: - * V. Franc, V. Hlavac. A Novel Algorithm for Learning Support Vector Machines - * with Structured Output Spaces. Research Report K333 22/06, CTU-CMP-2006-04. - * May, 2006. 
ftp://cmp.felk.cvut.cz/pub/cmp/articles/franc/Franc-TR-2006-04.ps - * - * Copyright (C) 2006-2008 Vojtech Franc, xfrancv@cmp.felk.cvut.cz - * Center for Machine Perception, CTU FEL Prague - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; - * Version 3, 29 June 2007 - *-------------------------------------------------------------------- */ - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include -#include -#include - -#include -#include -namespace shogun -{ - -libqp_state_T libqp_splx_solver(const float64_t* (*get_col)(uint32_t), - float64_t *diag_H, - float64_t *f, - float64_t *b, - uint32_t *I, - uint8_t *S, - float64_t *x, - uint32_t n, - uint32_t MaxIter, - float64_t TolAbs, - float64_t TolRel, - float64_t QP_TH, - void (*print_state)(libqp_state_T state)) -{ - float64_t *d; - float64_t *col_u, *col_v; - float64_t *x_neq; - float64_t tmp; - float64_t improv; - float64_t tmp_num; - float64_t tmp_den=0; - float64_t tau=0; - float64_t delta; - uint32_t *inx; - uint32_t *nk; - uint32_t m; - int32_t u=0; - int32_t v=0; - uint32_t k; - uint32_t i, j; - libqp_state_T state; - - - /* ------------------------------------------------------------ - Initialization - ------------------------------------------------------------ */ - state.nIter = 0; - state.QP = LIBQP_PLUS_INF; - state.QD = -LIBQP_PLUS_INF; - state.exitflag = 100; - - inx=NULL; - nk=NULL; - d=NULL; - x_neq = NULL; - - /* count number of constraints */ - for( i=0, m=0; i < n; i++ ) - m = LIBQP_MAX(m,I[i]); - - /* auxciliary variables for tranforming equalities to inequalities */ - x_neq = (float64_t*) LIBQP_CALLOC(m, float64_t); - if( x_neq == NULL ) - { - state.exitflag=-1; - goto cleanup; - } - - /* inx is translation table between variable index i and its contraint */ - inx = (uint32_t*) LIBQP_CALLOC(m*n, uint32_t); - if( inx == NULL ) - { - 
state.exitflag=-1; - goto cleanup; - } - - /* nk is the number of variables coupled by i-th linear constraint */ - nk = (uint32_t*) LIBQP_CALLOC(m, uint32_t); - if( nk == NULL ) - { - state.exitflag=-1; - goto cleanup; - } - - /* setup auxciliary variables */ - for( i=0; i < m; i++ ) - x_neq[i] = b[i]; - - - /* create inx and nk */ - for( i=0; i < n; i++ ) { - k = I[i]-1; - inx[LIBQP_INDEX(nk[k],k,n)] = i; - nk[k]++; - - if(S[k] != 0) - x_neq[k] -= x[i]; - } - - /* d = H*x + f is gradient*/ - d = (float64_t*) LIBQP_CALLOC(n, float64_t); - if( d == NULL ) - { - state.exitflag=-1; - goto cleanup; - } - - /* compute gradient */ - for( i=0; i < n; i++ ) - { - d[i] += f[i]; - if( x[i] > 0 ) { - col_u = (float64_t*)get_col(i); - for( j=0; j < n; j++ ) { - d[j] += col_u[j]*x[i]; - } - } - } - - /* compute state.QP = 0.5*x'*(f+d); - state.QD = 0.5*x'*(f-d); */ - for( i=0, state.QP = 0, state.QD=0; i < n; i++) - { - state.QP += x[i]*(f[i]+d[i]); - state.QD += x[i]*(f[i]-d[i]); - } - state.QP = 0.5*state.QP; - state.QD = 0.5*state.QD; - - for( i=0; i < m; i++ ) - { - for( j=0, tmp = LIBQP_PLUS_INF; j < nk[i]; j++ ) - tmp = LIBQP_MIN(tmp, d[inx[LIBQP_INDEX(j,i,n)]]); - - if(S[i] == 0) - state.QD += b[i]*tmp; - else - state.QD += b[i]*LIBQP_MIN(tmp,0); - } - - /* print initial state */ - if( print_state != NULL) - print_state( state ); - - /* ------------------------------------------------------------ - Main optimization loop - ------------------------------------------------------------ */ - while( state.exitflag == 100 ) - { - state.nIter ++; - - /* go over blocks of variables coupled by lin. 
constraint */ - for( k=0; k < m; k++ ) - { - - /* compute u = argmin_{i in I_k} d[i] - delta = sum_{i in I_k} x[i]*d[i] - b*min_{i in I_k} */ - for( j=0, tmp = LIBQP_PLUS_INF, delta = 0; j < nk[k]; j++ ) - { - i = inx[LIBQP_INDEX(j,k,n)]; - delta += x[i]*d[i]; - if( tmp > d[i] ) { - tmp = d[i]; - u = i; - } - } - - if(S[k] != 0 && d[u] > 0) - u = -1; - else - delta -= b[k]*d[u]; - - /* if satisfied then k-th block of variables needs update */ - if( delta > TolAbs/m && delta > TolRel*LIBQP_ABS(state.QP)/m) - { - /* for fixed u select v = argmax_{i in I_k} Improvement(i) */ - if( u != -1 ) - { - col_u = (float64_t*)get_col(u); - improv = -LIBQP_PLUS_INF; - for( j=0; j < nk[k]; j++ ) - { - i = inx[LIBQP_INDEX(j,k,n)]; - - if(x[i] > 0 && i != uint32_t(u)) - { - tmp_num = x[i]*(d[i] - d[u]); - tmp_den = x[i]*x[i]*(diag_H[u] - 2*col_u[i] + diag_H[i]); - if( tmp_den > 0 ) - { - if( tmp_num < tmp_den ) - tmp = tmp_num*tmp_num / tmp_den; - else - tmp = tmp_num - 0.5 * tmp_den; - - if( tmp > improv ) - { - improv = tmp; - tau = LIBQP_MIN(1,tmp_num/tmp_den); - v = i; - } - } - } - } - - /* check if virtual variable can be for updated */ - if(x_neq[k] > 0 && S[k] != 0) - { - tmp_num = -x_neq[k]*d[u]; - tmp_den = x_neq[k]*x_neq[k]*diag_H[u]; - if( tmp_den > 0 ) - { - if( tmp_num < tmp_den ) - tmp = tmp_num*tmp_num / tmp_den; - else - tmp = tmp_num - 0.5 * tmp_den; - - if( tmp > improv ) - { - improv = tmp; - tau = LIBQP_MIN(1,tmp_num/tmp_den); - v = -1; - } - } - } - - /* minimize objective w.r.t variable u and v */ - if(v != -1) - { - tmp = x[v]*tau; - x[u] += tmp; - x[v] -= tmp; - - /* update d = H*x + f */ - col_v = (float64_t*)get_col(v); - for(i = 0; i < n; i++ ) - d[i] += tmp*(col_u[i]-col_v[i]); - } - else - { - tmp = x_neq[k]*tau; - x[u] += tmp; - x_neq[k] -= tmp; - - /* update d = H*x + f */ - for(i = 0; i < n; i++ ) - d[i] += tmp*col_u[i]; - } - } - else - { - improv = -LIBQP_PLUS_INF; - for( j=0; j < nk[k]; j++ ) - { - i = inx[LIBQP_INDEX(j,k,n)]; - - if(x[i] > 0) - 
{ - tmp_num = x[i]*d[i]; - tmp_den = x[i]*x[i]*diag_H[i]; - if( tmp_den > 0 ) - { - if( tmp_num < tmp_den ) - tmp = tmp_num*tmp_num / tmp_den; - else - tmp = tmp_num - 0.5 * tmp_den; - - if( tmp > improv ) - { - improv = tmp; - tau = LIBQP_MIN(1,tmp_num/tmp_den); - v = i; - } - } - } - } - - tmp = x[v]*tau; - x_neq[k] += tmp; - x[v] -= tmp; - - /* update d = H*x + f */ - col_v = (float64_t*)get_col(v); - for(i = 0; i < n; i++ ) - d[i] -= tmp*col_v[i]; - } - - /* update objective value */ - state.QP = state.QP - improv; - } - } - - /* Compute primal and dual objectives */ - for( i=0, state.QP = 0, state.QD=0; i < n; i++) - { - state.QP += x[i]*(f[i]+d[i]); - state.QD += x[i]*(f[i]-d[i]); - } - state.QP = 0.5*state.QP; - state.QD = 0.5*state.QD; - - for( k=0; k < m; k++ ) - { - for( j=0,tmp = LIBQP_PLUS_INF; j < nk[k]; j++ ) { - i = inx[LIBQP_INDEX(j,k,n)]; - tmp = LIBQP_MIN(tmp, d[i]); - } - - if(S[k] == 0) - state.QD += b[k]*tmp; - else - state.QD += b[k]*LIBQP_MIN(tmp,0); - } - - /* print state */ - if( print_state != NULL) - print_state( state ); - - /* check stopping conditions */ - if(state.QP-state.QD <= LIBQP_ABS(state.QP)*TolRel ) state.exitflag = 1; - else if( state.QP-state.QD <= TolAbs ) state.exitflag = 2; - else if( state.QP <= QP_TH ) state.exitflag = 3; - else if( state.nIter >= MaxIter) state.exitflag = 0; - } - - /*---------------------------------------------------------- - Clean up - ---------------------------------------------------------- */ -cleanup: - LIBQP_FREE( d ); - LIBQP_FREE( inx ); - LIBQP_FREE( nk ); - LIBQP_FREE( x_neq ); - - return( state ); -} -} -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/external/pr_loqo.cpp b/src/shogun/lib/external/pr_loqo.cpp deleted file mode 100644 index 00153c8ca3f..00000000000 --- a/src/shogun/lib/external/pr_loqo.cpp +++ /dev/null @@ -1,682 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by 
- * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Purpose: solves quadratic programming problem for pattern recognition - * for support vectors - * - * Written (W) 1997-1998 Alex J. Smola - * Written (W) 1999-2009 Soeren Sonnenburg - * Written (W) 1999-2008 Gunnar Raetsch - * Copyright (C) 1997-2009 Fraunhofer Institute FIRST and Max-Planck-Society - */ - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -namespace shogun -{ - -#define PREDICTOR 1 -#define CORRECTOR 2 - -/***************************************************************** - replace this by any other function that will exit gracefully - in a larger system - ***************************************************************/ - -void nrerror(char error_text[]) -{ - SG_SDEBUG("terminating optimizer - %s\n", error_text) - // exit(1); -} - -/***************************************************************** - taken from numerical recipes and modified to accept pointers - moreover numerical recipes code seems to be buggy (at least the - ones on the web) - - cholesky solver and backsubstitution - leaves upper right triangle intact (rows first order) - ***************************************************************/ - -#ifdef HAVE_LAPACK -bool choldc(float64_t* a, int32_t n, float64_t* p) -{ - if (n<=0) - return false; - - float64_t* a2=SG_MALLOC(float64_t, n*n); - - for (int32_t i=0; i0) - SG_SDEBUG("Choldc failed, matrix not positive definite\n") - - SG_FREE(a2); - - return result==0; -} -#else -bool choldc(float64_t a[], int32_t n, float64_t p[]) -{ - void nrerror(char error_text[]); - int32_t i, j, k; - float64_t sum; - - for (i = 0; i < n; i++) - { - for (j = i; j < n; j++) - { - sum=a[n*i + j]; - - for (k=i-1; k>=0; k--) - sum -= a[n*i + k]*a[n*j + k]; - - if (i == j) - { - if (sum <= 0.0) - { - SG_SDEBUG("Choldc failed, matrix not positive definite") - sum = 
0.0; - return false; - } - - p[i]=sqrt(sum); - - } - else - a[n*j + i] = sum/p[i]; - } - } - - return true; -} -#endif - -void cholsb( - float64_t a[], int32_t n, float64_t p[], float64_t b[], float64_t x[]) -{ - int32_t i, k; - float64_t sum; - - for (i=0; i=0; k--) sum -= a[n*i + k]*x[k]; - x[i]=sum/p[i]; - } - - for (i=n-1; i>=0; i--) { - sum=x[i]; - for (k=i+1; k=0; k--) sum -= a[n*i + k]*x[k]; - x[i]=sum/p[i]; - } -} - -void chol_backward( - float64_t a[], int32_t n, float64_t p[], float64_t b[], float64_t x[]) -{ - int32_t i, k; - float64_t sum; - - for (i=n-1; i>=0; i--) { - sum=b[i]; - for (k=i+1; k 0) { - s[i] = bound; - z[i] = sigma[i] + bound; - } - else { - s[i] = bound - sigma[i]; - z[i] = bound; - } - } - } - else { /* use default start settings */ - for (i=0; i= STATUS) { - SG_SDEBUG("counter | pri_inf | dual_inf | pri_obj | dual_obj | ") - SG_SDEBUG("sigfig | alpha | nu \n") - SG_SDEBUG("-------------------------------------------------------") - SG_SDEBUG("---------------------------\n") - } - - while (status == STILL_RUNNING) { - /* predictor */ - - /* put back original diagonal values */ - for (i=0; i counter_max) status = ITERATION_LIMIT; - if (sigfig > sigfig_max) status = OPTIMAL_SOLUTION; - if (primal_inf > 10e100) status = PRIMAL_INFEASIBLE; - if (dual_inf > 10e100) status = DUAL_INFEASIBLE; - if ((primal_inf > 10e100) & (dual_inf > 10e100)) status = PRIMAL_AND_DUAL_INFEASIBLE; - if (CMath::abs(primal_obj) > 10e100) status = PRIMAL_UNBOUNDED; - if (CMath::abs(dual_obj) > 10e100) status = DUAL_UNBOUNDED; - - /* write some nice routine to enforce the time limit if you - _really_ want, however it's quite useless as you can compute - the time from the maximum number of iterations as every - iteration costs one cholesky decomposition plus a couple of - backsubstitutions */ - - /* generate report */ - if ((verb >= FLOOD) | ((verb == STATUS) & (status != 0))) - SG_SDEBUG("%7i | %.2e | %.2e | % .2e | % .2e | %6.3f | %.4f | %.2e\n", - counter, 
primal_inf, dual_inf, primal_obj, dual_obj, - sigfig, alfa, mu); - - counter++; - - if (status == 0) { /* we may keep on going, otherwise - it'll cost one loop extra plus a - messed up main diagonal of h_x */ - /* intermediate variables (the ones with hat) */ - for (i=0; i= STATUS)) { - SG_SDEBUG("----------------------------------------------------------------------------------\n") - SG_SDEBUG("optimization converged\n") - } - - /* free memory */ - SG_FREE(workspace); - SG_FREE(diag_h_x); - SG_FREE(h_y); - SG_FREE(c_x); - SG_FREE(c_y); - SG_FREE(h_dot_x); - - SG_FREE(rho); - SG_FREE(nu); - SG_FREE(tau); - SG_FREE(sigma); - SG_FREE(gamma_z); - SG_FREE(gamma_s); - - SG_FREE(hat_nu); - SG_FREE(hat_tau); - - SG_FREE(delta_x); - SG_FREE(delta_y); - SG_FREE(delta_s); - SG_FREE(delta_z); - SG_FREE(delta_g); - SG_FREE(delta_t); - - SG_FREE(d); - - /* and return to sender */ - return status; -} -} -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/external/pr_loqo.h b/src/shogun/lib/external/pr_loqo.h deleted file mode 100644 index 335d7391e62..00000000000 --- a/src/shogun/lib/external/pr_loqo.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Purpose: solves quadratic programming problem for pattern recognition - * for support vectors - * - * Written (W) 1997-1998 Alex J. 
Smola - * Written (W) 1999-2009 Soeren Sonnenburg - * Written (W) 1999-2008 Gunnar Raetsch - * Copyright (C) 1997-2009 Fraunhofer Institute FIRST and Max-Planck-Society - */ - -#include -#ifdef USE_GPL_SHOGUN - -namespace shogun -{ -/* verbosity levels */ - -#define QUIET 0 -#define STATUS 1 -#define FLOOD 2 - -/* status outputs */ - -#define STILL_RUNNING 0 -#define OPTIMAL_SOLUTION 1 -#define SUBOPTIMAL_SOLUTION 2 -#define ITERATION_LIMIT 3 -#define PRIMAL_INFEASIBLE 4 -#define DUAL_INFEASIBLE 5 -#define PRIMAL_AND_DUAL_INFEASIBLE 6 -#define INCONSISTENT 7 -#define PRIMAL_UNBOUNDED 8 -#define DUAL_UNBOUNDED 9 -#define TIME_LIMIT 10 - -/* - * solve the quadratic programming problem - * - * minimize c' * x + 1/2 x' * H * x - * subject to A*x = b - * l <= x <= u - * - * for a documentation see R. Vanderbei, LOQO: an Interior Point Code - * for Quadratic Programming - */ - -/** - * n : number of primal variables - * m : number of constraints (typically 1) - * h_x : dot product matrix (n.n) - * a : constraint matrix (n.m) - * b : constant term (m) - * l : lower bound (n) - * u : upper bound (m) - * - * primal : workspace for primal variables, has to be of size 3 n - * - * x = primal; n - * g = x + n; n - * t = g + n; n - * - * dual : workspace for dual variables, has to be of size m + 2 n - * - * y = dual; m - * z = y + m; n - * s = z + n; n - * - * verb : verbosity level - * sigfig_max : number of significant digits - * counter_max: stopping criterion - * restart : 1 if restart desired - * - */ -int32_t pr_loqo( - int32_t n, int32_t m, float64_t c[], float64_t h_x[], float64_t a[], - float64_t b[], float64_t l[], float64_t u[], float64_t primal[], - float64_t dual[], int32_t verb, float64_t sigfig_max, int32_t counter_max, - float64_t margin, float64_t bound, int32_t restart); -} -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/external/shogun_libsvm.cpp b/src/shogun/lib/external/shogun_libsvm.cpp index 07a10dc466f..e6cbb412915 100644 --- 
a/src/shogun/lib/external/shogun_libsvm.cpp +++ b/src/shogun/lib/external/shogun_libsvm.cpp @@ -35,12 +35,14 @@ #ifndef DOXYGEN_SHOULD_SKIP_THIS -#include -#include +#include +#include #include -#include +#include #include +#include #include +#include #include #include @@ -50,6 +52,8 @@ #include #include +#include + namespace shogun { @@ -289,7 +293,7 @@ LibSVMKernel::~LibSVMKernel() // class Solver { public: - Solver() {}; + Solver() : m_cancel_computation(false){}; virtual ~Solver() {}; struct SolutionInfo { @@ -321,6 +325,7 @@ class Solver { float64_t *G_bar; // gradient, if we treat free variables as 0 int32_t l; bool unshrink; // XXX + std::atomic m_cancel_computation; float64_t get_C(int32_t i) { @@ -342,10 +347,51 @@ class Solver { virtual int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap); virtual float64_t calculate_rho(); virtual void do_shrinking(); + + /* Custom implementation of signal handling */ + rxcpp::subscription connect_to_signal_handler(); + void reset_computation_variables(); +#ifndef SWIG + /** @return whether the algorithm needs to be stopped */ + SG_FORCED_INLINE bool cancel_computation() const + { + return m_cancel_computation.load(); + } +#endif + void on_pause() + { + } + void on_next() + { + m_cancel_computation.store(false); + } + void on_complete() + { + } + private: bool be_shrunk(int32_t i, float64_t Gmax1, float64_t Gmax2); }; +rxcpp::subscription Solver::connect_to_signal_handler() +{ + // Subscribe this algorithm to the signal handler + auto subscriber = rxcpp::make_subscriber( + [this](int i) { + if (i == SG_PAUSE_COMP) + this->on_pause(); + else + this->on_next(); + }, + [this]() { this->on_complete(); }); + return get_global_signal()->get_observable()->subscribe(subscriber); +} + +void Solver::reset_computation_variables() +{ + m_cancel_computation.store(false); +} + void Solver::swap_index(int32_t i, int32_t j) { Q->swap_index(i,j); @@ -402,6 +448,8 @@ void Solver::Solve( const schar *p_y, float64_t *p_alpha, 
float64_t p_Cp, float64_t p_Cn, float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias) { + auto sub = connect_to_signal_handler(); + this->l = p_l; this->Q = &p_Q; QD=Q->get_QD(); @@ -429,9 +477,9 @@ void Solver::Solve( } // initialize gradient - CSignal::clear_cancel(); CTime start_time; { + auto pb = progress(range(l)); G = SG_MALLOC(float64_t, l); G_bar = SG_MALLOC(float64_t, l); int32_t i; @@ -442,7 +490,7 @@ void Solver::Solve( } SG_SINFO("Computing gradient for initial set of non-zero alphas\n") //CMath::display_vector(alpha, l, "alphas"); - for(i=0;imax_train_time > 0 && start_time.cur_time_diff() > Q->max_train_time) break; @@ -494,7 +542,8 @@ void Solver::Solve( counter = 1; // do shrinking next iteration } - SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6) + pb.print_absolute( + gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps)); ++iter; @@ -672,6 +721,7 @@ void Solver::Solve( } #endif } + pb.complete_absolute(); // calculate rho @@ -708,6 +758,9 @@ void Solver::Solve( SG_FREE(active_set); SG_FREE(G); SG_FREE(G_bar); + + sub.unsubscribe(); + reset_computation_variables(); } // return 1 if already optimal, return 0 otherwise @@ -1829,7 +1882,7 @@ void solve_c_svc_weighted( if(prob->y[i] > 0) y[i] = +1; else y[i]=-1; } - WeightedSolver s = WeightedSolver(prob->C); + WeightedSolver s{prob->C}; s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y, alpha, Cp, Cn, param->eps, si, param->shrinking, param->use_bias); diff --git a/src/shogun/lib/external/ssl.cpp b/src/shogun/lib/external/ssl.cpp deleted file mode 100644 index a24296f8c6a..00000000000 --- a/src/shogun/lib/external/ssl.cpp +++ /dev/null @@ -1,1145 +0,0 @@ -/* Copyright 2006 Vikas Sindhwani (vikass@cs.uchicago.edu) - SVM-lin: Fast SVM Solvers for Supervised and Semi-supervised Learning - - This file is part of SVM-lin. 
- - SVM-lin is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - SVM-lin is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with SVM-lin (see gpl.txt); if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include -#include -#include -#include - -#include -#include -#include -#include - -namespace shogun -{ -void ssl_train(struct data *Data, - struct options *Options, - struct vector_double *Weights, - struct vector_double *Outputs) -{ - // initialize - initialize_ssl(Weights,Data->n,0.0); - initialize_ssl(Outputs,Data->m,0.0); - vector_int *Subset = SG_MALLOC(vector_int, 1); - initialize_ssl(Subset,Data->m); - // call the right algorithm - int32_t optimality = 0; - switch(Options->algo) - { - case -1: - SG_SINFO("Regularized Least Squares Regression (CGLS)\n") - optimality=CGLS(Data,Options,Subset,Weights,Outputs); - break; - case RLS: - SG_SINFO("Regularized Least Squares Classification (CGLS)\n") - optimality=CGLS(Data,Options,Subset,Weights,Outputs); - break; - case SVM: - SG_SINFO("Modified Finite Newton L2-SVM (L2-SVM-MFN)\n") - optimality=L2_SVM_MFN(Data,Options,Weights,Outputs,0); - break; - case TSVM: - SG_SINFO("Transductive L2-SVM (TSVM)\n") - optimality=TSVM_MFN(Data,Options,Weights,Outputs); - break; - case DA_SVM: - SG_SINFO("Deterministic Annealing Semi-supervised L2-SVM (DAS3VM)\n") - optimality=DA_S3VM(Data,Options,Weights,Outputs); - break; - default: - SG_SERROR("Algorithm unspecified\n") - } - - if (!optimality) - SG_SWARNING("SSL-Algorithm terminated 
without reaching optimum.\n") - - SG_FREE(Subset->vec); - SG_FREE(Subset); - return; -} - -int32_t CGLS( - const struct data *Data, const struct options *Options, - const struct vector_int *Subset, struct vector_double *Weights, - struct vector_double *Outputs) -{ - SG_SDEBUG("CGLS starting...") - - /* Disassemble the structures */ - int32_t active = Subset->d; - int32_t *J = Subset->vec; - CDotFeatures* features=Data->features; - float64_t *Y = Data->Y; - float64_t *C = Data->C; - int32_t n = Data->n; - float64_t lambda = Options->lambda; - int32_t cgitermax = Options->cgitermax; - float64_t epsilon = Options->epsilon; - float64_t *beta = Weights->vec; - float64_t *o = Outputs->vec; - // initialize z - float64_t *z = SG_MALLOC(float64_t, active); - float64_t *q = SG_MALLOC(float64_t, active); - int32_t ii=0; - for (int32_t i = active ; i-- ;){ - ii=J[i]; - z[i] = C[ii]*(Y[ii] - o[ii]); - } - float64_t *r = SG_MALLOC(float64_t, n); - for (int32_t i = n ; i-- ;) - r[i] = 0.0; - for (register int32_t j=0; j < active; j++) - { - features->add_to_dense_vec(z[j], J[j], r, n-1); - r[n-1]+=Options->bias*z[j]; //bias (modelled as last dim) - } - float64_t *p = SG_MALLOC(float64_t, n); - float64_t omega1 = 0.0; - for (int32_t i = n ; i-- ;) - { - r[i] -= lambda*beta[i]; - p[i] = r[i]; - omega1 += r[i]*r[i]; - } - float64_t omega_p = omega1; - float64_t omega_q = 0.0; - float64_t inv_omega2 = 1/omega1; - float64_t scale = 0.0; - float64_t omega_z=0.0; - float64_t gamma = 0.0; - int32_t cgiter = 0; - int32_t optimality = 0; - float64_t epsilon2 = epsilon*epsilon; - // iterate - while(cgiter < cgitermax) - { - cgiter++; - omega_q=0.0; - float64_t t=0.0; - register int32_t i,j; - // #pragma omp parallel for private(i,j) - for (i=0; i < active; i++) - { - ii=J[i]; - t=features->dense_dot(ii, p, n-1); - t+=Options->bias*p[n-1]; //bias (modelled as last dim) - q[i]=t; - omega_q += C[ii]*t*t; - } - gamma = omega1/(lambda*omega_p + omega_q); - inv_omega2 = 1/omega1; - for (i = n ; 
i-- ;) - { - r[i] = 0.0; - beta[i] += gamma*p[i]; - } - omega_z=0.0; - for (i = active ; i-- ;) - { - ii=J[i]; - o[ii] += gamma*q[i]; - z[i] -= gamma*C[ii]*q[i]; - omega_z+=z[i]*z[i]; - } - for (j=0; j < active; j++) - { - t=z[j]; - - features->add_to_dense_vec(t, J[j], r, n-1); - r[n-1]+=Options->bias*t; //bias (modelled as last dim) - } - omega1 = 0.0; - for (i = n ; i-- ;) - { - r[i] -= lambda*beta[i]; - omega1 += r[i]*r[i]; - } - if(omega1 < epsilon2*omega_z) - { - optimality=1; - break; - } - omega_p=0.0; - scale=omega1*inv_omega2; - for(i = n ; i-- ;) - { - p[i] = r[i] + p[i]*scale; - omega_p += p[i]*p[i]; - } - } - SG_SDEBUG("...Done.") - SG_SINFO("CGLS converged in %d iteration(s)", cgiter) - - SG_FREE(z); - SG_FREE(q); - SG_FREE(r); - SG_FREE(p); - return optimality; -} - -int32_t L2_SVM_MFN( - const struct data *Data, struct options *Options, - struct vector_double *Weights, struct vector_double *Outputs, - int32_t ini) -{ - /* Disassemble the structures */ - CDotFeatures* features=Data->features; - float64_t *Y = Data->Y; - float64_t *C = Data->C; - int32_t n = Data->n; - int32_t m = Data->m; - float64_t lambda = Options->lambda; - float64_t epsilon; - float64_t *w = Weights->vec; - float64_t *o = Outputs->vec; - float64_t F_old = 0.0; - float64_t F = 0.0; - float64_t diff=0.0; - vector_int *ActiveSubset = SG_MALLOC(vector_int, 1); - ActiveSubset->vec = SG_MALLOC(int32_t, m); - ActiveSubset->d = m; - // initialize - if(ini==0) { - epsilon=BIG_EPSILON; - Options->cgitermax=SMALL_CGITERMAX; - Options->epsilon=BIG_EPSILON; - } - else {epsilon = Options->epsilon;} - for (int32_t i=0;i0) - { - ActiveSubset->vec[active]=i; - active++; - F+=0.5*C[i]*diff*diff; - } - else - { - ActiveSubset->vec[inactive]=i; - inactive--; - } - } - ActiveSubset->d=active; - int32_t iter=0; - int32_t opt=0; - int32_t opt2=0; - vector_double *Weights_bar = SG_MALLOC(vector_double, 1); - vector_double *Outputs_bar = SG_MALLOC(vector_double, 1); - float64_t *w_bar = 
SG_MALLOC(float64_t, n); - float64_t *o_bar = SG_MALLOC(float64_t, m); - Weights_bar->vec=w_bar; - Outputs_bar->vec=o_bar; - Weights_bar->d=n; - Outputs_bar->d=m; - float64_t delta=0.0; - float64_t t=0.0; - int32_t ii = 0; - while(itervec[i]; - - t=features->dense_dot(ii, w_bar, n-1); - t+=Options->bias*w_bar[n-1]; //bias (modelled as last dim) - - o_bar[ii]=t; - } - if(ini==0) {Options->cgitermax=CGITERMAX; ini=1;}; - opt2=1; - for (int32_t i=0;ivec[i]; - if(i=1-epsilon)); - if(opt2==0) break; - } - if(opt && opt2) // l - { - if(epsilon==BIG_EPSILON) - { - epsilon=EPSILON; - Options->epsilon=EPSILON; - SG_SDEBUG("epsilon = %f case converged (speedup heuristic 2). Continuing with epsilon=%f", BIG_EPSILON , EPSILON) - continue; - } - else - { - for (int32_t i=n; i-- ;) - w[i]=w_bar[i]; - for (int32_t i=m; i-- ;) - o[i]=o_bar[i]; - SG_FREE(ActiveSubset->vec); - SG_FREE(ActiveSubset); - SG_FREE(o_bar); - SG_FREE(w_bar); - SG_FREE(Weights_bar); - SG_FREE(Outputs_bar); - SG_SINFO("L2_SVM_MFN converged (optimality) in %d", iter) - return 1; - } - } - delta=line_search(w,w_bar,lambda,o,o_bar,Y,C,n,m); - SG_SDEBUG("LINE_SEARCH delta = %f\n", delta) - F_old=F; - F=0.0; - for (int32_t i=n; i-- ;) { - w[i]+=delta*(w_bar[i]-w[i]); - F+=w[i]*w[i]; - } - F=0.5*lambda*F; - active=0; - inactive=m-1; - for (int32_t i=0; i0) - { - ActiveSubset->vec[active]=i; - active++; - F+=0.5*C[i]*diff*diff; - } - else - { - ActiveSubset->vec[inactive]=i; - inactive--; - } - } - ActiveSubset->d=active; - if(CMath::abs(F-F_old)vec); - SG_FREE(ActiveSubset); - SG_FREE(o_bar); - SG_FREE(w_bar); - SG_FREE(Weights_bar); - SG_FREE(Outputs_bar); - SG_SINFO("L2_SVM_MFN converged (rel. 
criterion) in %d iterations", iter) - return 2; - } - } - SG_FREE(ActiveSubset->vec); - SG_FREE(ActiveSubset); - SG_FREE(o_bar); - SG_FREE(w_bar); - SG_FREE(Weights_bar); - SG_FREE(Outputs_bar); - SG_SINFO("L2_SVM_MFN converged (max iter exceeded) in %d iterations", iter) - return 0; -} - -float64_t line_search(float64_t *w, - float64_t *w_bar, - float64_t lambda, - float64_t *o, - float64_t *o_bar, - float64_t *Y, - float64_t *C, - int32_t d, /* data dimensionality -- 'n' */ - int32_t l) /* number of examples */ -{ - float64_t omegaL = 0.0; - float64_t omegaR = 0.0; - float64_t diff=0.0; - for(int32_t i=d; i--; ) - { - diff=w_bar[i]-w[i]; - omegaL+=w[i]*diff; - omegaR+=w_bar[i]*diff; - } - omegaL=lambda*omegaL; - omegaR=lambda*omegaR; - float64_t L=0.0; - float64_t R=0.0; - int32_t ii=0; - for (int32_t i=0;i0) - { - deltas[p].delta=(1-Y[i]*o[i])/diff; - deltas[p].index=i; - deltas[p].s=-1; - p++; - } - } - else - { - if(diff<0) - { - deltas[p].delta=(1-Y[i]*o[i])/diff; - deltas[p].index=i; - deltas[p].s=1; - p++; - } - } - } - std::sort(deltas,deltas+p); - float64_t delta_prime=0.0; - for (int32_t i=0;i=0) - break; - ii=deltas[i].index; - diff=(deltas[i].s)*C[ii]*(o_bar[ii]-o[ii]); - L+=diff*(o[ii]-Y[ii]); - R+=diff*(o_bar[ii]-Y[ii]); - } - SG_FREE(deltas); - return (-L/(R-L)); -} - -int32_t TSVM_MFN( - const struct data *Data, struct options *Options, - struct vector_double *Weights, struct vector_double *Outputs) -{ - /* Setup labeled-only examples and train L2_SVM_MFN */ - struct data *Data_Labeled = SG_MALLOC(data, 1); - struct vector_double *Outputs_Labeled = SG_MALLOC(vector_double, 1); - initialize_ssl(Outputs_Labeled,Data->l,0.0); - SG_SDEBUG("Initializing weights, unknown labels") - GetLabeledData(Data_Labeled,Data); /* gets labeled data and sets C=1/l */ - L2_SVM_MFN(Data_Labeled, Options, Weights,Outputs_Labeled,0); - ///FIXME Clear(Data_Labeled); - /* Use this weight vector to classify R*u unlabeled examples as - positive*/ - int32_t p=0,q=0; - 
float64_t t=0.0; - int32_t *JU = SG_MALLOC(int32_t, Data->u); - float64_t *ou = SG_MALLOC(float64_t, Data->u); - float64_t lambda_0 = TSVM_LAMBDA_SMALL; - for (int32_t i=0;im;i++) - { - if(Data->Y[i]==0.0) - { - t=Data->features->dense_dot(i, Weights->vec, Data->n-1); - t+=Options->bias*Weights->vec[Data->n-1]; //bias (modelled as last dim) - - Outputs->vec[i]=t; - Data->C[i]=lambda_0*1.0/Data->u; - JU[q]=i; - ou[q]=t; - q++; - } - else - { - Outputs->vec[i]=Outputs_Labeled->vec[p]; - Data->C[i]=1.0/Data->l; - p++; - } - } - std::nth_element(ou,ou+int32_t((1-Options->R)*Data->u-1),ou+Data->u); - float64_t thresh=*(ou+int32_t((1-Options->R)*Data->u)-1); - SG_FREE(ou); - for (int32_t i=0;iu;i++) - { - if(Outputs->vec[JU[i]]>thresh) - Data->Y[JU[i]]=1.0; - else - Data->Y[JU[i]]=-1.0; - } - for (int32_t i=0;in;i++) - Weights->vec[i]=0.0; - for (int32_t i=0;im;i++) - Outputs->vec[i]=0.0; - L2_SVM_MFN(Data,Options,Weights,Outputs,0); - int32_t num_switches=0; - int32_t s=0; - int32_t last_round=0; - while(lambda_0 <= Options->lambda_u) - { - int32_t iter2=0; - while(1){ - s=switch_labels(Data->Y,Outputs->vec,JU,Data->u,Options->S); - if(s==0) break; - iter2++; - SG_SDEBUG("****** lambda_0 = %f iteration = %d ************************************\n", lambda_0, iter2) - SG_SDEBUG("Optimizing unknown labels. 
switched %d labels.\n") - num_switches+=s; - SG_SDEBUG("Optimizing weights\n") - L2_SVM_MFN(Data,Options,Weights,Outputs,1); - } - if(last_round==1) break; - lambda_0=TSVM_ANNEALING_RATE*lambda_0; - if(lambda_0 >= Options->lambda_u) {lambda_0 = Options->lambda_u; last_round=1;} - for (int32_t i=0;iu;i++) - Data->C[JU[i]]=lambda_0*1.0/Data->u; - SG_SDEBUG("****** lambda0 increased to %f%% of lambda_u = %f ************************\n", lambda_0*100/Options->lambda_u, Options->lambda_u) - SG_SDEBUG("Optimizing weights\n") - L2_SVM_MFN(Data,Options,Weights,Outputs,1); - } - SG_SDEBUG("Total Number of Switches = %d\n", num_switches) - /* reset labels */ - for (int32_t i=0;iu;i++) Data->Y[JU[i]] = 0.0; - float64_t F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u); - SG_SDEBUG("Objective Value = %f\n",F) - delete [] JU; - return num_switches; -} - -int32_t switch_labels(float64_t* Y, float64_t* o, int32_t* JU, int32_t u, int32_t S) -{ - int32_t npos=0; - int32_t nneg=0; - for (int32_t i=0;i0) && (o[JU[i]]<1.0)) npos++; - if((Y[JU[i]]<0) && (-o[JU[i]]<1.0)) nneg++; - } - Delta* positive=SG_MALLOC(Delta, npos); - Delta* negative=SG_MALLOC(Delta, nneg); - int32_t p=0; - int32_t n=0; - int32_t ii=0; - for (int32_t i=0;i0.0) && (o[ii]<1.0)) { - positive[p].delta=o[ii]; - positive[p].index=ii; - positive[p].s=0; - p++;}; - if((Y[ii]<0.0) && (-o[ii]<1.0)) - { - negative[n].delta=-o[ii]; - negative[n].index=ii; - negative[n].s=0; - n++;}; - } - std::sort(positive,positive+npos); - std::sort(negative,negative+nneg); - int32_t s=-1; - while(1) - { - s++; - if((s>=S) || (positive[s].delta>=-negative[s].delta) || (s>=npos) || (s>=nneg)) - break; - Y[positive[s].index]=-1.0; - Y[negative[s].index]= 1.0; - } - SG_FREE(positive); - SG_FREE(negative); - return s; -} - -int32_t DA_S3VM( - struct data *Data, struct options *Options, struct vector_double *Weights, - struct vector_double *Outputs) -{ - float64_t T = 
DA_INIT_TEMP*Options->lambda_u; - int32_t iter1 = 0, iter2 =0; - float64_t *p = SG_MALLOC(float64_t, Data->u); - float64_t *q = SG_MALLOC(float64_t, Data->u); - float64_t *g = SG_MALLOC(float64_t, Data->u); - float64_t F,F_min; - float64_t *w_min = SG_MALLOC(float64_t, Data->n); - float64_t *o_min = SG_MALLOC(float64_t, Data->m); - float64_t *w = Weights->vec; - float64_t *o = Outputs->vec; - float64_t kl_divergence = 1.0; - /*initialize */ - SG_SDEBUG("Initializing weights, p") - for (int32_t i=0;iu; i++) - p[i] = Options->R; - /* record which examples are unlabeled */ - int32_t *JU = SG_MALLOC(int32_t, Data->u); - int32_t j=0; - for(int32_t i=0;im;i++) - { - if(Data->Y[i]==0.0) - {JU[j]=i;j++;} - } - float64_t H = entropy(p,Data->u); - optimize_w(Data,p,Options,Weights,Outputs,0); - F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u); - F_min = F; - for (int32_t i=0;id;i++) - w_min[i]=w[i]; - for (int32_t i=0;id;i++) - o_min[i]=o[i]; - while((iter1 < DA_OUTER_ITERMAX) && (H > Options->epsilon)) - { - iter1++; - iter2=0; - kl_divergence=1.0; - while((iter2 < DA_INNER_ITERMAX) && (kl_divergence > Options->epsilon)) - { - iter2++; - for (int32_t i=0;iu;i++) - { - q[i]=p[i]; - g[i] = Options->lambda_u*((o[JU[i]] > 1 ? 0 : (1 - o[JU[i]])*(1 - o[JU[i]])) - (o[JU[i]]< -1 ? 
0 : (1 + o[JU[i]])*(1 + o[JU[i]]))); - } - SG_SDEBUG("Optimizing p.\n") - optimize_p(g,Data->u,T,Options->R,p); - kl_divergence=KL(p,q,Data->u); - SG_SDEBUG("Optimizing weights\n") - optimize_w(Data,p,Options,Weights,Outputs,1); - F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u); - if(F < F_min) - { - F_min = F; - for (int32_t i=0;id;i++) - w_min[i]=w[i]; - for (int32_t i=0;id;i++) - o_min[i]=o[i]; - } - SG_SDEBUG("***** outer_iter = %d T = %g inner_iter = %d kl = %g cost = %g *****\n",iter1,T,iter2,kl_divergence,F) - } - H = entropy(p,Data->u); - SG_SDEBUG("***** Finished outer_iter = %d T = %g Entropy = %g ***\n", iter1,T,H) - T = T/DA_ANNEALING_RATE; - } - for (int32_t i=0;id;i++) - w[i]=w_min[i]; - for (int32_t i=0;id;i++) - o[i]=o_min[i]; - /* may want to reset the original Y */ - SG_FREE(p); - SG_FREE(q); - SG_FREE(g); - SG_FREE(JU); - SG_FREE(w_min); - SG_FREE(o_min); - SG_SINFO("(min) Objective Value = %f", F_min) - return 1; -} - -int32_t optimize_w( - const struct data *Data, const float64_t *p, struct options *Options, - struct vector_double *Weights, struct vector_double *Outputs, int32_t ini) -{ - int32_t i,j; - CDotFeatures* features=Data->features; - int32_t n = Data->n; - int32_t m = Data->m; - int32_t u = Data->u; - float64_t lambda = Options->lambda; - float64_t epsilon; - float64_t *w = Weights->vec; - float64_t *o = SG_MALLOC(float64_t, m+u); - float64_t *Y = SG_MALLOC(float64_t, m+u); - float64_t *C = SG_MALLOC(float64_t, m+u); - int32_t *labeled_indices = SG_MALLOC(int32_t, m); - float64_t F_old = 0.0; - float64_t F = 0.0; - float64_t diff=0.0; - float64_t lambda_u_by_u = Options->lambda_u/u; - vector_int *ActiveSubset = SG_MALLOC(vector_int, 1); - ActiveSubset->vec = SG_MALLOC(int32_t, m); - ActiveSubset->d = m; - // initialize - if(ini==0) - { - epsilon=BIG_EPSILON; - Options->cgitermax=SMALL_CGITERMAX; - Options->epsilon=BIG_EPSILON;} - else {epsilon = Options->epsilon;} - - 
for(i=0;ivec[i]; - if(Data->Y[i]==0.0) - { - labeled_indices[i]=0; - o[m+j]=o[i]; - Y[i]=1; - Y[m+j]=-1; - C[i]=lambda_u_by_u*p[j]; - C[m+j]=lambda_u_by_u*(1-p[j]); - ActiveSubset->vec[active]=i; - active++; - diff = 1 - CMath::abs(o[i]); - if(diff>0) - { - Data->Y[i] = 2*p[j]-1; - Data->C[i] = lambda_u_by_u; - temp1 = (1 - o[i]); - temp2 = (1 + o[i]); - F+=lambda_u_by_u*(p[j]*temp1*temp1 + (1-p[j])*temp2*temp2); - } - else - { - if(o[i]>0) - { - Data->Y[i] = -1.0; - Data->C[i] = C[m+j]; - } - else - { - Data->Y[i] = 1.0; - Data->C[i] = C[i]; - } - temp1 = (1-Data->Y[i]*o[i]); - F+= Data->C[i]*temp1*temp1; - } - j++; - } - else - { - labeled_indices[i]=1; - Y[i]=Data->Y[i]; - C[i]=1.0/Data->l; - Data->C[i]=1.0/Data->l; - diff=1-Data->Y[i]*o[i]; - if(diff>0) - { - ActiveSubset->vec[active]=i; - active++; - F+=Data->C[i]*diff*diff; - } - else - { - ActiveSubset->vec[inactive]=i; - inactive--; - } - } - } - F=0.5*F; - ActiveSubset->d=active; - int32_t iter=0; - int32_t opt=0; - int32_t opt2=0; - vector_double *Weights_bar = SG_MALLOC(vector_double, 1); - vector_double *Outputs_bar = SG_MALLOC(vector_double, 1); - float64_t *w_bar = SG_MALLOC(float64_t, n); - float64_t *o_bar = SG_MALLOC(float64_t, m+u); - Weights_bar->vec=w_bar; - Outputs_bar->vec=o_bar; - Weights_bar->d=n; - Outputs_bar->d=m; /* read only the top m ; bottom u will be copies */ - float64_t delta=0.0; - float64_t t=0.0; - int32_t ii = 0; - while(itervec[i]; - t=features->dense_dot(ii, w_bar, n-1); - t+=Options->bias*w_bar[n-1]; //bias (modelled as last dim) - - o_bar[ii]=t; - } - // make o_bar consistent in the bottom half - j=0; - for(i=0; icgitermax=CGITERMAX; ini=1;}; - opt2=1; - for(i=0; i < m ;i++) - { - ii=ActiveSubset->vec[i]; - if(iY[ii]*o_bar[ii]<=1+epsilon)); - else - { - if(CMath::abs(o[ii])<1) - opt2=(opt2 && (CMath::abs(o_bar[ii])<=1+epsilon)); - else - opt2=(opt2 && (CMath::abs(o_bar[ii])>=1-epsilon)); - } - } - else - opt2=(opt2 && (Data->Y[ii]*o_bar[ii]>=1-epsilon)); - if(opt2==0) 
break; - } - if(opt && opt2) // l - { - if(epsilon==BIG_EPSILON) - { - epsilon=EPSILON; - Options->epsilon=EPSILON; - SG_SDEBUG("epsilon = %f case converged (speedup heuristic 2). Continuing with epsilon=%f\n", BIG_EPSILON, EPSILON) - continue; - } - else - { - for(i=n; i-- ;) - w[i]=w_bar[i]; - for(i=m; i-- ;) - Outputs->vec[i]=o_bar[i]; - for(i=m; i-- ;) - { - if(labeled_indices[i]==0) - Data->Y[i]=0.0; - } - SG_FREE(ActiveSubset->vec); - SG_FREE(ActiveSubset); - SG_FREE(o_bar); - SG_FREE(w_bar); - SG_FREE(o); - SG_FREE(Weights_bar); - SG_FREE(Outputs_bar); - SG_FREE(Y); - SG_FREE(C); - SG_FREE(labeled_indices); - SG_SINFO("L2_SVM_MFN converged in %d iteration(s)", iter) - return 1; - } - } - - delta=line_search(w,w_bar,lambda,o,o_bar,Y,C,n,m+u); - SG_SDEBUG("LINE_SEARCH delta = %f", delta) - F_old=F; - F=0.0; - for(i=0;ivec[active]=i; - active++; - diff = 1 - CMath::abs(o[i]); - if(diff>0) - { - Data->Y[i] = 2*p[j]-1; - Data->C[i] = lambda_u_by_u; - temp1 = (1 - o[i]); - temp2 = (1 + o[i]); - F+=lambda_u_by_u*(p[j]*temp1*temp1 + (1-p[j])*temp2*temp2); - } - else - { - if(o[i]>0) - { - Data->Y[i] = -1; - Data->C[i] = C[m+j]; - } - else - { - Data->Y[i] = +1; - Data->C[i] = C[i]; - } - temp1=(1-Data->Y[i]*o[i]); - F+= Data->C[i]*temp1*temp1; - } - j++; - } - else - { - diff=1-Data->Y[i]*o[i]; - if(diff>0) - { - ActiveSubset->vec[active]=i; - active++; - F+=Data->C[i]*diff*diff; - } - else - { - ActiveSubset->vec[inactive]=i; - inactive--; - } - } - } - F=0.5*F; - ActiveSubset->d=active; - if(CMath::abs(F-F_old)vec[i]=o[i]; - if(labeled_indices[i]==0) - Data->Y[i]=0.0; - } - SG_FREE(ActiveSubset->vec); - SG_FREE(labeled_indices); - SG_FREE(ActiveSubset); - SG_FREE(o_bar); - SG_FREE(w_bar); - SG_FREE(o); - SG_FREE(Weights_bar); - SG_FREE(Outputs_bar); - SG_FREE(Y); - SG_FREE(C); - SG_SINFO("L2_SVM_MFN converged in %d iterations", iter) - return 0; -} - -void optimize_p( - const float64_t* g, int32_t u, float64_t T, float64_t r, float64_t* p) -{ - int32_t iter=0; - 
float64_t epsilon=1e-10; - int32_t maxiter=500; - float64_t nu_minus=g[0]; - float64_t nu_plus=g[0]; - for (int32_t i=0;inu_plus) nu_plus=g[i]; - }; - - float64_t b=T*log((1-r)/r); - nu_minus-=b; - nu_plus-=b; - float64_t nu=(nu_plus+nu_minus)/2; - float64_t Bnu=0.0; - float64_t BnuPrime=0.0; - float64_t s=0.0; - float64_t tmp=0.0; - for (int32_t i=0;iepsilon) && (iter < maxiter)) - { - iter++; - if(CMath::abs(BnuPrime)>0.0) - nuHat=nu-Bnu/BnuPrime; - if((CMath::abs(BnuPrime) > 0.0) | (nuHat>nu_plus) | (nuHat < nu_minus)) - nu=(nu_minus+nu_plus)/2.0; - else - nu=nuHat; - Bnu=0.0; - BnuPrime=0.0; - for(int32_t i=0;iepsilon) - SG_SWARNING("Warning (Root): root not found to required precision\n") - - for (int32_t i=0;i 1 ? 0 : (1 - CMath::abs(o))*(1 - CMath::abs(o)); u++;} - else - {F2 += y*o > 1 ? 0 : (1-y*o)*(1-y*o); l++;} - } - float64_t F; - F = 0.5*(lambda*normWeights + lambda_u*F1/u + F2/l); - return F; -} - -float64_t entropy(const float64_t *p, int32_t u) -{ - float64_t h=0.0; - float64_t q=0.0; - for (int32_t i=0;i0 && q<1) - h+= -(q*CMath::log2(q) + (1-q)*CMath::log2(1-q)); - } - return h/u; -} - -float64_t KL(const float64_t *p, const float64_t *q, int32_t u) -{ - float64_t h=0.0; - float64_t p1=0.0; - float64_t q1=0.0; - float64_t g=0.0; - for (int32_t i=0;i1-1e-8) p1-=1e-8; - if(p1<1-1e-8) p1+=1e-8; - if(q1>1-1e-8) q1-=1e-8; - if(q1<1-1e-8) q1+=1e-8; - g= (p1*CMath::log2(p1/q1) + (1-p1)*CMath::log2((1-p1)/(1-q1))); - if(CMath::abs(g)<1e-12 || CMath::is_nan(g)) g=0.0; - h+=g; - } - return h/u; -} - -/********************** UTILITIES ********************/ -float64_t norm_square(const vector_double *A) -{ - float64_t x=0.0, t=0.0; - for(int32_t i=0;id;i++) - { - t=A->vec[i]; - x+=t*t; - } - return x; -} - -void initialize_ssl(struct vector_double *A, int32_t k, float64_t a) -{ - float64_t *vec = SG_MALLOC(float64_t, k); - for (int32_t i=0;ivec = vec; - A->d = k; - return; -} - -void initialize_ssl(struct vector_int *A, int32_t k) -{ - int32_t *vec = 
SG_MALLOC(int32_t, k); - for(int32_t i=0;ivec = vec; - A->d = k; - return; -} - -void GetLabeledData(struct data *D, const struct data *Data) -{ - /*FIXME - int32_t *J = SG_MALLOC(int, Data->l); - D->C = SG_MALLOC(float64_t, Data->l); - D->Y = SG_MALLOC(float64_t, Data->l); - int32_t nz=0; - int32_t k=0; - int32_t rowptrs_=Data->l; - for(int32_t i=0;im;i++) - { - if(Data->Y[i]!=0.0) - { - J[k]=i; - D->Y[k]=Data->Y[i]; - D->C[k]=1.0/Data->l; - nz+=(Data->rowptr[i+1] - Data->rowptr[i]); - k++; - } - } - D->val = SG_MALLOC(float64_t, nz); - D->colind = SG_MALLOC(int32_t, nz); - D->rowptr = new int32_trowptrs_+1]; - nz=0; - for(int32_t i=0;il;i++) - { - D->rowptr[i]=nz; - for(int32_t j=Data->rowptr[J[i]];jrowptr[J[i]+1];j++) - { - D->val[nz] = Data->val[j]; - D->colind[nz] = Data->colind[j]; - nz++; - } - } - D->rowptr[rowptrs_]=nz; - D->nz=nz; - D->l=Data->l; - D->m=Data->l; - D->n=Data->n; - D->u=0; - SG_FREE(J);*/ -} -} diff --git a/src/shogun/lib/external/ssl.h b/src/shogun/lib/external/ssl.h deleted file mode 100644 index 19d9b5185f8..00000000000 --- a/src/shogun/lib/external/ssl.h +++ /dev/null @@ -1,236 +0,0 @@ -/* Copyright 2006 Vikas Sindhwani (vikass@cs.uchicago.edu) - SVM-lin: Fast SVM Solvers for Supervised and Semi-supervised Learning - - This file is part of SVM-lin. - - SVM-lin is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - SVM-lin is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with SVM-lin (see gpl.txt); if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef DOXYGEN_SHOULD_SKIP_THIS - -#ifndef _SSL_H -#define _SSL_H - -/* OPTIMIZATION CONSTANTS */ -#define CGITERMAX 10000 /* maximum number of CGLS iterations */ -#define SMALL_CGITERMAX 10 /* for heuristic 1 in reference [2] */ -#define EPSILON 1e-6 /* most tolerances are set to this value */ -#define BIG_EPSILON 0.01 /* for heuristic 2 in reference [2] */ -#define RELATIVE_STOP_EPS 1e-9 /* for L2-SVM-MFN relative stopping criterion */ -#define MFNITERMAX 50 /* maximum number of MFN iterations */ -#define TSVM_ANNEALING_RATE 1.5 /* rate at which lambda_u is increased in TSVM */ -#define TSVM_LAMBDA_SMALL 1e-5 /* lambda_u starts from this value */ -#define DA_ANNEALING_RATE 1.5 /* annealing rate for DA */ -#define DA_INIT_TEMP 10 /* initial temperature relative to lambda_u */ -#define DA_INNER_ITERMAX 100 /* maximum fixed temperature iterations for DA */ -#define DA_OUTER_ITERMAX 30 /* maximum number of outer loops for DA */ - -#include - -#include -#include - -namespace shogun -{ -/** Data: Input examples are stored in sparse (Compressed Row Storage) format */ -struct data -{ - /** number of examples */ - int32_t m; - /** number of labeled examples */ - int32_t l; - /** number of unlabeled examples l+u = m */ - int32_t u; - /** number of features */ - int32_t n; - /** number of non-zeros */ - int32_t nz; - - /** features */ - shogun::CDotFeatures* features; - /** labels */ - float64_t *Y; - /** cost associated with each example */ - float64_t *C; -}; - -/** defines a vector of doubles */ -struct vector_double -{ - /** number of elements */ - int32_t d; - /** ptr to vector elements*/ - float64_t *vec; -}; - -/** defines a vector of ints for index subsets */ -struct vector_int -{ - /** number of elements */ - int32_t d; - /** ptr to vector 
elements */ - int32_t *vec; -}; - -enum { RLS, SVM, TSVM, DA_SVM }; /* currently implemented algorithms */ - -/** various options user + internal optimisation */ -struct options -{ - /* user options */ - /** regularization parameter */ - int32_t algo; - /** regularization parameter */ - float64_t lambda; - /** regularization parameter over unlabeled examples */ - float64_t lambda_u; - /** maximum number of TSVM switches per fixed-weight label optimization */ - int32_t S; - /** expected fraction of unlabeled examples in positive class */ - float64_t R; - /** cost for positive examples */ - float64_t Cp; - /** cost for negative examples */ - float64_t Cn; - - /* internal optimization options */ - /** all tolerances */ - float64_t epsilon; - /** max iterations for CGLS */ - int32_t cgitermax; - /** max iterations for L2_SVM_MFN */ - int32_t mfnitermax; - - /** 1.0 if bias is to be used, 0.0 otherwise */ - float64_t bias; -}; - -/** used in line search */ -class Delta { - public: - /** default constructor */ - Delta() { delta=0.0; index=0;s=0; } - - /** delta */ - float64_t delta; - /** index */ - int32_t index; - /** s */ - int32_t s; -}; - -inline bool operator<(const Delta& a , const Delta& b) -{ - return (a.delta < b.delta); -} - -void initialize_ssl(struct vector_double *A, int32_t k, float64_t a); -/* initializes a vector_double to be of length k, all elements set to a */ -void initialize_ssl(struct vector_int *A, int32_t k); -/* initializes a vector_int to be of length k, elements set to 1,2..k. 
*/ -void GetLabeledData(struct data *Data_Labeled, const struct data *Data); -/* extracts labeled data from Data and copies it into Data_Labeled */ -float64_t norm_square(const vector_double *A); /* returns squared length of A */ - -/* ssl_train: takes data, options, uninitialized weight and output - vector_doubles, routes it to the algorithm */ -/* the learnt weight vector and the outputs it gives on the data matrix are saved */ -void ssl_train( - struct data *Data, - struct options *Options, - struct vector_double *W, /* weight vector */ - struct vector_double *O); /* output vector */ - -/* svmlin algorithms and their subroutines */ - -/* Conjugate Gradient for Sparse Linear Least Squares Problems */ -/* Solves: min_w 0.5*Options->lamda*w'*w + 0.5*sum_{i in Subset} Data->C[i] (Y[i]- w' x_i)^2 */ -/* over a subset of examples x_i specified by vector_int Subset */ -int32_t CGLS( - const struct data *Data, - const struct options *Options, - const struct vector_int *Subset, - struct vector_double *Weights, - struct vector_double *Outputs); - -/* Linear Modified Finite Newton L2-SVM*/ -/* Solves: min_w 0.5*Options->lamda*w'*w + 0.5*sum_i Data->C[i] max(0,1 - Y[i] w' x_i)^2 */ -int32_t L2_SVM_MFN( - const struct data *Data, - struct options *Options, - struct vector_double *Weights, - struct vector_double *Outputs, - int32_t ini); /* use ini=0 if no good starting guess for Weights, else 1 */ - -float64_t line_search( - float64_t *w, - float64_t *w_bar, - float64_t lambda, - float64_t *o, - float64_t *o_bar, - float64_t *Y, - float64_t *C, - int32_t d, - int32_t l); - -/* Transductive L2-SVM */ -/* Solves : min_(w, Y[i],i in UNlabeled) 0.5*Options->lamda*w'*w + 0.5*(1/Data->l)*sum_{i in labeled} max(0,1 - Y[i] w' x_i)^2 + 0.5*(Options->lambda_u/Data->u)*sum_{i in UNlabeled} max(0,1 - Y[i] w' x_i)^2 - subject to: (1/Data->u)*sum_{i in UNlabeled} max(0,Y[i]) = Options->R */ -int32_t TSVM_MFN( - const struct data *Data, - struct options *Options, - struct vector_double 
*Weights, - struct vector_double *Outputs); - -int32_t switch_labels( - float64_t* Y, - float64_t* o, - int32_t* JU, - int32_t u, - int32_t S); - -/* Deterministic Annealing*/ -int32_t DA_S3VM( - struct data *Data, - struct options *Options, - struct vector_double *Weights, - struct vector_double *Outputs); - -void optimize_p( - const float64_t* g, int32_t u, float64_t T, float64_t r, float64_t*p); - -int32_t optimize_w( - const struct data *Data, - const float64_t *p, - struct options *Options, - struct vector_double *Weights, - struct vector_double *Outputs, - int32_t ini); - -float64_t transductive_cost( - float64_t normWeights, - float64_t *Y, - float64_t *Outputs, - int32_t m, - float64_t lambda, - float64_t lambda_u); - -float64_t entropy(const float64_t *p, int32_t u); - -/* KL-divergence */ -float64_t KL(const float64_t *p, const float64_t *q, int32_t u); -} -#endif // _SSL_H - -#endif // DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/shogun/lib/malsar/malsar_clustered.cpp b/src/shogun/lib/malsar/malsar_clustered.cpp deleted file mode 100644 index b2e153ea81f..00000000000 --- a/src/shogun/lib/malsar/malsar_clustered.cpp +++ /dev/null @@ -1,318 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * Written (W) 2012 Sergey Lisitsyn - * Copyright (C) 2012 Jiayu Zhou and Jieping Ye - */ - -#include -#ifdef USE_GPL_SHOGUN - -#ifndef HAVE_CXX11 -#include -#include -#include -#include - -using namespace Eigen; -using namespace std; - -namespace shogun -{ - -static double* H_diag_matrix; -static int H_diag_matrix_ld; - -static const double* get_col(uint32_t j) -{ - return H_diag_matrix + j*H_diag_matrix_ld; -} - -malsar_result_t malsar_clustered( - CDotFeatures* features, - double* y, - double rho1, - double rho2, - const malsar_options& options) -{ - int task; - int n_feats = features->get_dim_feature_space(); - SG_SDEBUG("n feats = %d\n", n_feats) - int n_vecs = features->get_num_vectors(); - SG_SDEBUG("n vecs = %d\n", n_vecs) - int n_tasks = options.n_tasks; - SG_SDEBUG("n tasks = %d\n", n_tasks) - - H_diag_matrix = SG_CALLOC(double, n_tasks*n_tasks); - for (int i=0; i task_idx = options.tasks_indices[task]; - int n_vecs_task = task_idx.vlen; - for (int i=0; idense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]); - double bb = CMath::max(aa,0.0); - - // avoid underflow when computing exponential loss - Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs_task; - double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_vecs_task; - gCs[task] += b; - features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats); - } - } - SG_SDEBUG("Computed gradient\n") - - // add regularizer - Fs += c*(Ws*invEtaMWt).trace(); - SG_SDEBUG("Fs = %f \n", Fs) - - double Fzp = 0.0; - - int inner_iter = 0; - // line search, Armijo-Goldstein scheme - while (inner_iter <= 1) - { - Wzp = Ws - gWs/gamma; - Czp = Cs - gCs/gamma; - // compute singular projection of Ms - gMs/gamma with k - //internal::set_is_malloc_allowed(true); - EigenSolver eigensolver; - eigensolver.compute(Ms-gMs/gamma, true); - if (eigensolver.info()!=Eigen::Success) - SG_SERROR("Eigendecomposition failed") - - // solve problem - // min sum_i (s_i - s*_i)^2 s.t. 
sum_i s_i = k, 0<=s_i<=1 - for (int i=0; i eigenvalue = eigensolver.eigenvalues()[i]; - //cout << "eigenvalue " << eigenvalue << "=" << std::real(eigenvalue) << "+i" << std::imag(eigenvalue) << endl; - f[i] = -2*eigensolver.eigenvalues()[i].real(); - if (f[i]!=f[i]) - SG_SERROR("NaN %d eigenvalue", i) - - SG_SDEBUG("%dth eigenvalue %f\n",i,eigensolver.eigenvalues()[i].real()) - a[i] = 1.0; - lb[i] = 0.0; - ub[i] = 1.0; - x[i] = double(options.n_clusters)/n_tasks; - } - double b = options.n_clusters;//eigensolver.eigenvalues().sum().real(); - SG_SDEBUG("b = %f\n", b) - SG_SDEBUG("Calling libqp\n") - libqp_state_T problem_state = libqp_gsmo_solver(&get_col,diag_H,f,a,b,lb,ub,x,n_tasks,1000,1e-6,NULL); - SG_SDEBUG("Libqp objective = %f\n",problem_state.QP) - SG_SDEBUG("Exit code = %d\n",problem_state.exitflag) - SG_SDEBUG("%d iteration passed\n",problem_state.nIter) - SG_SDEBUG("Solution is \n [ ") - for (int i=0; i Mzp_DiagSigz(x,n_tasks); - Mzp_Pz = eigensolver.eigenvectors().real(); - Mzp = Mzp_Pz*Mzp_DiagSigz.asDiagonal()*Mzp_Pz.transpose(); - //internal::set_is_malloc_allowed(false); - // walk in direction of antigradient - for (int i=0; i task_idx = options.tasks_indices[task]; - int n_vecs_task = task_idx.vlen; - for (int i=0; idense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Cs[task]); - double bb = CMath::max(aa,0.0); - - Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs_task; - } - } - Fzp += c*(Wzp*invEtaMWt).trace(); - - // compute delta between line search point and search point - delta_Wzp = Wzp - Ws; - delta_Czp = Czp - Cs; - delta_Mzp = Mzp - Ms; - - // norms of delta - double nrm_delta_Wzp = delta_Wzp.squaredNorm(); - double nrm_delta_Czp = delta_Czp.squaredNorm(); - double nrm_delta_Mzp = delta_Mzp.squaredNorm(); - - double r_sum = (nrm_delta_Wzp + nrm_delta_Czp + nrm_delta_Mzp)/3; - - double Fzp_gamma = 0.0; - if (n_feats > n_tasks) - { - Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() + - 
(delta_Czp.transpose()*gCs).trace() + - (delta_Mzp.transpose()*gMs).trace() + - (gamma/2)*nrm_delta_Wzp + - (gamma/2)*nrm_delta_Czp + - (gamma/2)*nrm_delta_Mzp; - } - else - { - Fzp_gamma = Fs + (gWs.transpose()*delta_Wzp).trace() + - (gCs.transpose()*delta_Czp).trace() + - (gMs.transpose()*delta_Mzp).trace() + - (gamma/2)*nrm_delta_Wzp + - (gamma/2)*nrm_delta_Czp + - (gamma/2)*nrm_delta_Mzp; - } - - // break if delta is getting too small - if (r_sum <= 1e-20) - { - done = true; - break; - } - - // break if objective at line searc point is smaller than Fzp_gamma - if (Fzp <= Fzp_gamma) - break; - else - gamma *= gamma_inc; - - inner_iter++; - } - - Wz_old = Wz; - Cz_old = Cz; - Mz_old = Mz; - Wz = Wzp; - Cz = Czp; - Mz = Mzp; - - // compute objective value - obj_old = obj; - obj = Fzp; - - // check if process should be terminated - switch (options.termination) - { - case 0: - if (iter>=2) - { - if ( CMath::abs(obj-obj_old) <= options.tolerance ) - done = true; - } - break; - case 1: - if (iter>=2) - { - if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old)) - done = true; - } - break; - case 2: - if (CMath::abs(obj) <= options.tolerance) - done = true; - break; - case 3: - if (iter>=options.max_iter) - done = true; - break; - } - - iter++; - t_old = t; - t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t)); - } - //internal::set_is_malloc_allowed(true); - SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj) - - SG_FREE(H_diag_matrix); - SG_FREE(diag_H); - SG_FREE(f); - SG_FREE(a); - SG_FREE(lb); - SG_FREE(ub); - SG_FREE(x); - - SGMatrix tasks_w(n_feats, n_tasks); - for (int i=0; i tasks_c(n_tasks); - for (int i=0; i -#ifdef USE_GPL_SHOGUN -#include -#include - -namespace shogun -{ - -/** - * Routine for learning a linear multitask - * logistic regression model using - * Clustered multitask learning algorithm. 
- * - */ -malsar_result_t malsar_clustered( - CDotFeatures* features, - double* y, - double rho1, - double rho2, - const malsar_options& options); - -}; -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef MALSAR_CLUSTERED_H_ ----- */ diff --git a/src/shogun/lib/malsar/malsar_joint_feature_learning.cpp b/src/shogun/lib/malsar/malsar_joint_feature_learning.cpp deleted file mode 100644 index 02bb4bfadbf..00000000000 --- a/src/shogun/lib/malsar/malsar_joint_feature_learning.cpp +++ /dev/null @@ -1,231 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2012 Sergey Lisitsyn - * Copyright (C) 2012 Jiayu Zhou and Jieping Ye - */ - - -#include -#ifdef USE_GPL_SHOGUN -#include -#include -#include -#include - -using namespace Eigen; -using namespace std; - -namespace shogun -{ - -malsar_result_t malsar_joint_feature_learning( - CDotFeatures* features, - double* y, - double rho1, - double rho2, - const malsar_options& options) -{ - int task; - int n_feats = features->get_dim_feature_space(); - SG_SDEBUG("n feats = %d\n", n_feats) - int n_vecs = features->get_num_vectors(); - SG_SDEBUG("n vecs = %d\n", n_vecs) - int n_tasks = options.n_tasks; - SG_SDEBUG("n tasks = %d\n", n_tasks) - - int iter = 0; - - // initialize weight vector and bias for each task - MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks); - VectorXd Cs = VectorXd::Zero(n_tasks); - MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws; - VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs; - - double t=1, t_old=0; - double gamma=1, gamma_inc=2; - double obj=0.0, obj_old=0.0; - - //internal::set_is_malloc_allowed(false); - bool done = false; - while (!done && iter <= options.max_iter && !CSignal::cancel_computations()) - { - double alpha = double(t_old - 1)/t; - - // compute 
search point - Ws = (1+alpha)*Wz - alpha*Wz_old; - Cs = (1+alpha)*Cz - alpha*Cz_old; - - // zero gradient - gWs.setZero(); - gCs.setZero(); - - // compute gradient and objective at search point - double Fs = 0; - for (task=0; task task_idx = options.tasks_indices[task]; - int n_task_vecs = task_idx.vlen; - for (int i=0; idense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]); - double bb = CMath::max(aa,0.0); - - // avoid underflow when computing exponential loss - Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs; - double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_task_vecs; - - gCs[task] += b; - features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats); - } - } - gWs.noalias() += 2*rho2*Ws; - - // add regularizer - Fs += Ws.squaredNorm(); - - //cout << "gWs" << endl << gWs << endl; - //cout << "gCs" << endl << gCs << endl; - //SG_SPRINT("Fs = %f\n",Fs) - - double Fzp = 0.0; - - int inner_iter = 0; - // line search, Armijo-Goldstein scheme - while (inner_iter <= 1000) - { - // compute lasso projection of Ws - gWs/gamma - for (int i=0; i(); - if (norm == 0.0) - Wzp.row(i).setZero(); - else - { - double threshold = norm - rho1/gamma; - if (threshold < 0.0) - Wzp.row(i).setZero(); - else - Wzp.row(i) *= threshold/norm; - } - } - // walk in direction of antigradient - Czp = Cs - gCs/gamma; - - // compute objective at line search point - Fzp = 0.0; - for (task=0; task task_idx = options.tasks_indices[task]; - int n_task_vecs = task_idx.vlen; - for (int i=0; idense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Czp[task]); - double bb = CMath::max(aa,0.0); - - Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs; - } - } - Fzp += rho2*Wzp.squaredNorm(); - - // compute delta between line search point and search point - delta_Wzp = Wzp - Ws; - delta_Czp = Czp - Cs; - - // norms of delta - double nrm_delta_Wzp = delta_Wzp.squaredNorm(); - double nrm_delta_Czp = delta_Czp.squaredNorm(); - - double 
r_sum = (nrm_delta_Wzp + nrm_delta_Czp)/2; - - double Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() + - (delta_Czp.transpose()*gCs).trace() + - (gamma/2)*nrm_delta_Wzp + - (gamma/2)*nrm_delta_Czp; - - // break if delta is getting too small - if (r_sum <= 1e-20) - { - SG_SDEBUG("Line search point is too close to search point\n") - done = true; - break; - } - - // break if objective at line searc point is smaller than Fzp_gamma - if (Fzp <= Fzp_gamma) - break; - else - gamma *= gamma_inc; - - inner_iter++; - } - - Wz_old = Wz; - Cz_old = Cz; - Wz = Wzp; - Cz = Czp; - - // compute objective value - obj_old = obj; - obj = Fzp; - for (int i=0; i()); - //for (task=0; task=2) - { - if ( CMath::abs(obj-obj_old) <= options.tolerance ) - { - SG_SDEBUG("Objective changes less than tolerance\n") - done = true; - } - } - break; - case 1: - if (iter>=2) - { - if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old)) - done = true; - } - break; - case 2: - if (CMath::abs(obj) <= options.tolerance) - done = true; - break; - case 3: - if (iter>=options.max_iter) - done = true; - break; - } - - iter++; - t_old = t; - t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t)); - } - //internal::set_is_malloc_allowed(true); - SG_SDONE() - SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj) - - SGMatrix tasks_w(n_feats, n_tasks); - for (int i=0; i tasks_c(n_tasks); - for (int i=0; i -#ifdef USE_GPL_SHOGUN -#include -#include - -namespace shogun -{ - -/** - * Routine for learning a linear multitask - * logistic regression model - * using Joint Feature algorithm. 
- * - */ -malsar_result_t malsar_joint_feature_learning( - CDotFeatures* features, - double* y, - double rho1, - double rho2, - const malsar_options& options); - -}; -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef MALSAR_JOINT_FEATURE_LEARNING_H_ ----- */ - diff --git a/src/shogun/lib/malsar/malsar_low_rank.cpp b/src/shogun/lib/malsar/malsar_low_rank.cpp deleted file mode 100644 index 34ac52ee5f5..00000000000 --- a/src/shogun/lib/malsar/malsar_low_rank.cpp +++ /dev/null @@ -1,214 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2012 Sergey Lisitsyn - * Copyright (C) 2012 Jiayu Zhou and Jieping Ye - */ - -#include -#ifdef USE_GPL_SHOGUN -#include -#include -#include - -using namespace Eigen; - -namespace shogun -{ - -malsar_result_t malsar_low_rank( - CDotFeatures* features, - double* y, - double rho, - const malsar_options& options) -{ - int task; - int n_feats = features->get_dim_feature_space(); - SG_SDEBUG("n feats = %d\n", n_feats) - int n_vecs = features->get_num_vectors(); - SG_SDEBUG("n vecs = %d\n", n_vecs) - int n_tasks = options.n_tasks; - SG_SDEBUG("n tasks = %d\n", n_tasks) - - int iter = 0; - - // initialize weight vector and bias for each task - MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks); - VectorXd Cs = VectorXd::Zero(n_tasks); - MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws; - VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs; - - double t=1, t_old=0; - double gamma=1, gamma_inc=2; - double obj=0.0, obj_old=0.0; - - double rho_L2 = 0.0; - - //internal::set_is_malloc_allowed(false); - bool done = false; - while (!done && iter <= options.max_iter) - { - double alpha = double(t_old - 1)/t; - - // compute search point - Ws = (1+alpha)*Wz - alpha*Wz_old; - Cs = (1+alpha)*Cz - alpha*Cz_old; - - // 
zero gradient - gWs.setZero(); - gCs.setZero(); - - // compute gradient and objective at search point - double Fs = 0; - for (task=0; task task_idx = options.tasks_indices[task]; - int n_task_vecs = task_idx.vlen; - for (int i=0; idense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]); - double bb = CMath::max(aa,0.0); - - // avoid underflow when computing exponential loss - Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs; - double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_task_vecs; - - gCs[task] += b; - features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats); - } - } - gWs.noalias() += 2*rho_L2*Ws; - //SG_SDEBUG("gWs=%f\n",gWs.squaredNorm()) - - // add regularizer - Fs += rho_L2*Ws.squaredNorm(); - - double Fzp = 0.0; - - int inner_iter = 0; - // line search, Armijo-Goldstein scheme - while (inner_iter <= 1000) - { - // compute trace projection of Ws - gWs/gamma with 2*rho/gamma - //internal::set_is_malloc_allowed(true); - Wzp.setZero(); - JacobiSVD svd((Ws - gWs/gamma).transpose(),ComputeThinU | ComputeThinV); - for (int i=0; i rho/gamma) - Wzp += (svd.matrixU().col(i)* - svd.singularValues()[i]* - svd.matrixV().col(i).transpose()).transpose(); - } - //internal::set_is_malloc_allowed(false); - // walk in direction of antigradient - Czp = Cs - gCs/gamma; - - // compute objective at line search point - Fzp = 0.0; - for (task=0; task task_idx = options.tasks_indices[task]; - int n_task_vecs = task_idx.vlen; - for (int i=0; idense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Czp[task]); - double bb = CMath::max(aa,0.0); - - Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs; - } - } - Fzp += rho_L2*Wzp.squaredNorm(); - - // compute delta between line search point and search point - delta_Wzp = Wzp - Ws; - delta_Czp = Czp - Cs; - - // norms of delta - double nrm_delta_Wzp = delta_Wzp.squaredNorm(); - double nrm_delta_Czp = delta_Czp.squaredNorm(); - - double r_sum = (nrm_delta_Wzp + 
nrm_delta_Czp)/2; - - double Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() + - (delta_Czp.transpose()*gCs).trace() + - (gamma/2)*nrm_delta_Wzp + - (gamma/2)*nrm_delta_Czp; - - // break if delta is getting too small - if (r_sum <= 1e-20) - { - done = true; - break; - } - - // break if objective at line search point is smaller than Fzp_gamma - if (Fzp <= Fzp_gamma) - break; - else - gamma *= gamma_inc; - } - - Wz_old = Wz; - Cz_old = Cz; - Wz = Wzp; - Cz = Czp; - - // compute objective value - obj_old = obj; - obj = Fzp; - //internal::set_is_malloc_allowed(true); - JacobiSVD svd(Wzp, EigenvaluesOnly); - obj += rho*svd.singularValues().sum(); - //internal::set_is_malloc_allowed(false); - - - // check if process should be terminated - switch (options.termination) - { - case 0: - if (iter>=2) - { - if ( CMath::abs(obj-obj_old) <= options.tolerance ) - done = true; - } - break; - case 1: - if (iter>=2) - { - if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old)) - done = true; - } - break; - case 2: - if (CMath::abs(obj) <= options.tolerance) - done = true; - break; - case 3: - if (iter>=options.max_iter) - done = true; - break; - } - - iter++; - t_old = t; - t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t)); - } - //internal::set_is_malloc_allowed(true); - SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj) - - SGMatrix tasks_w(n_feats, n_tasks); - for (int i=0; i tasks_c(n_tasks); - for (int i=0; i -#ifdef USE_GPL_SHOGUN -#include -#include - -namespace shogun -{ - -/** - * Routine for learning a linear multitask - * logistic regression model using - * Low Rank multitask algorithm. 
- * - */ -malsar_result_t malsar_low_rank( - CDotFeatures* features, - double* y, - double rho, - const malsar_options& options); - -}; -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef MALSAR_LOW_RANK_H_ ----- */ diff --git a/src/shogun/lib/malsar/malsar_options.h b/src/shogun/lib/malsar/malsar_options.h deleted file mode 100644 index d76f8dffbee..00000000000 --- a/src/shogun/lib/malsar/malsar_options.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Copyright (C) 2012 Sergey Lisitsyn - */ - -#ifndef MALSAR_OPTIONS_H_ -#define MALSAR_OPTIONS_H_ - -#include -#ifdef USE_GPL_SHOGUN - -#define IGNORE_IN_CLASSLIST - -#include -#include -#include - -namespace shogun -{ - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -IGNORE_IN_CLASSLIST enum malsar_loss -{ - MALSAR_LOGISTIC, - MALSAR_LEAST_SQUARES -}; - -IGNORE_IN_CLASSLIST struct malsar_options -{ - int termination; - double tolerance; - int max_iter; - int n_tasks; - int n_clusters; - SGVector* tasks_indices; - malsar_loss loss; - - static malsar_options default_options() - { - malsar_options opts; - opts.termination = 2; - opts.tolerance = 1e-3; - opts.max_iter = 1000; - opts.tasks_indices = NULL; - opts.n_clusters = 2; - opts.loss = MALSAR_LOGISTIC; - return opts; - } -}; - -IGNORE_IN_CLASSLIST struct malsar_result_t -{ - SGMatrix w; - SGVector c; - - malsar_result_t(SGMatrix w_, SGVector c_) - { - w = w_; - c = c_; - } -}; -#endif -} -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef MALSAR_OPTIONS_H_ ----- */ diff --git a/src/shogun/lib/parameter_observers/ObservedValue.h b/src/shogun/lib/parameter_observers/ObservedValue.h new file mode 100644 index 00000000000..65a622deab9 --- /dev/null +++ b/src/shogun/lib/parameter_observers/ObservedValue.h @@ -0,0 +1,194 @@ +/* +* BSD 
3-Clause License +* +* Copyright (c) 2017, Shogun-Toolbox e.V. +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* +* * Neither the name of the copyright holder nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* Written (W) 2017 Giovanni De Toni +* +*/ + +#ifndef SHOGUN_OBSERVEDVALUE_H +#define SHOGUN_OBSERVEDVALUE_H + +#include +#include +#include +#include + +/** + * Definitions of basic object with are needed by the Parameter + * Observer architecture. 
+ */ +namespace shogun +{ + /* Timepoint */ + typedef std::chrono::steady_clock::time_point time_point; + + /* Type of the observed value */ + enum SG_OBS_VALUE_TYPE + { + TENSORBOARD, + CROSSVALIDATION + }; + + /** + * Observed value which is emitted by algorithms. + */ + class ObservedValue + { + public: + /** + * Constructor + * @param step step + * @param name param's name + * @param value Any-wrapped value of the param + */ + ObservedValue( + int64_t step, std::string& name, Any value, SG_OBS_VALUE_TYPE type) + : m_step(step), m_name(name), m_value(value), m_type(type) + { + } + + ~ObservedValue(){}; + + /** + * Get the step + * @return an integer representing the step + */ + int64_t get_step() const + { + return m_step; + } + + /** + * Set the step + * @param step step + */ + void set_step(int64_t step) + { + m_step = step; + } + + /** + * Get the param's name + * @return param's name + */ + const std::string& get_name() const + { + return m_name; + } + + /** + * Set the param's name + * @param name + */ + void set_name(const std::string& name) + { + m_name = name; + } + + /** + * Get the Any-wrapped value + * @return Any-wrapped value + */ + const Any& get_value() const + { + return m_value; + } + + /** + * Set the param's value + * @param value + */ + void set_value(const Any& value) + { + m_value = value; + } + + /** + * Get the type of this ObservedValue + * @return observed value type + */ + SG_OBS_VALUE_TYPE get_type() const + { + return m_type; + } + + /** + * Set the observed value type + * @param type type of this observed value + */ + void set_type(const SG_OBS_VALUE_TYPE type) + { + m_type = type; + } + + /** + * Helper method to generate an ObservedValue (TensorBoard oriented) + * @param step the step + * @param name the param's name we are observing + * @param value the param's value + * @return an ObservedValue object initialized + */ + static ObservedValue + make_observation(int64_t step, std::string& name, Any value) + { + return 
ObservedValue(step, name, value, TENSORBOARD); + } + + protected: + /** ObservedValue step (used by Tensorboard to print graphs) */ + int64_t m_step; + /** Parameter's name */ + std::string m_name; + /** Parameter's value */ + Any m_value; + /** ObservedValue type */ + SG_OBS_VALUE_TYPE m_type; + }; + + /** + * Observed value with a timestamp + */ + typedef std::pair TimedObservedValue; + + /** + * Helper method to convert a time_point to milliseconds + * @param value time point we want to convert + * @return the time point converted to milliseconds + */ + SG_FORCED_INLINE double convert_to_millis(const time_point& value) + { + return std::chrono::duration_cast( + value.time_since_epoch()) + .count(); + } +} + +#endif // SHOGUN_OBSERVEDVALUE_H diff --git a/src/shogun/lib/parameter_observers/ParameterObserverCV.cpp b/src/shogun/lib/parameter_observers/ParameterObserverCV.cpp new file mode 100644 index 00000000000..1d2199eddcf --- /dev/null +++ b/src/shogun/lib/parameter_observers/ParameterObserverCV.cpp @@ -0,0 +1,183 @@ +/* +* BSD 3-Clause License +* +* Copyright (c) 2017, Shogun-Toolbox e.V. +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* +* * Neither the name of the copyright holder nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. 
+* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* Written (W) 2017 Giovanni De Toni +* +*/ + +#include +#include +#include +#include +#include +#include +#include + +using namespace shogun; + +CParameterObserverCV::CParameterObserverCV(bool verbose) + : ParameterObserverInterface(), m_verbose(verbose) +{ + m_type = CROSSVALIDATION; +} + +CParameterObserverCV::~CParameterObserverCV() +{ + for (auto i : m_observations) + SG_UNREF(i) +} + +void CParameterObserverCV::on_next(const shogun::TimedObservedValue& value) +{ + CHECK_OBSERVED_VALUE_TYPE(value.first.get_type()); + + if (value.first.get_value().type_info().hash_code() == + typeid(CrossValidationStorage*).hash_code()) + { + CrossValidationStorage* recalled_value = + recall_type(value.first.get_value()); + SG_REF(recalled_value); + + /* Print information on screen if enabled*/ + if (m_verbose) + print_observed_value(recalled_value); + + m_observations.push_back(recalled_value); + } + else + { + SG_SERROR( + "ParameterObserverCV: The observed value received is not of " + "type CrossValidationStorage\n"); + } +} + +void CParameterObserverCV::on_error(std::exception_ptr ptr) +{ +} + +void CParameterObserverCV::on_complete() +{ +} + +void CParameterObserverCV::clear() +{ + for (auto i : 
m_observations) + { + SG_UNREF(i) + } + m_observations.clear(); +} + +void CParameterObserverCV::print_observed_value( + CrossValidationStorage* value) const +{ + for (int i = 0; i < value->get_num_folds(); i++) + { + auto f = value->get_fold(i); + SG_SPRINT("\n") + SG_SPRINT("Current run index: %i\n", f->get_current_run_index()) + SG_SPRINT("Current fold index: %i\n", f->get_current_fold_index()) + f->get_train_indices().display_vector("Train Indices "); + f->get_test_indices().display_vector("Test Indices "); + print_machine_information(f->get_trained_machine()); + f->get_test_result()->get_values().display_vector("Test Labels "); + f->get_test_true_result()->get_values().display_vector( + "Test True Label "); + SG_SPRINT("Evaluation result: %f\n", f->get_evaluation_result()); + SG_UNREF(f) + } +} + +void CParameterObserverCV::print_machine_information(CMachine* machine) const +{ + if (dynamic_cast(machine)) + { + CLinearMachine* linear_machine = (CLinearMachine*)machine; + linear_machine->get_w().display_vector("Learned Weights = "); + SG_SPRINT("Learned Bias = %f\n", linear_machine->get_bias()) + } + + if (dynamic_cast(machine)) + { + CKernelMachine* kernel_machine = (CKernelMachine*)machine; + kernel_machine->get_alphas().display_vector("Learned alphas = "); + SG_SPRINT("Learned Bias = %f\n", kernel_machine->get_bias()) + } + + if (dynamic_cast(machine) || + dynamic_cast(machine)) + { + CMulticlassMachine* mc_machine = (CMulticlassMachine*)machine; + for (int i = 0; i < mc_machine->get_num_machines(); i++) + { + CMachine* sub_machine = mc_machine->get_machine(i); + this->print_machine_information(sub_machine); + SG_UNREF(sub_machine); + } + } + + if (dynamic_cast(machine)) + { + CMKL* mkl = (CMKL*)machine; + CCombinedKernel* kernel = + dynamic_cast(mkl->get_kernel()); + kernel->get_subkernel_weights().display_vector( + "MKL sub-kernel weights ="); + SG_UNREF(kernel); + } + + if (dynamic_cast(machine)) + { + CMKLMulticlass* mkl = (CMKLMulticlass*)machine; + 
CCombinedKernel* kernel = + dynamic_cast(mkl->get_kernel()); + kernel->get_subkernel_weights().display_vector( + "MKL sub-kernel weights ="); + SG_UNREF(kernel); + } +} + +CrossValidationStorage* CParameterObserverCV::get_observation(int run) const +{ + REQUIRE( + run < get_num_observations(), "The run number must be less than %i", + get_num_observations()) + + CrossValidationStorage* obs = m_observations[run]; + SG_REF(obs) + return obs; +} + +const int CParameterObserverCV::get_num_observations() const +{ + return m_observations.size(); +} diff --git a/src/shogun/lib/parameter_observers/ParameterObserverCV.h b/src/shogun/lib/parameter_observers/ParameterObserverCV.h new file mode 100644 index 00000000000..8539b816791 --- /dev/null +++ b/src/shogun/lib/parameter_observers/ParameterObserverCV.h @@ -0,0 +1,110 @@ +/* +* BSD 3-Clause License +* +* Copyright (c) 2017, Shogun-Toolbox e.V. +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* +* * Neither the name of the copyright holder nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* Written (W) 2017 Giovanni De Toni +* +*/ + +#ifndef SHOGUN_PARAMETEROBSERVERCV_H +#define SHOGUN_PARAMETEROBSERVERCV_H + +#include +#include +#include + +namespace shogun +{ + + /** + * Base ParameterObserver class for CrossValidation. + */ + class CParameterObserverCV : public ParameterObserverInterface, + public CSGObject + { + + public: + CParameterObserverCV(bool verbose = false); + virtual ~CParameterObserverCV(); + + virtual void on_next(const TimedObservedValue& value); + virtual void on_error(std::exception_ptr ptr); + virtual void on_complete(); + + /* Erase all observations done so far */ + virtual void clear(); + + /** + * Get the total number of cross validation runs received + * by this observer. + * @return number of runs. + */ + const int get_num_observations() const; + + /** + * Get a CrossValidationStorage object which will store + * the result of a CrossValidation run. + * @param run index of the run + * @return a CrossValidationStorage object + */ + CrossValidationStorage* get_observation(int run) const; + + /** + * Print data contained into a CrossValidationStorage object. + * @param value CrossValidationStorage object + */ + void print_observed_value(CrossValidationStorage* value) const; + + /** + * Get class name. 
+ * @return class name + */ + virtual const char* get_name() const + { + return "ParameterObserverCV"; + } + + private: + void print_machine_information(CMachine* machine) const; + + protected: + /** + * Observation's vector + */ + std::vector m_observations; + + /** + * enable printing of information + */ + bool m_verbose; + }; +} + +#endif // SHOGUN_PARAMETEROBSERVERCV_H diff --git a/src/shogun/lib/parameter_observers/ParameterObserverHistogram.cpp b/src/shogun/lib/parameter_observers/ParameterObserverHistogram.cpp new file mode 100644 index 00000000000..fe2c365b4f5 --- /dev/null +++ b/src/shogun/lib/parameter_observers/ParameterObserverHistogram.cpp @@ -0,0 +1,82 @@ +/* +* BSD 3-Clause License +* +* Copyright (c) 2017, Shogun-Toolbox e.V. +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* +* * Neither the name of the copyright holder nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* Written (W) 2017 Giovanni De Toni +* +*/ +#include +#ifdef HAVE_TFLOGGER + +#include +#include + +using namespace shogun; + +ParameterObserverHistogram::ParameterObserverHistogram() + : ParameterObserverTensorBoard() +{ +} + +ParameterObserverHistogram::ParameterObserverHistogram( + std::vector& parameters) + : ParameterObserverTensorBoard(parameters) +{ +} + +ParameterObserverHistogram::ParameterObserverHistogram( + const std::string& filename, std::vector& parameters) + : ParameterObserverTensorBoard(filename, parameters) +{ +} + +ParameterObserverHistogram::~ParameterObserverHistogram() +{ +} + +void ParameterObserverHistogram::on_next(const TimedObservedValue& value) +{ + CHECK_OBSERVED_VALUE_TYPE(value.first.get_type()) + + auto node_name = std::string("node"); + auto format = TBOutputFormat(); + auto event_value = format.convert_vector(value, node_name); + m_writer.writeEvent(event_value); +} + +void ParameterObserverHistogram::on_error(std::exception_ptr) +{ +} + +void ParameterObserverHistogram::on_complete() +{ +} + +#endif // HAVE_TFLOGGER diff --git a/src/shogun/lib/parameter_observers/ParameterObserverHistogram.h b/src/shogun/lib/parameter_observers/ParameterObserverHistogram.h new file mode 100644 index 00000000000..8670c7b4078 --- /dev/null +++ b/src/shogun/lib/parameter_observers/ParameterObserverHistogram.h @@ -0,0 +1,77 @@ +/* +* BSD 3-Clause License +* +* Copyright (c) 2017, Shogun-Toolbox e.V. +* All rights reserved. 
+* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* +* * Neither the name of the copyright holder nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* Written (W) 2017 Giovanni De Toni +* +*/ +#include +#ifdef HAVE_TFLOGGER + +#ifndef SHOGUN_PARAMETEROBSERVERHISTOGRAM_H +#define SHOGUN_PARAMETEROBSERVERHISTOGRAM_H + +#include +#include + +namespace shogun +{ + /** + * Implementation of a ParameterObserver which write to file + * histograms, given object emitted from a parameter observable. 
+ */ + class ParameterObserverHistogram : public ParameterObserverTensorBoard, + public CSGObject + { + + public: + ParameterObserverHistogram(); + ParameterObserverHistogram(std::vector& parameters); + ParameterObserverHistogram( + const std::string& filename, std::vector& parameters); + ~ParameterObserverHistogram(); + + virtual void on_next(const TimedObservedValue& value); + virtual void on_error(std::exception_ptr); + virtual void on_complete(); + + /** + * Get class name. + * @return class name + */ + virtual const char* get_name() const + { + return "ParameterObserverHistogram"; + } + }; +} + +#endif // SHOGUN_PARAMETEROBSERVERHISTOGRAM_H +#endif // HAVE_TFLOGGER diff --git a/src/shogun/lib/parameter_observers/ParameterObserverInterface.cpp b/src/shogun/lib/parameter_observers/ParameterObserverInterface.cpp new file mode 100644 index 00000000000..482c1c1f4f0 --- /dev/null +++ b/src/shogun/lib/parameter_observers/ParameterObserverInterface.cpp @@ -0,0 +1,70 @@ +/* +* BSD 3-Clause License +* +* Copyright (c) 2017, Shogun-Toolbox e.V. +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* +* * Neither the name of the copyright holder nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. 
+* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* Written (W) 2017 Giovanni De Toni +* +*/ +#include +#include + +using namespace shogun; + +ParameterObserverInterface::ParameterObserverInterface() : m_parameters() +{ +} + +ParameterObserverInterface::ParameterObserverInterface( + std::vector& parameters) + : m_parameters(parameters) +{ +} + +ParameterObserverInterface::ParameterObserverInterface( + const std::string& filename, std::vector& parameters) + : m_parameters(parameters) +{ +} + +ParameterObserverInterface::~ParameterObserverInterface() +{ +} + +bool ParameterObserverInterface::filter(const std::string& param) +{ + // If there are no specified parameters, then watch everything + if (m_parameters.size() == 0) + return true; + + for (auto v : m_parameters) + if (v == param) + return true; + return false; +} \ No newline at end of file diff --git a/src/shogun/lib/parameter_observers/ParameterObserverInterface.h b/src/shogun/lib/parameter_observers/ParameterObserverInterface.h new file mode 100644 index 00000000000..baee4b9fc3e --- /dev/null +++ b/src/shogun/lib/parameter_observers/ParameterObserverInterface.h @@ -0,0 +1,124 @@ +/* +* BSD 3-Clause License +* +* Copyright (c) 2017, Shogun-Toolbox e.V. +* All rights reserved. 
+* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* +* * Neither the name of the copyright holder nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+* +* Written (W) 2017 Giovanni De Toni +* +*/ +#ifndef SHOGUN_PARAMETEROBSERVERINTERFACE_H +#define SHOGUN_PARAMETEROBSERVERINTERFACE_H + +#include +#include + +#include +#include + +/* Used to check if the observed value type */ +#define CHECK_OBSERVED_VALUE_TYPE(type) \ + if (type != m_type) \ + return; + +namespace shogun +{ + + /** + * Interface for the parameter observer classes + */ + class ParameterObserverInterface + { + + public: + /** + * Default constructor + */ + ParameterObserverInterface(); + + /** + * Constructor + * @param parameters list of parameters which we want to watch over + */ + ParameterObserverInterface(std::vector& parameters); + + /** + * Constructor + * @param filename name of the generated output file + * @param parameters list of parameters which we want to watch over + */ + ParameterObserverInterface( + const std::string& filename, std::vector& parameters); + /** + * Virtual destructor + */ + virtual ~ParameterObserverInterface(); + + /** + * Filter function, check if the parameter name supplied is what + * we want to monitor + * @param param the param name + * @return true if param is found inside of m_parameters list + */ + virtual bool filter(const std::string& param); + + /** + * Method which will be called when the parameter observable emits a + * value. + * @param value the value emitted by the parameter observable + */ + virtual void on_next(const TimedObservedValue& value) = 0; + /** + * Method which will be called on errors + */ + virtual void on_error(std::exception_ptr) = 0; + /** + * Method which will be called on completion + */ + virtual void on_complete() = 0; + + /** + * Method useful to empty the observer from + * obseverd value it may have stored. 
+ */ + virtual void clear(){}; + + protected: + /** + * List of parameter's names we want to monitor + */ + std::vector m_parameters; + + /** + * The type of params this observers accept + */ + SG_OBS_VALUE_TYPE m_type; + }; +} + +#endif // SHOGUN_PARAMETEROBSERVER_H diff --git a/src/shogun/lib/parameter_observers/ParameterObserverScalar.cpp b/src/shogun/lib/parameter_observers/ParameterObserverScalar.cpp new file mode 100644 index 00000000000..7db0b150bb7 --- /dev/null +++ b/src/shogun/lib/parameter_observers/ParameterObserverScalar.cpp @@ -0,0 +1,82 @@ +/* +* BSD 3-Clause License +* +* Copyright (c) 2017, Shogun-Toolbox e.V. +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* +* * Neither the name of the copyright holder nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* Written (W) 2017 Giovanni De Toni +* +*/ +#include +#ifdef HAVE_TFLOGGER + +#include +#include + +using namespace shogun; + +ParameterObserverScalar::ParameterObserverScalar() + : ParameterObserverTensorBoard() +{ +} + +ParameterObserverScalar::ParameterObserverScalar( + std::vector& parameters) + : ParameterObserverTensorBoard(parameters) +{ +} + +ParameterObserverScalar::ParameterObserverScalar( + const std::string& filename, std::vector& parameters) + : ParameterObserverTensorBoard(filename, parameters) +{ +} + +ParameterObserverScalar::~ParameterObserverScalar() +{ +} + +void ParameterObserverScalar::on_next(const TimedObservedValue& value) +{ + CHECK_OBSERVED_VALUE_TYPE(value.first.get_type()) + + auto node_name = std::string("node"); + auto format = TBOutputFormat(); + auto event_value = format.convert_scalar(value, node_name); + m_writer.writeEvent(event_value); +} + +void ParameterObserverScalar::on_error(std::exception_ptr) +{ +} + +void ParameterObserverScalar::on_complete() +{ +} + +#endif // HAVE_TFLOGGER diff --git a/src/shogun/lib/parameter_observers/ParameterObserverScalar.h b/src/shogun/lib/parameter_observers/ParameterObserverScalar.h new file mode 100644 index 00000000000..e16eea6225e --- /dev/null +++ b/src/shogun/lib/parameter_observers/ParameterObserverScalar.h @@ -0,0 +1,77 @@ +/* +* BSD 3-Clause License +* +* Copyright (c) 2017, Shogun-Toolbox e.V. +* All rights reserved. 
+* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* +* * Neither the name of the copyright holder nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* Written (W) 2017 Giovanni De Toni +* +*/ +#include +#ifdef HAVE_TFLOGGER + +#ifndef SHOGUN_PARAMETEROBSERVERSCALAR_H +#define SHOGUN_PARAMETEROBSERVERSCALAR_H + +#include +#include + +namespace shogun +{ + /** + * Implementation of a ParameterObserver which write to file + * scalar values, given object emitted from a parameter observable. 
+ */ + class ParameterObserverScalar : public ParameterObserverTensorBoard, + public CSGObject + { + + public: + ParameterObserverScalar(); + ParameterObserverScalar(std::vector& parameters); + ParameterObserverScalar( + const std::string& filename, std::vector& parameters); + ~ParameterObserverScalar(); + + virtual void on_next(const TimedObservedValue& value); + virtual void on_error(std::exception_ptr); + virtual void on_complete(); + + /** + * Get class name. + * @return class name + */ + virtual const char* get_name() const + { + return "ParameterObserverScalar"; + } + }; +} + +#endif // SHOGUN_PARAMETEROBSERVERSCALAR_H +#endif // HAVE_TFLOGGER diff --git a/src/shogun/lib/parameter_observers/ParameterObserverTensorBoard.cpp b/src/shogun/lib/parameter_observers/ParameterObserverTensorBoard.cpp new file mode 100644 index 00000000000..0448ee8c05b --- /dev/null +++ b/src/shogun/lib/parameter_observers/ParameterObserverTensorBoard.cpp @@ -0,0 +1,68 @@ +/* +* BSD 3-Clause License +* +* Copyright (c) 2017, Shogun-Toolbox e.V. +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* +* * Neither the name of the copyright holder nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. 
+* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* Written (W) 2017 Giovanni De Toni +* +*/ +#include +#ifdef HAVE_TFLOGGER + +#include "ParameterObserverTensorBoard.h" + +using namespace shogun; + +ParameterObserverTensorBoard::ParameterObserverTensorBoard() + : ParameterObserverInterface(), m_writer("shogun") +{ + m_writer.init(); +} + +ParameterObserverTensorBoard::ParameterObserverTensorBoard( + std::vector& parameters) + : ParameterObserverInterface(parameters), m_writer("shogun") +{ + m_writer.init(); +} + +ParameterObserverTensorBoard::ParameterObserverTensorBoard( + const std::string& filename, std::vector& parameters) + : ParameterObserverInterface(parameters), m_writer(filename.c_str()) +{ + m_writer.init(); +} + +ParameterObserverTensorBoard::~ParameterObserverTensorBoard() +{ + m_writer.flush(); + m_writer.close(); +} + +#endif // HAVE_TFLOGGER diff --git a/src/shogun/lib/parameter_observers/ParameterObserverTensorBoard.h b/src/shogun/lib/parameter_observers/ParameterObserverTensorBoard.h new file mode 100644 index 00000000000..e83fb9b596c --- /dev/null +++ b/src/shogun/lib/parameter_observers/ParameterObserverTensorBoard.h @@ -0,0 +1,83 @@ +/* +* BSD 3-Clause License +* +* Copyright (c) 2017, Shogun-Toolbox e.V. 
+* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* * Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* +* * Neither the name of the copyright holder nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+* +* Written (W) 2017 Giovanni De Toni +* +*/ +#include +#ifdef HAVE_TFLOGGER + +#ifndef SHOGUN_PARAMETEROBSERVERTENSORBOARD_H +#define SHOGUN_PARAMETEROBSERVERTENSORBOARD_H + +#include + +#include + +namespace shogun +{ + class ParameterObserverTensorBoard : public ParameterObserverInterface + { + + public: + /** + * Default constructor + */ + ParameterObserverTensorBoard(); + + /** + * Constructor + * @param parameters list of parameters which we want to watch over + */ + ParameterObserverTensorBoard(std::vector& parameters); + + /** + * Constructor + * @param filename name of the generated output file + * @param parameters list of parameters which we want to watch over + */ + ParameterObserverTensorBoard( + const std::string& filename, std::vector& parameters); + /** + * Virtual destructor + */ + virtual ~ParameterObserverTensorBoard(); + + protected: + /** + * Writer object which will be used to write tensorflow::Event files + */ + tflogger::EventLogger m_writer; + }; +} + +#endif // SHOGUN_PARAMETEROBSERVERTENSORBOARD_H +#endif // HAVE_TFLOGGER diff --git a/src/shogun/lib/slep/SpInvCoVa/invCov.cpp b/src/shogun/lib/slep/SpInvCoVa/invCov.cpp deleted file mode 100644 index 3a2e46130b4..00000000000 --- a/src/shogun/lib/slep/SpInvCoVa/invCov.cpp +++ /dev/null @@ -1,231 +0,0 @@ -/* This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - - -#include -#ifdef USE_GPL_SHOGUN -#include - -#include -#include - -void m_Ax(double *Ax, double *A, double *x, int n, int ith) -{ - int i, j; - double t; - for(i=0;i lambda) - x_new= (s_v-lambda) / u; - else{ - if(s_v < -lambda) - x_new= (s_v + lambda) / u; - else - x_new=0; - } - if (x[i]!=x_new){ - for(j=0;j. - * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - - -#ifndef INVCOV_SLEP -#define INVCOV_SLEP - -#include -#ifdef USE_GPL_SHOGUN - -/* - * A: n x n - * x: n x 1 - * Ax: n x 1 - * - * Perform the task of Ax= A* x, - * where the ith row and column in A, and ith row in x - * are undefined, so that in Ax, the ith row has no meaning - */ -void m_Ax(double *Ax, double *A, double *x, int n, int ith); - -int lassoCD(double *Theta, double *W, double *S, double lambda, int n, - int ith, int flag, int maxIter, double fGap, double xGap); - -void invCov(double *Theta, double *W, double *S, double lambda, - double sum_S, int n, - int LassoMaxIter, double fGap, - double xGap, /*for the Lasso (inner iteration)*/ - int maxIter, double xtol); /*for the outer iteration*/ - -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef INVCOV_SLEP ----- */ - diff --git a/src/shogun/lib/slep/flsa/flsa.cpp b/src/shogun/lib/slep/flsa/flsa.cpp deleted file mode 100644 index c0b1bc4d5dc..00000000000 --- a/src/shogun/lib/slep/flsa/flsa.cpp +++ /dev/null @@ -1,251 +0,0 @@ -/* This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include -#include - -void flsa(double *x, double *z, double *infor, - double * v, double *z0, - double lambda1, double lambda2, int n, - int maxStep, double tol, int tau, int flag) -{ - - int i, nn=n-1, m; - double zMax, temp; - double *Av, *g, *s; - int iterStep, numS; - double gap; - double *zz = NULL; /*to replace z0, so that z0 shall not revised after */ - - - Av=(double *) malloc(sizeof(double)*nn); - - /* - Compute Av= A*v (n=4, nn=3) - A= [ -1 1 0 0; - 0 -1 1 0; - 0 0 -1 1] - */ - - for (i=0;i= zMax, which leads to a solution with same entry values - 2) lambda2 < zMax, which needs to first run sfa, and then perform soft thresholding - */ - - /* - First case: lambda2 >= zMax - */ - if (lambda2 >= zMax){ - - temp=0; - m=n%5; - if (m!=0){ - for (i=0;i lambda1) - temp= temp-lambda1; - else - if (temp < -lambda1) - temp= temp+lambda1; - else - temp=0; - - m=n%7; - if (m!=0){ - for (i=0;i lambda2) - z[i]=lambda2; - else - if (z0[i]<-lambda2) - z[i]=-lambda2; - else - z[i]=z0[i]; - } - } - else{ - if (lambda2 >= 0.5 * zMax){ - for (i=0;i lambda2) - z[i]=lambda2; - else - if (z[i]<-lambda2) - z[i]=-lambda2; - } - } - else{ - for (i=0;i=1 && flag<=4){ - zz =(double *) malloc(sizeof(double)*nn); - - for (i=0;i lambda1) - x[i]-=lambda1; - else - if (x[i]<-lambda1) - x[i]+=lambda1; - else - x[i]=0; - - - free(Av); - free(g); - free(s); - - if (infor) - { - infor[0]=gap; - infor[1]=iterStep; - infor[2]=zMax; - infor[3]=numS; - } -} - -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/slep/flsa/flsa.h b/src/shogun/lib/slep/flsa/flsa.h deleted file mode 100644 index eb78b76a53b..00000000000 --- a/src/shogun/lib/slep/flsa/flsa.h +++ /dev/null @@ -1,123 +0,0 @@ -/* This program is free software: you can redistribute it and/or 
modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - - -#ifndef FLSA_SLEP -#define FLSA_SLEP - -#include -#ifdef USE_GPL_SHOGUN - -/* - - In this file, we solve the Fused Lasso Signal Approximator (FLSA) problem: - - min_x 1/2 \|x-v\|^2 + lambda1 * \|x\|_1 + lambda2 * \|A x\|_1, (1) - - It can be shown that, if x* is the solution to - - min_x 1/2 \|x-v\|^2 + lambda2 \|A x\|_1, (2) - - then - x**= sgn(x*) max(|x*|-lambda_1, 0) (3) - - is the solution to (1). 
- - By some derivation (see the description in sfa.h), (2) can be solved by - - x*= v - A^T z*, - - where z* is the optimal solution to - - min_z 1/2 z^T A AT z - < z, A v>, - subject to \|z\|_{infty} \leq lambda2 (4) - */ - - - -/* - - In flsa, we solve (1) corresponding to a given (lambda1, lambda2) - - void flsa(double *x, double *z, double *gap, - double * v, double *z0, - double lambda1, double lambda2, int n, - int maxStep, double tol, int flag) - - Output parameters: -x: the solution to problem (1) -z: the solution to problem (4) -infor: the information about running the subgradient finding algorithm -infor[0] = gap: the computed gap (either the duality gap -or the summation of the absolute change of the adjacent solutions) -infor[1] = steps: the number of iterations -infor[2] = lambad2_max: the maximal value of lambda2_max -infor[3] = numS: the number of elements in the support set - -Input parameters: -v: the input vector to be projected -z0: a guess of the solution of z - -lambad1: the regularization parameter -labmda2: the regularization parameter -n: the length of v and x - -maxStep: the maximal allowed iteration steps -tol: the tolerance parameter -tau: the program sfa is checked every tau iterations for termination -flag: the flag for initialization and deciding calling sfa -switch ( flag ) -1-4, 11-14: sfa - -switch ( flag ) -case 1, 2, 3, or 4: -z0 is a "good" starting point -(such as the warm-start of the previous solution, -or the user want to test the performance of this starting point; -the starting point shall be further projected to the L_{infty} ball, -to make sure that it is feasible) - -case 11, 12, 13, or 14: z0 is a "random" guess, and thus not used -(we shall initialize z as follows: -if lambda2 >= 0.5 * lambda_2^max, we initialize the solution of the linear system; -if lambda2 < 0.5 * lambda_2^max, we initialize with zero -this solution is projected to the L_{infty} ball) - -switch( flag ) -5, 15: sfa_special - -switch( flag ) -5: z0 is 
a good starting point -15: z0 is a bad starting point, use the solution of the linear system - - -switch( flag ) -6, 16: sfa_one - -switch( flag ) -6: z0 is a good starting point -16: z0 is a bad starting point, use the solution of the linear system - -Revision made on October 31, 2009. -The input variable z0 is not modified after calling sfa. For this sake, we allocate a new variable zz to replace z0. -*/ -void flsa(double *x, double *z, double *infor, - double * v, double *z0, - double lambda1, double lambda2, int n, - int maxStep, double tol, int tau, int flag); -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef FLSA_SLEP ----- */ - diff --git a/src/shogun/lib/slep/flsa/sfa.cpp b/src/shogun/lib/slep/flsa/sfa.cpp deleted file mode 100644 index e30dc348ddb..00000000000 --- a/src/shogun/lib/slep/flsa/sfa.cpp +++ /dev/null @@ -1,1892 +0,0 @@ -/* This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include - -#define delta 1e-10 - -/* - Revision History - - First Version available on October 10, 2009 - - A runnable version on October 15, 2009 - - Major revision on October 29, 2009 - (Some functions appearing in a previous version have deleted, please refer to the previous version for the old functions. 
- Some new functions have been added as well) - -*/ - -/* - - Files contained in this header file sfa.h: - - 1. Algorithms for solving the linear system A A^T z0 = Av (see the description of A from the following context) - - void Thomas(double *zMax, double *z0, - double * Av, int nn) - - void Rose(double *zMax, double *z0, - double * Av, int nn) - - int supportSet(double *x, double *v, double *z, - double *g, int * S, double lambda, int nn) - - void dualityGap(double *gap, double *z, - double *g, double *s, double *Av, - double lambda, int nn) - - void dualityGap2(double *gap, double *z, - double *g, double *s, double *Av, - double lambda, int nn) - - - 2. The Subgraident Finding Algorithm (SFA) for solving problem (4) (refer to the description of the problem for detail) - - int sfa(double *x, double *gap, - double *z, double *z0, double * v, double * Av, - double lambda, int nn, int maxStep, - double *s, double *g, - double tol, int tau, int flag) - - int sfa_special(double *x, double *gap, - double *z, double * v, double * Av, - double lambda, int nn, int maxStep, - double *s, double *g, - double tol, int tau) - - int sfa_one(double *x, double *gap, - double *z, double * v, double * Av, - double lambda, int nn, int maxStep, - double *s, double *g, - double tol, int tau) - - -*/ - - -/* - - Some mathematical background. - - In this file, we discuss how to solve the following subproblem, - - min_x 1/2 \|x-v\|^2 + lambda \|A x\|_1, (1) - - which is a key problem used in the Fused Lasso Signal Approximator (FLSA). - - Also, note that, FLSA is a building block for solving the optimation problmes with fused Lasso penalty. 
- - In (1), x and v are n-dimensional vectors, - and A is a matrix with size (n-1) x n, and is defined as follows (e.g., n=4): - A= [ -1 1 0 0; - 0 -1 1 0; - 0 0 -1 1] - - The above problem can be reformulated as the following equivalent min-max optimization problem - - min_x max_z 1/2 \|x-v\|^2 + - subject to \|z\|_{infty} \leq lambda (2) - - - It is easy to get that, at the optimal point - - x = v - AT z, (3) - - where z is the optimal solution to the following optimization problem - - min_z 1/2 z^T A AT z - < z, A v>, - subject to \|z\|_{infty} \leq lambda (4) - - - - Let B=A A^T. It is easy to get that B is a (n-1) x (n-1) tridiagonal matrix. - When n=5, B is defined as: - B= [ 2 -1 0 0; - -1 2 -1 0; - 0 -1 2 -1; - 0 0 -1 2] - - Let z0 be the solution to the linear system: - - A A^T * z0 = A * v (5) - - The problem (5) can be solve by the Thomas Algorithm, in about 5n multiplications and 4n additions. - - It can also be solved by the Rose's Algorithm, in about 2n multiplications and 2n additions. - - Moreover, considering the special structure of the matrix A (and B), - it can be solved in about n multiplications and 3n additions - - If lambda \geq \|z0\|_{infty}, x_i= mean(v), for all i, - the problem (1) admits near analytical solution - - - We have also added the restart technique, please refer to our paper for detail! 
- -*/ - - -/* -/////////////// Solving the Linear System via Thomas's Algorithm \\\\\\\\\\\\\\\\\\ -*/ - -void Thomas(double *zMax, double *z0, double * Av, int nn){ - - /* - - We apply the Tomas algorithm for solving the following linear system - B * z0 = Av - Thomas algorithm is also called the tridiagonal matrix algorithm - - B=[ 2 -1 0 0; - -1 2 -1 0; - 0 -1 2 -1; - 0 0 -1 2] - - z0 is the result, Av is unchanged after the computation - - - c is a precomputed nn dimensional vector - c=[-1/2, -2/3, -3/4, -4/5, ..., -nn/(nn+1)] - - c[i]=- (i+1) / (i+2) - c[i-1]=- i / (i+1) - - z0 is an nn dimensional vector - -*/ - - int i; - double tt, z_max; - - /* - Modify the coefficients in Av (copy to z0) - */ - z0[0]=Av[0]/2; - for (i=1;i < nn; i++){ - tt=Av[i] + z0[i-1]; - z0[i]=tt - tt / (i+2); - } - - /*z0[i]=(Av[i] + z0[i-1]) * (i+1) / (i+2);*/ - - /*z0[i]=(Av[i] + z0[i-1])/ ( 2 - i / (i+1));*/ - - - /* - Back substitute (obtain the result in z0) - */ - z_max= fabs(z0[nn-1]); - - for (i=nn-2; i>=0; i--){ - - z0[i]+= z0[i+1] - z0[i+1]/ (i+2); - - /*z0[i]+= z0[i+1] * (i+1) / (i+2);*/ - - tt=fabs(z0[i]); - - if (tt > z_max) - z_max=tt; - - } - *zMax=z_max; - -} - - - - -/* -/////////////// Solving the Linear System via Rose's Algorithm \\\\\\\\\\\\\\\\\\ -*/ - -void Rose(double *zMax, double *z0, double * Av, int nn){ - - /* - We use the Rose algorithm for solving the following linear system - B * z0 = Av - - - B=[ 2 -1 0 0; - -1 2 -1 0; - 0 -1 2 -1; - 0 0 -1 2] - - z0 is the result, Av is unchanged after the computation - - z0 is an nn dimensional vector - -*/ - - int i, m; - double s=0, z_max; - - - /* - We follow the style in CLAPACK - */ - m= nn % 5; - if (m!=0){ - for (i=0;i=0; i--){ - z0[i]=Av[i] + z0[i+1]; - } - - /* - from 0 to nn-1 - */ - z_max= fabs(z0[0]); - for (i=0; i z_max) - z_max=s; - - } - *zMax=z_max; - -} - - - -/* -//////////////// compute x for restarting \\\\\\\\\\\\\\\\\\\\\\\\\ - -x=omega(z) - -v: the vector to be projected -z: the approximate 
solution -g: the gradient at z (g should be computed before calling this function - -nn: the length of z, g, and S (maximal length for S) - -n: the length of x and v - -S: records the indices of the elements in the support set -*/ - -int supportSet(double *x, double *v, double *z, double *g, int * S, double lambda, int nn){ - - int i, j, n=nn+1, numS=0; - double temp; - - - /* - we first scan z and g to obtain the support set S - */ - - /*numS: number of the elements in the support set S*/ - for(i=0;idelta) )){ - S[numS]=i; - numS++; - } - } - - /* - printf("\n %d",numS); - */ - - if (numS==0){ /*this shows that S is empty*/ - temp=0; - for (i=0;i=1 - */ - - /*process the first block - - j=0 - */ - temp=0; - for (i=0;i<=S[0]; i++) - temp+=v[i]; - /*temp =sum (v [0: s[0] ]*/ - temp=( temp + z[ S[0] ] ) / (S[0] +1); - for (i=0;i<=S[0]; i++) - x[i]=temp; - - - /*process the middle blocks - - If numS=1, it belongs the last block - */ - for (j=1; j < numS; j++){ - temp=0; - for (i= S[j-1] +1; i<= S[j]; i++){ - temp+=v[i]; - } - - /*temp =sum (v [ S[j-1] +1: s[j] ]*/ - - temp=(temp - z[ S[j-1] ] + z[ S[j] ])/ (S[j]- S[j-1]); - - for (i= S[j-1] +1; i<= S[j]; i++){ - x[i]=temp; - } - } - - /*process the last block - j=numS-1; - */ - temp=0; - for (i=S[numS-1] +1 ;i< n; i++) - temp+=v[i]; - /*temp =sum (v [ (S[numS-1] +1): (n-1) ]*/ - - temp=( temp - z[ S[numS-1] ] ) / (nn - S[numS-1]); /*S[numS-1] <= nn-1*/ - - for (i=S[numS-1] +1 ;i< n; i++) - x[i]=temp; - - return numS; - -} - - - -/* - -//////////// Computing the duality gap \\\\\\\\\\\\\\\\\\\\\\\\\\ - -we compute the duality corresponding the solution z - -z: the approximate solution -g: the gradient at z (we recompute the gradient) -s: an auxiliary variable -Av: A*v - -nn: the lenght for z, g, s, and Av - -The variables g and s shall be revised. - -The variables z and Av remain unchanged. 
-*/ - -void dualityGap(double *gap, double *z, double *g, double *s, double *Av, double lambda, int nn){ - - int i, m; - double temp; - - - g[0]=z[0] + z[0] - z[1] - Av[0]; - for (i=1;i0) - s[i]=lambda + z[i]; - else - s[i]=-lambda + z[i]; - - - temp=0; - m=nn%5; - - if (m!=0){ - for(i=0;i0) - s[i]=lambda + z[i]; - else - s[i]=-lambda + z[i]; - - - temp=0; - m=nn%5; - - if (m!=0){ - for(i=0;i=3, and thus nn>=2 - - We have two ways for recovering x. - The first way is x = v - A^T z - The second way is x =omega(z) - */ - - temp=0; - m=nn%5; - if (m!=0){ - for (i=0;i funVal1){ /* - we compute the solution by the first way - */ - x[0]=v[0] + z[0]; - for(i=1;ilambda) - g[i]=lambda; - else - if (g[i]<-lambda) - g[i]=-lambda; - } - - - free (S); - free (x); - free (s); - free (Av); - -} - - - -/* - -/////////////////////////////////////// Explanation for the function sfa \\\\\\\\\\\\\\\\\\\\\\\\\\\\ - -Our objective is to solve the fused Lasso signal approximator (flsa) problem: - -min_x g(x) 1/2 \|x-v\|^2 + lambda \|A x\|_1, (1) - -Let x* be the solution (which is unique), it satisfies - -0 in x* - v + A^T * lambda *SGN(Ax*) (2) - -To solve x*, it suffices to find - -y* in A^T * lambda *SGN(Ax*) (3) -that satisfies - -x* - v + y* =0 (4) -which leads to -x*= v - y* (5) - -Due to the uniqueness of x*, we conclude that y* is unique. - -As y* is a subgradient of lambda \|A x*\|_1, -we name our method as Subgradient Finding Algorithm (sfa). - -y* in (3) can be further written as - -y*= A^T * z* (6) -where - -z* in lambda* SGN (Ax*) (7) - -From (6), we have -z* = (A A^T)^{-1} A * y* (8) - -Therefore, from the uqniueness of y*, we conclude that z* is also unique. -Next, we discuss how to solve this unique z*. 
- -The problem (1) can reformulated as the following equivalent problem: - -min_x max_z f(x, z)= 1/2 \|x-v\|^2 + -subject to \|z\|_{infty} \leq lambda (9) - -At the saddle point, we have - -x = v - AT z, (10) - -which somehow concides with (5) and (6) - -Plugging (10) into (9), we obtain the problem - -min_z 1/2 z^T A AT z - < z, A v>, -subject to \|z\|_{infty} \leq lambda, (11) - -In this program, we apply the Nesterov's method for solving (11). - - -Duality gap: - -At a given point z0, we compute x0= v - A^T z0. -It is easy to show that -min_x f(x, z0) = f(x0, z0) <= max_z f(x0, z) (12) - -Moreover, we have -max_z f(x0, z) - min_x f(x, z0) -<= lambda * \|A x0\|_1 - < z0, Av - A A^T z0> (13) - -It is also to get that - -f(x0, z0) <= f(x*, z*) <= max_z f(x0, z) (14) - -g(x*)=f(x*, z*) (15) - -g(x0)=max_z f(x0, z) (17) - - Therefore, we have - -g(x0)-g(x*) <= lambda * \|A x0\|_1 - < z0, Av - A A^T z0> (18) - - - We have applied a restarting technique, which is quite involved; and thus, we do not explain here. - - /////////////////////////////////////// Explanation for the function sfa \\\\\\\\\\\\\\\\\\\\\\\\\\\\ - */ - - - /* - //////////// sfa \\\\\\\\\\\\\\\\\\\\\ - - For sfa, the stepsize of the Nesterov's method is fixed to 1/4, so that no line search is needed. - - - - Explanation of the parameters: - - Output parameters - x: the solution to the primal problem - gap: the duality gap (pointer) - - Input parameters - z: the solution to the dual problem (before calling this function, z contains a starting point) - !!!!we assume that the starting point has been successfully initialized in z !!!! - z0: a variable used for multiple purposes: - 1) the previous solution z0 - 2) the difference between z and z0, i.e., z0=z- z0 - - lambda: the regularization parameter (and the radius of the infity ball, see (11)). 
- nn: the length of z, z0, Av, g, and s - maxStep: the maximal number of iterations - - v: the point to be projected (not changed after the program) - Av: A*v (not changed after the program) - - s: the search point (used for multiple purposes) - g: the gradient at g (and it is also used for multiple purposes) - - tol: the tolerance of the gap - tau: the duality gap or the restarting technique is done every tau steps - flag: if flag=1, we apply the resart technique - flag=2, just run the SFA algorithm, terminate it when the absolution change is less than tol - flag=3, just run the SFA algorithm, terminate it when the duality gap is less than tol - flag=4, just run the SFA algorithm, terminate it when the relative duality gap is less than tol - - - We would like to emphasis that the following assumptions - have been checked in the functions that call this function: - 1) 0< lambda < z_max - 2) nn >=2 - 3) z has been initialized with a starting point - 4) z0 has been initialized with all zeros - - The termination condition is checked every tau iterations. - - For the duality gap, please refer to (12-18) - */ - - int sfa(double *x, double *gap, int * activeS, - double *z, double *z0, double * v, double * Av, - double lambda, int nn, int maxStep, - double *s, double *g, - double tol, int tau, int flag){ - - int i, iterStep, m, tFlag=0, n=nn+1; - double alphap=0, alpha=1, beta=0, temp; - int* S=(int *) malloc(sizeof(int)*nn); - double gapp=-1, gappp=-1; /*gapp denotes the previous gap*/ - int numS=-1, numSp=-2, numSpp=-3;; - /* - numS denotes the number of elements in the Support Set S - numSp denotes the number of elements in the previous Support Set S - */ - - *gap=-1; /*initial a value -1*/ - - /* - The main algorithm by Nesterov's method - - B is an nn x nn tridiagonal matrix. 
- - The nn eigenvalues of B are 2- 2 cos (i * PI/ n), i=1, 2, ..., nn - */ - - for (iterStep=1; iterStep<=maxStep; iterStep++){ - - - /*------------- Step 1 ---------------------*/ - - beta=(alphap -1 ) / alpha; - /* - compute search point - - s= z + beta * z0 - - We follow the style of CLAPACK - */ - m=nn % 5; - if (m!=0){ - for (i=0;i=3, which leads to nn>=2 - */ - g[0]=s[0] + s[0] - s[1] - Av[0]; - for (i=1;ilambda) - z[i]=lambda; - else - if (z[i]<-lambda) - z[i]=-lambda; - } - - /* - compute the difference between the new solution - and the previous solution (stored in z0=-z_p) - - the difference is written to z0 - */ - - m=nn%5; - if (m!=0){ - for (i=0;i - = lambda * \|g\|_1 + - - In fact, gap=0 indicates that, - if g_i >0, then z_i=-lambda - if g_i <0, then z_i=lambda - */ - - gappp=gapp; - gapp=*gap; /*record the previous gap*/ - numSpp=numSp; - numSp=numS; /*record the previous numS*/ - - dualityGap(gap, z, g, s, Av, lambda, nn); - /*g is computed as the gradient of z in this function*/ - - - /* - printf("\n Iteration: %d, gap=%e, numS=%d", iterStep, *gap, numS); - */ - - /* - If *gap <=tol, we terminate the iteration - Otherwise, we restart the algorithm - */ - - if (*gap <=tol){ - tFlag=1; - break; - - } /* end of *gap <=tol */ - else{ - - /* we apply the restarting technique*/ - - /* - we compute the solution by the second way - */ - numS = supportSet(x, v, z, g, S, lambda, nn); - /*g, the gradient of z should be computed before calling this function*/ - - /*With x, we compute z via - AA^T z = Av - Ax - */ - - /* - printf("\n iterStep=%d, numS=%d, gap=%e",iterStep, numS, *gap); - */ - - - m=1; - if (nn > 1000000) - m=10; - else - if (nn > 100000) - m=5; - - if ( abs(numS-numSp) < m){ - - numS=generateSolution(x, z, gap, v, Av, - g, s, S, lambda, nn); - /*g, the gradient of z should be computed before calling this function*/ - - - if (*gap lambda) - z[i]=lambda; - else - if (z[i]<-lambda) - z[i]=-lambda; - } - - - - m=nn%7; - if (m!=0){ - for (i=0;i - = 
lambda * \|g\|_1 + - - In fact, gap=0 indicates that, - if g_i >0, then z_i=-lambda - if g_i <0, then z_i=lambda - */ - - - g[0]=z[0] + z[0] - z[1] - Av[0]; - for (i=1;i0) - s[i]=lambda + z[i]; - else - s[i]=-lambda + z[i]; - - temp=0; - m=nn%5; - if (m!=0){ - for(i=0;i - = lambda * \|g\|_1 + - - In fact, gap=0 indicates that, - if g_i >0, then z_i=-lambda - if g_i <0, then z_i=lambda - - - Here, the "relative duality gap" is defined as: - duality gap / - 1/2 \|A^T z\|^2 + < z, Av> - - We efficiently compute - 1/2 \|A^T z\|^2 + < z, Av> using the following relationship - - - 1/2 \|A^T z\|^2 + < z, Av> - = -1/2 - = -1/2 - */ - - - g[0]=z[0] + z[0] - z[1] - Av[0]; - for (i=1;i0) - s[i]=lambda + z[i]; - else - s[i]=-lambda + z[i]; - - temp=0; - m=nn%5; - if (m!=0){ - for(i=0;i - =-1/2 - */ - - temp=0; - m=nn%5; - if (m!=0){ - for(i=0;i=3, and thus nn>=2 - - We have two ways for recovering x. - The first way is x = v - A^T z - The second way is x =omega(z) - */ - - /* - We first compute the objective function value of the first choice in terms f(x), see our paper - */ - - /* - for numerical reason, we do a gradient descent step - */ - - /* - --------------------------------------------------- - A gradient step begins - */ - g[0]=z[0] + z[0] - z[1] - Av[0]; - for (i=1;ilambda) - z[i]=lambda; - else - if (z[i]<-lambda) - z[i]=-lambda; - } - - /* - --------------------------------------------------- - A gradient descent step ends - */ - - /*compute the gradient at z*/ - - g[0]=z[0] + z[0] - z[1] - Av[0]; - for (i=1;ilambda) - z[i]=lambda; - else - if (z[i]<-lambda) - z[i]=-lambda; - } - - - if (iterStep%tau==0){ - gapp=*gap; /*record the previous gap*/ - - dualityGap(gap, z, g, s, Av, lambda, nn); - - /* - printf("\n iterStep=%d, numS=%d, gap=%e, diff=%e",iterStep, numS, *gap, *gap -gapp); - -*/ - - if (*gap <=tol){ - //tFlag=1; - break; - } - - if ( (*gap <1) && (numS==numSp) && (gapp == *gap) ){ - //tFlag=1; - break; - /*we terminate the program is *gap <1 - numS==numSP 
- and gapp==*gap - */ - } - - }/*end of if tau*/ - - }/*end for */ - - free (S); - - * activeS=numS; - return(iterStep); - -} - - -/* - - We do one gradient descent, and then restart the program - */ - - -int sfa_one(double *x, double *gap, int * activeS, - double *z, double * v, double * Av, - double lambda, int nn, int maxStep, - double *s, double *g, - double tol, int tau){ - - int i, iterStep, m; - int tFlag=0; - //int n=nn+1; - double temp; - int* S=(int *) malloc(sizeof(int)*nn); - double gapp=-1, gappp=-2; /*gapp denotes the previous gap*/ - int numS=-100, numSp=-200, numSpp=-300; - /* - numS denotes the number of elements in the Support Set S - numSp denotes the number of elements in the previous Support Set S - */ - - *gap=-1; /*initialize *gap a value*/ - - /* - The main algorithm by Nesterov's method - - B is an nn x nn tridiagonal matrix. - - The nn eigenvalues of B are 2- 2 cos (i * PI/ n), i=1, 2, ..., nn - */ - - - /* - we first do a gradient step based on z - */ - - - /* - --------------------------------------------------- - A gradient step begins - */ - g[0]=z[0] + z[0] - z[1] - Av[0]; - for (i=1;ilambda) - z[i]=lambda; - else - if (z[i]<-lambda) - z[i]=-lambda; - } - - /* - --------------------------------------------------- - A gradient descent step ends - */ - - - /*compute the gradient at z*/ - - g[0]=z[0] + z[0] - z[1] - Av[0]; - for (i=1;ilambda) - z[i]=lambda; - else - if (z[i]<-lambda) - z[i]=-lambda; - } - - /* - --------------------------------------------------- - restart the algorithm with x=omega(z) - - we have computed a new z, based on the above relationship - */ - - - /* - --------------------------------------------------- - A gradient step begins - */ - g[0]=z[0] + z[0] - z[1] - Av[0]; - for (i=1;ilambda) - z[i]=lambda; - else - if (z[i]<-lambda) - z[i]=-lambda; - } - - /* - --------------------------------------------------- - A gradient descent step ends - */ - - /*compute the gradient at z*/ - - g[0]=z[0] + z[0] - z[1] - 
Av[0]; - for (i=1;i 1000000) - m=5; - else - if (nn > 100000) - m=3; - - if ( abs( numS-numSp) . - * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - - -#ifndef SFA_SLEP -#define SFA_SLEP - -#include -#ifdef USE_GPL_SHOGUN - -/* - Revision History - - First Version available on October 10, 2009 - - A runnable version on October 15, 2009 - - Major revision on October 29, 2009 - (Some functions appearing in a previous version have deleted, please refer to the previous version for the old functions. - Some new functions have been added as well) - -*/ - -/* - - Files contained in this header file sfa.h: - - 1. Algorithms for solving the linear system A A^T z0 = Av (see the description of A from the following context) - - void Thomas(double *zMax, double *z0, - double * Av, int nn) - - void Rose(double *zMax, double *z0, - double * Av, int nn) - - int supportSet(double *x, double *v, double *z, - double *g, int * S, double lambda, int nn) - - void dualityGap(double *gap, double *z, - double *g, double *s, double *Av, - double lambda, int nn) - - void dualityGap2(double *gap, double *z, - double *g, double *s, double *Av, - double lambda, int nn) - - - 2. The Subgraident Finding Algorithm (SFA) for solving problem (4) (refer to the description of the problem for detail) - - int sfa(double *x, double *gap, - double *z, double *z0, double * v, double * Av, - double lambda, int nn, int maxStep, - double *s, double *g, - double tol, int tau, int flag) - - int sfa_special(double *x, double *gap, - double *z, double * v, double * Av, - double lambda, int nn, int maxStep, - double *s, double *g, - double tol, int tau) - - int sfa_one(double *x, double *gap, - double *z, double * v, double * Av, - double lambda, int nn, int maxStep, - double *s, double *g, - double tol, int tau) - - -*/ - - -/* - - Some mathematical background. 
- - In this file, we discuss how to solve the following subproblem, - - min_x 1/2 \|x-v\|^2 + lambda \|A x\|_1, (1) - - which is a key problem used in the Fused Lasso Signal Approximator (FLSA). - - Also, note that, FLSA is a building block for solving the optimation problmes with fused Lasso penalty. - - In (1), x and v are n-dimensional vectors, - and A is a matrix with size (n-1) x n, and is defined as follows (e.g., n=4): - A= [ -1 1 0 0; - 0 -1 1 0; - 0 0 -1 1] - - The above problem can be reformulated as the following equivalent min-max optimization problem - - min_x max_z 1/2 \|x-v\|^2 + - subject to \|z\|_{infty} \leq lambda (2) - - - It is easy to get that, at the optimal point - - x = v - AT z, (3) - - where z is the optimal solution to the following optimization problem - - min_z 1/2 z^T A AT z - < z, A v>, - subject to \|z\|_{infty} \leq lambda (4) - - - - Let B=A A^T. It is easy to get that B is a (n-1) x (n-1) tridiagonal matrix. - When n=5, B is defined as: - B= [ 2 -1 0 0; - -1 2 -1 0; - 0 -1 2 -1; - 0 0 -1 2] - - Let z0 be the solution to the linear system: - - A A^T * z0 = A * v (5) - - The problem (5) can be solve by the Thomas Algorithm, in about 5n multiplications and 4n additions. - - It can also be solved by the Rose's Algorithm, in about 2n multiplications and 2n additions. - - Moreover, considering the special structure of the matrix A (and B), - it can be solved in about n multiplications and 3n additions - - If lambda \geq \|z0\|_{infty}, x_i= mean(v), for all i, - the problem (1) admits near analytical solution - - - We have also added the restart technique, please refer to our paper for detail! 
- -*/ - - -void Thomas(double *zMax, double *z0, double * Av, int nn); - -void Rose(double *zMax, double *z0, double * Av, int nn); - -/* -//////////////// compute x for restarting \\\\\\\\\\\\\\\\\\\\\\\\\ - -x=omega(z) - -v: the vector to be projected -z: the approximate solution -g: the gradient at z (g should be computed before calling this function - -nn: the length of z, g, and S (maximal length for S) - -n: the length of x and v - -S: records the indices of the elements in the support set -*/ -int supportSet(double *x, double *v, double *z, double *g, int * S, double lambda, int nn); - -/* -//////////// Computing the duality gap \\\\\\\\\\\\\\\\\\\\\\\\\\ - -we compute the duality corresponding the solution z - -z: the approximate solution -g: the gradient at z (we recompute the gradient) -s: an auxiliary variable -Av: A*v - -nn: the lenght for z, g, s, and Av - -The variables g and s shall be revised. - -The variables z and Av remain unchanged. -*/ -void dualityGap(double *gap, double *z, double *g, double *s, double *Av, double lambda, int nn); - -/* - Similar to dualityGap, - - The difference is that, we assume that g has been computed. - */ -void dualityGap2(double *gap, double *z, double *g, double *s, double *Av, double lambda, int nn); - -/* -generateSolution: - -generate the solution x based on the information of z and g -(!!!!we assume that g has been computed as the gradient of z!!!!) 
- -*/ -int generateSolution(double *x, double *z, double *gap, - double *v, double *Av, - double *g, double *s, int *S, - double lambda, int nn); - -void restartMapping(double *g, double *z, double * v, - double lambda, int nn); - -/* -/////////////////////////////////////// Explanation for the function sfa \\\\\\\\\\\\\\\\\\\\\\\\\\\\ - -Our objective is to solve the fused Lasso signal approximator (flsa) problem: - -min_x g(x) 1/2 \|x-v\|^2 + lambda \|A x\|_1, (1) - -Let x* be the solution (which is unique), it satisfies - -0 in x* - v + A^T * lambda *SGN(Ax*) (2) - -To solve x*, it suffices to find - -y* in A^T * lambda *SGN(Ax*) (3) -that satisfies - -x* - v + y* =0 (4) -which leads to -x*= v - y* (5) - -Due to the uniqueness of x*, we conclude that y* is unique. - -As y* is a subgradient of lambda \|A x*\|_1, -we name our method as Subgradient Finding Algorithm (sfa). - -y* in (3) can be further written as - -y*= A^T * z* (6) -where - -z* in lambda* SGN (Ax*) (7) - -From (6), we have -z* = (A A^T)^{-1} A * y* (8) - -Therefore, from the uqniueness of y*, we conclude that z* is also unique. -Next, we discuss how to solve this unique z*. - -The problem (1) can reformulated as the following equivalent problem: - -min_x max_z f(x, z)= 1/2 \|x-v\|^2 + -subject to \|z\|_{infty} \leq lambda (9) - -At the saddle point, we have - -x = v - AT z, (10) - -which somehow concides with (5) and (6) - -Plugging (10) into (9), we obtain the problem - -min_z 1/2 z^T A AT z - < z, A v>, -subject to \|z\|_{infty} \leq lambda, (11) - -In this program, we apply the Nesterov's method for solving (11). - - -Duality gap: - -At a given point z0, we compute x0= v - A^T z0. 
-It is easy to show that -min_x f(x, z0) = f(x0, z0) <= max_z f(x0, z) (12) - -Moreover, we have -max_z f(x0, z) - min_x f(x, z0) -<= lambda * \|A x0\|_1 - < z0, Av - A A^T z0> (13) - -It is also to get that - -f(x0, z0) <= f(x*, z*) <= max_z f(x0, z) (14) - -g(x*)=f(x*, z*) (15) - -g(x0)=max_z f(x0, z) (17) - - Therefore, we have - -g(x0)-g(x*) <= lambda * \|A x0\|_1 - < z0, Av - A A^T z0> (18) - - - We have applied a restarting technique, which is quite involved; and thus, we do not explain here. - - /////////////////////////////////////// Explanation for the function sfa \\\\\\\\\\\\\\\\\\\\\\\\\\\\ - */ - - - /* - //////////// sfa \\\\\\\\\\\\\\\\\\\\\ - - For sfa, the stepsize of the Nesterov's method is fixed to 1/4, so that no line search is needed. - - - - Explanation of the parameters: - - Output parameters - x: the solution to the primal problem - gap: the duality gap (pointer) - - Input parameters - z: the solution to the dual problem (before calling this function, z contains a starting point) - !!!!we assume that the starting point has been successfully initialized in z !!!! - z0: a variable used for multiple purposes: - 1) the previous solution z0 - 2) the difference between z and z0, i.e., z0=z- z0 - - lambda: the regularization parameter (and the radius of the infity ball, see (11)). 
- nn: the length of z, z0, Av, g, and s - maxStep: the maximal number of iterations - - v: the point to be projected (not changed after the program) - Av: A*v (not changed after the program) - - s: the search point (used for multiple purposes) - g: the gradient at g (and it is also used for multiple purposes) - - tol: the tolerance of the gap - tau: the duality gap or the restarting technique is done every tau steps - flag: if flag=1, we apply the resart technique - flag=2, just run the SFA algorithm, terminate it when the absolution change is less than tol - flag=3, just run the SFA algorithm, terminate it when the duality gap is less than tol - flag=4, just run the SFA algorithm, terminate it when the relative duality gap is less than tol - - - We would like to emphasis that the following assumptions - have been checked in the functions that call this function: - 1) 0< lambda < z_max - 2) nn >=2 - 3) z has been initialized with a starting point - 4) z0 has been initialized with all zeros - - The termination condition is checked every tau iterations. - - For the duality gap, please refer to (12-18) - */ -int sfa(double *x, double *gap, int * activeS, - double *z, double *z0, double * v, double * Av, - double lambda, int nn, int maxStep, - double *s, double *g, - double tol, int tau, int flag); - -/* - - Refer to sfa for the defintions of the variables - - In this file, we restart the program every step, and neglect the gradient step. - - It seems that, this program does not converge. - - This function shows that the gradient step is necessary. 
- */ -int sfa_special(double *x, double *gap, int * activeS, - double *z, double * v, double * Av, - double lambda, int nn, int maxStep, - double *s, double *g, - double tol, int tau); - -/* - We do one gradient descent, and then restart the program - */ -int sfa_one(double *x, double *gap, int * activeS, - double *z, double * v, double * Av, - double lambda, int nn, int maxStep, - double *s, double *g, - double tol, int tau); -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef SFA_SLEP ----- */ - diff --git a/src/shogun/lib/slep/flsa/tesla_proj.cpp b/src/shogun/lib/slep/flsa/tesla_proj.cpp deleted file mode 100644 index ec4ecfb5940..00000000000 --- a/src/shogun/lib/slep/flsa/tesla_proj.cpp +++ /dev/null @@ -1,192 +0,0 @@ -/* This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - -#ifdef USE_GPL_SHOGUN - -#ifndef TESLA_PROJ_SLEP -#define TESLA_PROJ_SLEP - -#include -#include -#include -#include -#include - - -/* - - Functions contained in "flsa.h" - - void flsa(double *x, double *z, double *infor, - double * v, double *z0, - double lambda1, double lambda2, int n, - int maxStep, double tol, int tau, int flag) - -*/ - -/* - In this file, we need to make use of the function flsa for solving the following problem - - min 1/2 \|X - V\|_2^2 + lambda1 * \|X\|_1 + lambda2 \|X A^T\|_1 (1) - - where X and V are of size p x n - - For the definition of A, please refer to "flsa.h" and the included "sfa.h". - - The problem can be decoupled into the following - - min_x 1/2 \|x-v\|^2 + lambda1 * \|x\|_1 + lambda2 * \|A x\|_1, (2) - - where x and v correspond to a row of of X and V, respectively. - - The problem (2) is essentially the flsa problem, and can be solved by the function flsa. - - - void tesla_proj(double *X, double *Z, double *gap, - double *V, double *Z0, - double lambda1, double lambda2, int p, int n, - int maxStep, double tol, int flag) - - Output parameters: -X: the solution (of size p x n) -Z: the auxiliary variable (result by subgradient finding), -Z can be used as a warm start for the next "tesla_proj", -size: p x (n-1) -gap: the gap for each decoupled flsa problem (of size p x 1) - -Input parameters: -V: the one to be projected -Z0: the starting point (see flag for whether it is used as the starting point) -size: p x (n-1) - -lambda1: the regularization parameter -lambda2: the regularization parameter -p: the number of rows in X and V -n: the number of columns in X and V - -maxStep: the maximal allowed iteration steps -tol: the tolerance parameter -flag: the flag for initialization and deciding calling sfa -switch ( flag ) -1-4, 11-14: sfa - -switch ( flag ) -case 1, 2, 3, or 4: -z0 is a "good" starting point -(such as the warm-start of the previous solution, -or the user 
want to test the performance of this starting point; -the starting point shall be further projected to the L_{infty} ball, -to make sure that it is feasible) - -case 11, 12, 13, or 14: z0 is a "random" guess, and thus not used -(we shall initialize z as follows: -if lambda2 >= 0.5 * lambda_2^max, we initialize the solution of the linear system; -if lambda2 < 0.5 * lambda_2^max, we initialize with zero -this solution is projected to the L_{infty} ball) - -switch( flag ) -5, 15: sfa_special - -switch( flag ) -5: z0 is a good starting point -15: z0 is a bad starting point, use the solution of the linear system - - -switch( flag ) -6, 16: sfa_one - -switch( flag ) - 6: z0 is a good starting point - 16: z0 is a bad starting point, use the solution of the linear system - - */ - - void tesla_proj(double *X, double *Z, double *gap, - double *V, double *Z0, - double lambda1, double lambda2, int p, int n, - int maxStep, double tol, int tau, int flag){ - /* - We assume that X and V are of size p x n - */ - - int i, j; - int nn=n-1; - double - *x =(double *) malloc(sizeof(double)*n), - *v =(double *) malloc(sizeof(double)*n), - *z =(double *) malloc(sizeof(double)*nn), - *z0 =(double *) malloc(sizeof(double)*nn), - *infor=(double *) malloc(sizeof(double)*4); - //double temp; - - - - if (n<3){ - printf("\n n should be equal to or larger than 3"); - exit(-1); - } - - - for(i=0;i. - * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - - -#ifndef ORDERTREE_SLEP -#define ORDERTREE_SLEP - -#define IGNORE_IN_CLASSLIST - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include - - -/* - * In this file, we propose an O(n^2) algorithm for solving the problem: - * - * min 1/2 \|x - u\|^2 - * s.t. 
x_i \ge x_j \ge 0, (i,j) \in I, - * - * where I is the edge set of the tree - * - * - */ - -/* - * Last updated on January, 21, 2011 - * - * 1) the function merge is a non-recursive process for merging one tree with the other - * - * 2) we follow the writeup to revise the function computeMaximalMean - * - */ - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -IGNORE_IN_CLASSLIST struct NodeNum -{ - int node_num; - struct NodeNum *next; -}; - -IGNORE_IN_CLASSLIST struct ChildrenNum -{ - int children_num; - int *children; -}; - -IGNORE_IN_CLASSLIST struct Node -{ - int flag; /*if the maximal root-tree of the subtree rooted at this node has been computed, flag=1, otherwise 0*/ - double m; /*During the computation, it stores the maximal mean from this node to (grandson) child node - *The number of nodes on this path is stored in num - * - *It is intialized with the value of u(node_num) - */ - int num; /*the number of nodes, whose avarage gives the maximal mean---x*/ - struct Node *brother; /*the pointer to the brother node(s)*/ - struct Node *child; /*the pointer to the child node(s)*/ - struct NodeNum *firstNode; /*the first node in the "maximal mean" group*/ - struct NodeNum *lastNode; /*the last node in the "maximal mean" group*/ -}; -#endif - -/* - * We build a tree with the input from a file - * - * The file has n rows represented in the following format - * - | parent | number of children | children - 18 3 10 13 17 - 10 3 5 8 9 - 13 2 11 12 - 17 3 13 14 15 - 5 2 1 4 - 8 2 6 7 - 9 0 - 11 0 - 12 0 - 14 0 - 15 0 - 16 0 - 1 0 - 4 2 2 3 - 6 0 - 7 0 - 2 0 - 3 0 - * - * - * Each row provides the information of one parent node and its children - * - * If a parent node is not included in any row, it is regarded that it is the leaf node. - * For example, it is valid that the rows with zero children can be deleted. - * - * Node number is numbered from 1 to n, where n denotes the number of nodes. - * - * In the program, we deduct the number by 1, as C starts from 0, instead of 1. 
- * - */ - -void readFromFile(char * FileName, struct ChildrenNum ** TreeInfo, int n){ - FILE *fp; - struct ChildrenNum * treeInfo; - int i, j, num, nodeId; - - - fp=fopen(FileName, "r"); - - if(!fp){ - printf("\n\n Fatal Error!!!"); - printf("\n\n Failure in reading the file:%s!", FileName); - printf("\n\n The program does not check the correctness of the tree provided in the file: %s!", FileName); - return; - } - - treeInfo=(struct ChildrenNum *)malloc(sizeof(struct ChildrenNum)*n); - - if(!treeInfo){ - printf("\n Allocation of treeInfo failure!"); - return; - } - - for(i=0;in || i<1){ - printf("\n The node number should be between [1, %d]!",n); - return; - } - - i=i-1; - /*i=i-1, as C starts from 0 instead of 1*/ - if (num>0){ - treeInfo[i].children_num=num; - - treeInfo[i].children=(int *)malloc(sizeof(int)*num); - - if(!treeInfo[i].children){ - printf("\n Allocation of treeInfo failure!"); - return; - } - - for(j=0;jn || nodeId<1){ - printf("\n The node number should be between [1, %d]!", n); - return; - } - - treeInfo[i].children[j]=nodeId-1; - /*add -1, as C starts from 0 instead of 1*/ - } - - } - } - } - - fclose(fp); - - /* - printf("\n In readFromFile!"); - for(i=0;ifirstNode->node_num; - int numberOfChildren=treeInfo[currentRoot].children_num; - int i; - - /* insert the children nodes of the current root - */ - for(i=0;iflag=0; - newNode->m=u[treeInfo[currentRoot].children[i]]; - newNode->num=1; - newNode->child=NULL; - - currentNode->node_num=treeInfo[currentRoot].children[i]; - currentNode->next=NULL; - newNode->firstNode=newNode->lastNode=currentNode; - - /* - * insert newnode to be the children nodes of root - * - */ - newNode->brother=root->child; - root->child=newNode; - - /* - * treat newNode as the root, and add its children - * - */ - - buildTree(newNode, treeInfo, u); - } -} - -/* - * initilize the root, which means that the tree is built by this function. 
- * as the root is the starting point of a tree - * - * we use the input file for building the tree - */ - -void initializeRoot(struct Node ** Root, char * FileName, double *u, int rootNum, int n){ - - struct NodeNum * currentNode; - struct Node *root; - struct ChildrenNum * treeInfo; - int i; - - /*read the from the file to construct treeInfo*/ - readFromFile(FileName, &treeInfo, n); - - if(rootNum>n || rootNum <1){ - printf("\n The node number of the root should be between [1, %d]!", n); - return; - } - - rootNum=rootNum-1; - /*add -1, as C starts from 0 instead of 1*/ - - root=(struct Node *)malloc(sizeof(struct Node)); - currentNode=(struct NodeNum *)malloc(sizeof(struct NodeNum)); - - if(!root){ - printf("\n Allocation in computeGroups failure!"); - return; - } - - if(!currentNode){ - printf("\n Allocation in computeGroups failure!"); - return; - } - - - root->flag=0; - root->m=u[rootNum]; - root->num=1; - root->brother=root->child=NULL; - - currentNode->node_num=rootNum; - currentNode->next=NULL; - root->firstNode=root->lastNode=currentNode; - - /*build the tree using buildTree*/ - buildTree(root, treeInfo, u); - - /*free treeInfo*/ - for(i=0;iflag=0; - root->m=u[rootNum]; - root->num=1; - root->brother=root->child=NULL; - - currentNode->node_num=rootNum; - currentNode->next=NULL; - root->firstNode=root->lastNode=currentNode; - - /*build the tree using buildTree*/ - buildTree(root, treeInfo, u); - - /*free treeInfo*/ - for(i=0;i1){ - treeInfo[0].children_num=n-1; - treeInfo[0].children=(int *)malloc(sizeof(int)*(n-1)); - for(i=1;iflag=0; - root->m=u[rootNum]; - root->num=1; - root->brother=root->child=NULL; - - currentNode->node_num=rootNum; - currentNode->next=NULL; - root->firstNode=root->lastNode=currentNode; - - /*build the tree using buildTree*/ - buildTree(root, treeInfo, u); - - /*free treeInfo*/ - if(n>1) - free(treeInfo[0].children); - free(treeInfo); - - *Root=root; -} - - - -/* - * merge root with maxNode - */ -void merge(struct Node * root, struct 
Node * maxNode ){ - struct Node * childrenNode, *maxNodeChild; - - root->m= (root->m* root->num + maxNode->m *maxNode->num)/(root->num + maxNode->num); - root->num+=maxNode->num; - root->lastNode->next=maxNode->firstNode; - root->lastNode=maxNode->lastNode; - - /* - * update the brother list of maxNode (when removing maxNode) - * - */ - if (root->child==maxNode){ - root->child=maxNode->brother; - } - else{ - childrenNode=root->child; - - while(childrenNode->brother!=maxNode){ - childrenNode=childrenNode->brother; - } - /*childrenNode's brother is maxNode*/ - childrenNode->brother=maxNode->brother; - } - - - /* - * change the children of maxNode to the children of root - */ - maxNodeChild=maxNode->child; - if (maxNodeChild){ - /*if maxNode has at least a child*/ - - while(maxNodeChild->brother) - maxNodeChild=maxNodeChild->brother; - /*maxNodeChild points to the last child of maxNode*/ - - maxNodeChild->brother=root->child; - root->child=maxNode->child; - } - - /* - * remove maxNode from the children list of root - */ - free(maxNode); - -} - - - -/* - * compute the maximal mean for each node - * - */ - -void computeMaximalMean(struct Node * root){ - struct Node * childrenNode, *maxNode; - double mean; - - /*if root already points to a leaf node, we do nothing*/ - if(!root->child){ - - /*the value of a maximal root-tree is non-negative*/ - if (root->m <0) - root->m =0; - - root->flag=1; - return; - } - - /*the following loop corresponds to line 5-20 of the algorithm*/ - while(1){ - - childrenNode=root->child; - if(!childrenNode){ - - if (root->m <0) - root->m =0; - - root->flag=1; - return; - } - - /*we note that, childrenNode->m >=0*/ - - mean=0; - - /*visit all its children nodes, to get the maximal "mean" and corresponding maxNode*/ - while(childrenNode){ - - /*if the maximal root-tree at childrenNode is not computed, we compute it*/ - if (!childrenNode->flag) - computeMaximalMean(childrenNode); - - if (childrenNode->m >= mean){ - mean=childrenNode->m; - 
maxNode=childrenNode; - } - - childrenNode=childrenNode->brother; - } - - if ( (root->m <= 0) && (mean==0) ){ - /* merge root with all its children, in this case, - * its children is a super-node - * (thus does not has any other children, due to merge)*/ - - childrenNode=root->child; - while(childrenNode){ - merge(root, childrenNode); - childrenNode=root->child; - } - - root->m =0; - root->flag=1; - return; - } - - if (root->m > mean){ - - root->flag=1; - return; - } - - merge(root,maxNode); - } - -} - - - -/* - * compute the maximal mean for each node, without the non-negative constraint - * - * Composed on November 23, 2011. - * - */ - -void computeMaximalMean_without_nonnegative(struct Node * root){ - struct Node * childrenNode, *maxNode; - double mean; - int mean_flag; - - /*if root already points to a leaf node, we do nothing*/ - if(!root->child){ - - /*the value of a maximal root-tree is not necessarily non-negative, when the non-negative constraint is not imposed*/ - - /* - The following is removed - if (root->m <0) - root->m =0; - */ - - - root->flag=1; - return; - } - - /*the following loop corresponds to line 5-20 of the algorithm */ - while(1){ - - childrenNode=root->child; - if(!childrenNode){ - - /*the value of a maximal root-tree is not necessarily non-negative, when the non-negative constraint is not imposed*/ - - /* - The following is removed - - if (root->m <0) - root->m =0; - */ - - root->flag=1; - return; - } - - /*we note that, childrenNode->m >=0 does not necesarily hold. - Therefore, for mean, we need to initialize with a small value. 
We initialize it with the value of its first child node - */ - - mean_flag=0; /*0 denotes that "mean" has not been really specified*/ - - /*visit all its children nodes, to get the maximal "mean" and corresponding maxNode*/ - while(childrenNode){ - - /*if the maximal root-tree at childrenNode is not computed, we compute it*/ - if (!childrenNode->flag) - computeMaximalMean_without_nonnegative(childrenNode); - - /*if mean has not been specified, let us specify it, and set mean_flag to 1*/ - if (!mean_flag){ - mean=childrenNode->m; - mean_flag=1; - } - - if (childrenNode->m >= mean){ - mean=childrenNode->m; - maxNode=childrenNode; - } - - childrenNode=childrenNode->brother; - } - - if (root->m > mean){ - - root->flag=1; - return; - } - - merge(root,maxNode); - } - -} - - -/* - * computeSolution - * - */ - - -void computeSolution(double *x, struct Node *root){ - struct Node * child; - struct NodeNum *currentNode; - double mean; - - if (root){ - /* - * process the root - * - * set the value for x - */ - - mean=root->m; - - currentNode=root->firstNode; - while(currentNode){ - x[currentNode->node_num]=mean; - currentNode=currentNode->next; - } - - /*process the children of root*/ - child=root->child; - while(child){ - computeSolution(x, child); - - child=child->brother; - } - } -} - -/* - * traverse the tree - * used for debugging the correctness of the code - */ - -void traversalTree(struct Node *root){ - struct Node * child; - struct NodeNum *currentNode; - - if (root){ - printf("\n\n root->m =%2.5f, num:%d \n Nodes:",root->m,root->num); - - currentNode=root->firstNode; - while(currentNode){ - printf(" %d ", currentNode->node_num); - currentNode=currentNode->next; - } - - printf("\n root: %d, child:", root->m); - - /*print out the children of root*/ - child=root->child; - while(child){ - printf(" %d", child->m); - child=child->brother; - } - - /*print out the children of children*/ - child=root->child; - while(child){ - traversalTree(child); - - child=child->brother; - 
} - } -} - - - - - -/* - * free the dynamic space generated by alloc - */ - -void deleteTree(struct Node *root){ - struct Node *child, *temp; - struct NodeNum *currentNode; - - if (root){ - - child=root->child; - - while(child){ - - temp=child->brother; - /*point to its brother*/ - - deleteTree(child); - /*free its chlidren*/ - - child=temp; - } - - /* - * free root - * - * 1. free NodeNum pointed by firstNode and lastNode - * 2. free Node - */ - currentNode=root->firstNode; - while(currentNode){ - root->firstNode=currentNode->next; - free(currentNode); - - currentNode=root->firstNode; - } - root->lastNode=NULL; - free(root); - } -} - -/* - * This is the main function for the general tree - * - */ - -void orderTree(double *x, char * FileName, double *u, int rootNum, int n){ - struct Node * root; - - /* - * build the tree using initializeRoot - */ - initializeRoot(&root, FileName, u, rootNum, n); - - /* - printf("\n\n Before computation"); - traversalTree(root); - */ - - - /* - * compute the maximal average for each node - */ - - computeMaximalMean(root); - - - /*compute the solution from the tree*/ - - computeSolution(x, root); - - - /* - printf("\n\n After computation"); - traversalTree(root); - */ - - - /* delete the tree - */ - deleteTree(root); -} - - -/* - * This is the main function for the general tree, without the non-negative constraint - * - */ - -void orderTree_without_nonnegative(double *x, char * FileName, double *u, int rootNum, int n){ - struct Node * root; - - /* - * build the tree using initializeRoot - */ - initializeRoot(&root, FileName, u, rootNum, n); - - /* - printf("\n\n Before computation"); - traversalTree(root); - */ - - - /* - * compute the maximal average for each node - */ - - computeMaximalMean_without_nonnegative(root); - - - /*compute the solution from the tree*/ - - computeSolution(x, root); - - - /* - printf("\n\n After computation"); - traversalTree(root); - */ - - - /* delete the tree - */ - deleteTree(root); -} - - - -/* - * 
This is the main function for the full binary tree - * - */ - -void orderTreeBinary(double *x, double *u, int n){ - struct Node * root; - - /* - * build the tree using initializeRootBinary for the binary tree - * - * please make sure that n=2^{depth +1} -1 - * - */ - - initializeRootBinary(&root, u, n); - - /* - printf("\n\n Before computation"); - traversalTree(root); - */ - - - /* - * compute the maximal average for each node - */ - - computeMaximalMean(root); - - - /*compute the solution from the tree*/ - - computeSolution(x, root); - - - /* - printf("\n\n After computation"); - traversalTree(root); - */ - - - /* delete the tree - */ - deleteTree(root); -} - - -/* - * This is the main function for the tree with depth 1 - * - */ - -void orderTreeDepth1(double *x, double *u, int n){ - struct Node * root; - - /* - * build the tree using initializeRootDepth1 for the tree with depth 1 - * - */ - - initializeRootDepth1(&root, u, n); - - /* - printf("\n\n Before computation"); - traversalTree(root); - */ - - - /* - * compute the maximal average for each node - */ - - computeMaximalMean(root); - - - /*compute the solution from the tree*/ - - computeSolution(x, root); - - - /* - printf("\n\n After computation"); - traversalTree(root); - */ - - - /* delete the tree - */ - deleteTree(root); -} -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef ORDERTREE_SLEP ----- */ diff --git a/src/shogun/lib/slep/order/sequence.h b/src/shogun/lib/slep/order/sequence.h deleted file mode 100644 index fc706e67f46..00000000000 --- a/src/shogun/lib/slep/order/sequence.h +++ /dev/null @@ -1,225 +0,0 @@ -/* This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - -#ifndef SEQUENCE_SLEP -#define SEQUENCE_SLEP - -#include - -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include - - -/* - * In this file, we propose the algorithms for solving the problem: - * - * min 1/2 \|x - u\|^2 - * s.t. x1 \ge x2 \ge x3 \ge ... \ge xn \ge 0 - * - * - */ - -/* - * - * x= sequence_bottomup(u,n) - * - * we compute using a bottom up order - * - */ - -void sequence_bottomup(double *x, double *u, int n){ - int i, j; - int *location=(int *)malloc(sizeof(int)*n); - int num; - - if(!location){ - printf("\n Allocation of array failure!"); - exit(1); - } - - - /* - * compute the maximal mean from the root to the i-th point: - * - * x[i]: the maximal mean - * location[i]: the ending index of the mean - * - */ - - - /* process the last element*/ - if (n<1){ - printf("\n n=%d should be an integer over 1!",n); - exit(1); - } - else{ - i=n-1; - x[i]=u[i]; - location[i]=i; - } - - /*process the remaining elements in a bottom-up recursive manner*/ - for(i=n-2;i>=0;i--){ - - - if (u[i]>x[i+1]){ - x[i]=u[i]; - location[i]=i; - } - else{ - /*make use of x[i: (n-1)] and location[i: (n-1)] for update*/ - - /*merge with the first group*/ - num=location[i+1]-i; - x[i]=(u[i] + x[i+1]*num)/(num+1); - location[i]=location[i+1]; - j=location[i+1]+1; - - /*If necessary, we need to further merge with the remainig groups */ - for(;j0){ - for(j=i+1;j<=location[i];j++){ - x[j]=x[i]; - } - - i=location[i]+1; - } - else{ - for(j=i;j= max){ - max=mean; - location[i]=j; - } - } - - if (max>0){ - x[i]=max; /*record the maximal mean*/ - 
i=location[i]+1; /*the next i*/ - } - else{ - x[i]=-1; /* any value less or equal to 0 - * - * This shows that the remaining elements should be zero - * - */ - break; - } - } - - - /* - * compute the solution x with the mean and location - * - */ - - for(i=0;i0){ - for(j=i+1;j<=location[i];j++){ - x[j]=x[i]; - } - - i=location[i]+1; - } - else{ - for(j=i;j. - * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include - -void identifySomeZeroEntries(double * u, int * zeroGroupFlag, int *entrySignFlag, - int *pp, int *gg, - double *v, double lambda1, double lambda2, - int p, int g, double * w, double *G){ - - int i, j, newZeroNum, iterStep=0; - double twoNorm, temp; - - /* - * process the L1 norm - * - * generate the u>=0, and assign values to entrySignFlag - * - */ - for(i=0;i lambda1){ - u[i]=v[i]-lambda1; - - entrySignFlag[i]=1; - } - else{ - if (v[i] < -lambda1){ - u[i]= -v[i] -lambda1; - - entrySignFlag[i]=-1; - } - else{ - u[i]=0; - - entrySignFlag[i]=0; - } - } - } - - /* - * Applying Algorithm 1 for identifying some sparse groups - * - */ - - /* zeroGroupFlag denotes whether the corresponding group is zero */ - for(i=0;ig+1){ - - printf("\n Identify Zero Group: iterStep= %d. The code might have a bug! 
Check it!", iterStep); - return; - } - - /*record the number of newly detected sparse groups*/ - newZeroNum=0; - - for (i=0;i=0){ - y[i]=0; - } - else{ - y[i]=x[i]; - x[i]=0; - } - } -} - -void YFromx(double *Y, - double *xnew, double *Ynew, - double lambda2, int g, int *zeroGroupFlag, - double *G, double *w){ - - int i, j; - double twoNorm, temp; - - for(i=0;i 0 ){ /*if x_{G_i} is non-zero*/ - temp=lambda2 * w[3*i+2] / twoNorm; - - for(j=(int) w[3*i] ; j<= (int) w[3*i +1]; j++) - Y[j] *= temp; - } - else /*if x_{G_j} =0, we let Y^i=Ynew^i*/ - { - for(j=(int) w[3*i] ; j<= (int) w[3*i +1]; j++) - Y[j]=Ynew[j]; - } - } - }/*end of for(i=0;i innerProduct) - *gap+=twoNorm-innerProduct; - } - }/*end of for(i=0;i lambda2 * w[3*i+2] ){ - temp=lambda2 * w[3*i+2] / twoNorm; - - for(j=(int) w[3*i] ; j<= (int) w[3*i +1]; j++) - Y[j]*=temp; - } - } - else{ /*this group is zero*/ - for(j=(int) w[3*i] ; j<= (int) w[3*i +1]; j++) - Y[j]=0; - } - } - - /* - * set Ynew to zero - * - * in the following processing, we only operator Y and Ynew in the - * possibly non-zero groups by "if(zeroGroupFlag[i])" - * - */ - for(i=0;i lambda2 * w[3*i+2] ){ - temp=lambda2 * w[3*i+2] / twoNorm; - - for(j=(int) w[3*i] ; j<= (int) w[3*i +1]; j++) - Ynew[j]*=temp; - } - } - }/*end of for(i=0;i5) - L=L*0.8; - - break; - } - else{ - temp=leftValue / rightValue; - - if (L*2 <= temp) - L=temp; - else - L=2*L; - - - if ( L / g - 2* g ){ - - if (rightValue < 1e-16){ - break; - } - else{ - - printf("\n GD: leftValue=%e, rightValue=%e, ratio=%e", leftValue, rightValue, temp); - - printf("\n L=%e > 2 * %d * %d. There might be a bug here. 
Otherwise, it is due to numerical issue.", L, g, g); - - break; - } - } - } - } - - /* compute the duality gap at (xnew, Ynew) - * - * void dualityGap(double *gap, double *penalty2, - * double *x, double *Y, int g, int *zeroGroupFlag, - * double *G, double *w, double lambda2) - * - */ - dualityGap(gap, penalty2, xnew, Ynew, g, zeroGroupFlag, G, w, lambda2); - - /* - * flag =1 means restart - * - * flag =0 means with restart - * - * nextRestartStep denotes the next "step number" for - * initializing the restart process. - * - * This is based on the fact that, the result is only beneficial when - * xnew is good. In other words, - * if xnew is not good, then the - * restart might not be helpful. - */ - - if ( (flag==0) || (flag==1 && iterStep < nextRestartStep )){ - - /* copy Ynew to Y, and xnew to x */ - sg_memcpy(x, xnew, sizeof(double) * p); - sg_memcpy(Y, Ynew, sizeof(double) * YSize); - - /* - printf("\n iterStep=%d, L=%2.5f, gap=%e", iterStep, L, *gap); - */ - - } - else{ - /* - * flag=1 - * - * We allow the restart of the program. - * - * Here, Y is constructed as a subgradient of xnew, based on the - * assumption that Y might be a better choice than Ynew, provided - * that xnew is good enough. 
- * - */ - - /* - * compute the restarting point Y with xnew and Ynew - * - *void YFromx(double *Y, - * double *xnew, double *Ynew, - * double lambda2, int g, int *zeroGroupFlag, - * double *G, double *w) - */ - YFromx(Y, xnew, Ynew, lambda2, g, zeroGroupFlag, G, w); - - /*compute the solution with the starting point Y - * - *void xFromY(double *x, double *y, - * double *u, double *Y, - * int p, int g, int *zeroGroupFlag, - * double *G, double *w) - * - */ - xFromY(x, y, u, Y, p, g, zeroGroupFlag, G, w); - - /*compute the duality at (x, Y) - * - * void dualityGap(double *gap, double *penalty2, - * double *x, double *Y, int g, int *zeroGroupFlag, - * double *G, double *w, double lambda2) - * - */ - dualityGap(&gapR, &penalty2R, x, Y, g, zeroGroupFlag, G, w, lambda2); - - if (*gap< gapR){ - /*(xnew, Ynew) is better in terms of duality gap*/ - /* copy Ynew to Y, and xnew to x */ - sg_memcpy(x, xnew, sizeof(double) * p); - sg_memcpy(Y, Ynew, sizeof(double) * YSize); - - /*In this case, we do not apply restart, as (x,Y) is not better - * - * We postpone the "restart" by giving a - * "nextRestartStep" - */ - - /* - * we test *gap here, in case *gap=0 - */ - if (*gap <=tol) - break; - else{ - nextRestartStep=iterStep+ (int) sqrt(gapR / *gap); - } - } - else{ /*we use (x, Y), as it is better in terms of duality gap*/ - *gap=gapR; - *penalty2=penalty2R; - } - - /* - printf("\n iterStep=%d, L=%2.5f, gap=%e, gapR=%e", iterStep, L, *gap, gapR); - */ - - } - - /* - * if the duality gap is within pre-specified parameter tol - * - * we terminate the algorithm - */ - if (*gap <=tol) - break; - } - - penalty2[3]=iterStep; - - penalty2[4]=0; - for(i=0;i(int) w[3*i +1]) - penalty2[4]=penalty2[4]+1; - } - } - - /* - * assign sign to the solution x - */ - for(i=0;i lambda2 * w[3*i+2] ){ - temp=lambda2 * w[3*i+2] / twoNorm; - - for(j=(int) w[3*i] ; j<= (int) w[3*i +1]; j++) - Ynew[j]*=temp; - } - } - }/*end of for(i=0;i5) - L=L*0.8; - - break; - } - else{ - temp=leftValue / rightValue; 
- - if (L*2 <= temp) - L=temp; - else - L=2*L; - - if ( L / g - 2* g >0 ){ - - if (rightValue < 1e-16){ - break; - } - else{ - - printf("\n One Gradient Step: leftValue=%e, rightValue=%e, ratio=%e", leftValue, rightValue, temp); - - printf("\n L=%e > 2 * %d * %d. There might be a bug here. Otherwise, it is due to numerical issue.", L, g, g); - - break; - } - } - } - } - - *LL=L; -} - -void overlapping_agd(double *x, double *gap, double *penalty2, - double *v, int p, int g, double lambda1, double lambda2, - double *w, double *G, double *Y, int maxIter, int flag, double tol){ - - int YSize=(int) w[3*(g-1) +1]+1; - double *u=(double *)malloc(sizeof(double)*p); - double *y=(double *)malloc(sizeof(double)*p); - - double *xnew=(double *)malloc(sizeof(double)*p); - double *Ynew=(double *)malloc(sizeof(double)* YSize ); - - double *xS=(double *)malloc(sizeof(double)*p); - double *YS=(double *)malloc(sizeof(double)* YSize ); - - /*double *xp=(double *)malloc(sizeof(double)*p);*/ - double *Yp=(double *)malloc(sizeof(double)* YSize ); - - int *zeroGroupFlag=(int *)malloc(sizeof(int)*g); - int *entrySignFlag=(int *)malloc(sizeof(int)*p); - - int pp, gg; - int i, j, iterStep; - double twoNorm,temp, L=1, leftValue, rightValue, gapR, penalty2R; - int nextRestartStep=0; - - double alpha, alphap=0.5, beta, gamma; - - /* - * call the function to identify some zero entries - * - * entrySignFlag[i]=0 denotes that the corresponding entry is definitely zero - * - * zeroGroupFlag[i]=0 denotes that the corresponding group is definitely zero - * - */ - - identifySomeZeroEntries(u, zeroGroupFlag, entrySignFlag, - &pp, &gg, - v, lambda1, lambda2, - p, g, w, G); - - penalty2[1]=pp; - penalty2[2]=gg; - /*store pp and gg to penalty2[1] and penalty2[2]*/ - - /* - *------------------- - * Process Y - *------------------- - * We make sure that Y is feasible - * and if x_i=0, then set Y_{ij}=0 - */ - for(i=0;i lambda2 * w[3*i+2] ){ - temp=lambda2 * w[3*i+2] / twoNorm; - - for(j=(int) w[3*i] ; j<= 
(int) w[3*i +1]; j++) - Y[j]*=temp; - } - } - else{ /*this group is zero*/ - for(j=(int) w[3*i] ; j<= (int) w[3*i +1]; j++) - Y[j]=0; - } - } - - /* - * set Ynew and Yp to zero - * - * in the following processing, we only operate, Yp, Y and Ynew in the - * possibly non-zero groups by "if(zeroGroupFlag[i])" - * - */ - for(i=0;i lambda2 * w[3*i+2] ){ - temp=lambda2 * w[3*i+2] / twoNorm; - - for(j=(int) w[3*i] ; j<= (int) w[3*i +1]; j++) - Ynew[j]*=temp; - } - } - }/*end of for(i=0;i5) - L=L*0.8; - - break; - } - else{ - temp=leftValue / rightValue; - - if (L*2 <= temp) - L=temp; - else - L=2*L; - - - - if ( L / g - 2* g >0 ){ - - if (rightValue < 1e-16){ - break; - } - else{ - - printf("\n AGD: leftValue=%e, rightValue=%e, ratio=%e", leftValue, rightValue, temp); - - printf("\n L=%e > 2 * %d * %d. There might be a bug here. Otherwise, it is due to numerical issue.", L, g, g); - - break; - } - } - } - } - - /* compute the duality gap at (xnew, Ynew) - * - * void dualityGap(double *gap, double *penalty2, - * double *x, double *Y, int g, int *zeroGroupFlag, - * double *G, double *w, double lambda2) - * - */ - dualityGap(gap, penalty2, - xnew, Ynew, g, zeroGroupFlag, - G, w, lambda2); - - - /* - * if the duality gap is within pre-specified parameter tol - * - * we terminate the algorithm - */ - if (*gap <=tol){ - - sg_memcpy(x, xnew, sizeof(double) * p); - sg_memcpy(Y, Ynew, sizeof(double) * YSize); - - break; - } - - - - /* - * flag =1 means restart - * - * flag =0 means with restart - * - * nextRestartStep denotes the next "step number" for - * initializing the restart process. - * - * This is based on the fact that, the result is only beneficial when - * xnew is good. In other words, - * if xnew is not good, then the - * restart might not be helpful. 
- */ - - if ( (flag==0) || (flag==1 && iterStep < nextRestartStep )){ - - - /*sg_memcpy(xp, x, sizeof(double) * p);*/ - sg_memcpy(Yp, Y, sizeof(double) * YSize); - - /*sg_memcpy(x, xnew, sizeof(double) * p);*/ - sg_memcpy(Y, Ynew, sizeof(double) * YSize); - - gamma=gamma * (1-alpha); - - alphap=alpha; - - /* - printf("\n iterStep=%d, L=%2.5f, gap=%e", iterStep, L, *gap); - */ - - } - else{ - /* - * flag=1 - * - * We allow the restart of the program. - * - * Here, Y is constructed as a subgradient of xnew, based on the - * assumption that Y might be a better choice than Ynew, provided - * that xnew is good enough. - * - */ - - /* - * compute the restarting point YS with xnew and Ynew - * - *void YFromx(double *Y, - * double *xnew, double *Ynew, - * double lambda2, int g, int *zeroGroupFlag, - * double *G, double *w) - */ - YFromx(YS, xnew, Ynew, lambda2, g, zeroGroupFlag, G, w); - - /*compute the solution with the starting point YS - * - *void xFromY(double *x, double *y, - * double *u, double *Y, - * int p, int g, int *zeroGroupFlag, - * double *G, double *w) - * - */ - xFromY(xS, y, u, YS, p, g, zeroGroupFlag, G, w); - - /*compute the duality at (xS, YS) - * - * void dualityGap(double *gap, double *penalty2, - * double *x, double *Y, int g, int *zeroGroupFlag, - * double *G, double *w, double lambda2) - * - */ - dualityGap(&gapR, &penalty2R, xS, YS, g, zeroGroupFlag, G, w, lambda2); - - if (*gap< gapR){ - /*(xnew, Ynew) is better in terms of duality gap*/ - - /*In this case, we do not apply restart, as (xS,YS) is not better - * - * We postpone the "restart" by giving a - * "nextRestartStep" - */ - - /*sg_memcpy(xp, x, sizeof(double) * p);*/ - sg_memcpy(Yp, Y, sizeof(double) * YSize); - - /*sg_memcpy(x, xnew, sizeof(double) * p);*/ - sg_memcpy(Y, Ynew, sizeof(double) * YSize); - - gamma=gamma * (1-alpha); - - alphap=alpha; - - nextRestartStep=iterStep+ (int) sqrt(gapR / *gap); - } - else{ - /*we use (xS, YS), as it is better in terms of duality gap*/ - - *gap=gapR; 
- *penalty2=penalty2R; - - if (*gap <=tol){ - - sg_memcpy(x, xS, sizeof(double) * p); - sg_memcpy(Y, YS, sizeof(double) * YSize); - - break; - }else{ - /* - * we do a gradient descent based on (xS, YS) - * - */ - - /* - * compute (x, Y) from (xS, YS) - * - * - * gradientDescentStep(double *xnew, double *Ynew, - * double *LL, double *u, double *y, int *entrySignFlag, double lambda2, - * double *x, double *Y, int p, int g, int * zeroGroupFlag, - * double *G, double *w) - */ - gradientDescentStep(x, Y, - &L, u, y, entrySignFlag,lambda2, - xS, YS, p, g, zeroGroupFlag, - G, w); - - /*sg_memcpy(xp, xS, sizeof(double) * p);*/ - sg_memcpy(Yp, YS, sizeof(double) * YSize); - - gamma=L; - - alphap=0.5; - - } - - - } - - /* - * printf("\n iterStep=%d, L=%2.5f, gap=%e, gapR=%e", iterStep, L, *gap, gapR); - */ - - }/* flag =1*/ - - } /* main loop */ - - - - penalty2[3]=iterStep+1; - - /* - * get the number of nonzero groups - */ - - penalty2[4]=0; - for(i=0;i(int) w[3*i +1]) - penalty2[4]=penalty2[4]+1; - } - } - - - /* - * assign sign to the solution x - */ - for(i=0;i. 
- * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - - -#ifndef OVERLAPPING_SLEP -#define OVERLAPPING_SLEP - -#include -#ifdef USE_GPL_SHOGUN - - - -/* - * ------------------------------------------------------------------- - * Function and parameter - * ------------------------------------------------------------------- - * - * In this file, we focus solving the following problem - * - * 1/2 \|x-v\|^2 + \lambda_1 \|x\|_1 + \lambda_2 \sum w_i \|x_{G_i}\|, - * - * where x and v are of dimension p, - * w >0, and G_i contains the indices for the i-th group - * - * The file is implemented in the following in Matlab: - * - * x=overlapping(v, p, g, lambda1, lambda2, w, G, Y, maxIter, flag, tol); - * - * x and v are vectors of dimension p - * - * g denotes the number of groups - * - * lambda1 and labmda2 are non-negative regularization paramter - * - * G is a vector containing the indices for the groups - * G_1, G_2, ..., G_g - * - * w is a 3xg matrix - * w(1,i) contains the starting index of the i-th group in G - * w(2,i) contains the ending index of the i-th group in G - * w(3,i) contains the weight for the i-th group - * - * Y is the dual of \|x_{G_i}\|, it is of the same size as G - * - * maxIter is the maximal number of iteration - * - * flag=0, we apply the pure projected gradient descent - * (forward and backward line search is used) - * - * flag=1, we apply the projected gradient descent with restart - * - * in the future, we may apply the accelerated gradient descent - * with adaptive line search (see our KDD'09 paper) with other "flag" - * - * - * Note: - * - * 1. One should ensure w(2,i)-w(1,i)+1=|G_i|. - * !! The program does not check w(2,i)-w(1,i)+1=|G_i|.!! - * - * 2. 
The index in G and w starts from 0 - * - * ------------------------------------------------------------------- - * History: - * ------------------------------------------------------------------- - * - * Composed by Jun Liu on May 17, 2010 - * - * For any question or suggestion, please email j.liu@asu.edu or - * jun.liu.80@gmail.com - * - */ - - -/* - * -------------------------------------------------------------------- - * Identifying some zero Entries - * -------------------------------------------------------------------- - * - * lambda1, lambda2 should be non-negative - * - * v is the vector of size p to be projected - * - * - * zeroGroupFlag is a vector of size g - * - * zeroGroupFlag[i]=0 denotes that the corresponding group is definitely zero - * zeroGroupFlag[i]=1 denotes that the corresponding group is (possibly) nonzero - * - * - * u is a vector of size p - * - * - * entrySignFlag is a vector of size p - * - * entrySignFlag[i]=0 denotes that the corresponding entry is definitely zero - * entrySignFlag[i]=1 denotes that the corresponding entry is (possibly) positive - * entrySignFlag[i]=-1 denotes that the corresponding entry is (possibly) negative - * - */ -void identifySomeZeroEntries(double * u, int * zeroGroupFlag, int *entrySignFlag, - int *pp, int *gg, - double *v, double lambda1, double lambda2, - int p, int g, double * w, double *G); - -/* - * - * function: xFromY - * - * compute x=max(u-Y * e, 0); - * - * xFromY(x, y, u, Y, p, g, zeroGroupFlag, G, w); - * - * y=u-Y * e - max( u - Y * e, 0) - * - */ -void xFromY(double *x, double *y, - double *u, double *Y, - int p, int g, int *zeroGroupFlag, - double *G, double *w); - -/* - * - * function: YFromx - * - * compute Y=subgradient(x) - * - * YFromx(Y, xnew, Ynew, lambda2, g, zeroGroupFlag, G, w); - * - * The idea is that, if x_{G_i} is nonzero, - * we compute Y^i as x_{G_i}/ \|x_{G_i}\| * lambda2 * w[3*i+2] - * otherwise - * Y^i=Ynew^i - * - */ -void YFromx(double *Y, - double *xnew, double *Ynew, - 
double lambda2, int g, int *zeroGroupFlag, - double *G, double *w); - -/* - * function: dualityGap - * - * compute the duality gap for the approximate solution (x, Y) - * - * Meanwhile, we compute - * - * penalty2=\sum_{i=1}^g w_i \|x_{G_i}\| - * - */ -void dualityGap(double *gap, double *penalty2, - double *x, double *Y, int g, int *zeroGroupFlag, - double *G, double *w, double lambda2); - -/* - * we solve the proximal opeartor: - * - * 1/2 \|x-v\|^2 + \lambda_1 \|x\|_1 + \lambda_2 \sum w_i \|x_{G_i}\| - * - * See the description of the variables in the beginning of this file - * - * x is the primal variable, each of its entry is non-negative - * - * Y is the dual variable, each of its entry should be non-negative - * - * flag =0: no restart - * flag =1; restart - * - * tol: the precision parameter - * - * The following code apply the projected gradient descent method - * - */ -void overlapping_gd(double *x, double *gap, double *penalty2, - double *v, int p, int g, double lambda1, double lambda2, - double *w, double *G, double *Y, int maxIter, int flag, double tol); - -/* - * - * do a gradient descent step based (x, Y) to get (xnew, Ynew) - * - * (x, Y) is known. 
Here we do a line search for determining the value of L - * - * gradientDescentStep(double *xnew, double *Ynew, - double *LL, double *u, - double *x, double *Y, int p, int g, int * zeroGroupFlag, - double *G, double *w) - * - */ -void gradientDescentStep(double *xnew, double *Ynew, - double *LL, double *u, double *y, int *entrySignFlag, double lambda2, - double *x, double *Y, int p, int g, int * zeroGroupFlag, - double *G, double *w); - -/* - * - * we use the accelerated gradient descent - * - */ -void overlapping_agd(double *x, double *gap, double *penalty2, - double *v, int p, int g, double lambda1, double lambda2, - double *w, double *G, double *Y, int maxIter, int flag, double tol); - -/* - * This is main function for the projection - * - * It calls overlapping_gd and overlapping_agd based on flag - * - * - */ -void overlapping(double *x, double *gap, double *penalty2, - double *v, int p, int g, double lambda1, double lambda2, - double *w, double *G, double *Y, int maxIter, int flag, double tol); - -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef OVERLAPPING_SLEP ----- */ diff --git a/src/shogun/lib/slep/q1/ep1R.h b/src/shogun/lib/slep/q1/ep1R.h deleted file mode 100644 index d7447c5dd1f..00000000000 --- a/src/shogun/lib/slep/q1/ep1R.h +++ /dev/null @@ -1,71 +0,0 @@ -/* This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - - -#ifndef EP1R_SLEP -#define EP1R_SLEP - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include - - -/* - Euclidean Projection onto l_{2,1} Ball - - min 1/2 ||x- u||_2^2 + 1/2 ||t- v||_2^2 - s.t. |x|<=t - - -Usage: -[x, t]=ep1R(u, v, n); - -*/ - - -void ep1R(double * x, double *t, double * u, double * v, int n) -{ - int j; - - - for(j=0;j fabs(v[j])){ - t[j]=(fabs(u[j]) + v[j])/2; - - if (u[j] >0) - x[j]=t[j]; - else - x[j]=-t[j]; - } - else - if(fabs(u[j]) <= v[j]){ - t[j]=v[j]; - x[j]=u[j]; - } - else{ - t[j]=x[j]=0; - } - - } -} -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef EP1R_SLEP ----- */ - diff --git a/src/shogun/lib/slep/q1/ep21R.h b/src/shogun/lib/slep/q1/ep21R.h deleted file mode 100644 index f53a9abbc86..00000000000 --- a/src/shogun/lib/slep/q1/ep21R.h +++ /dev/null @@ -1,81 +0,0 @@ -/* This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - - -#ifndef EP21R_SLEP -#define EP21R_SLEP - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include - - -/* - Euclidean Projection onto l_{2,1} Ball - - min 1/2 ||x- u||_2^2 + 1/2 ||t- v||_2^2 - s.t. 
||x^j||_{2,1} <= t^j - - -Usage: -[x, t]=ep21R(u, v, n, k); - -*/ - - -void ep21R(double * x, double *t, double * u, double * v, int n, int k) -{ - int i, j, tn=n*k; - double temp; - - /* compute the 2 norm of each group - */ - - for(j=0;j fabs(v[j])){ - t[j]=(temp + v[j])/2; - for (i=j; i. - * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - - -#ifndef EP21D_SLEP -#define EP21D_SLEP - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include -#include /* This is the head file that contains the implementation of the used functions*/ - -/* - Euclidean Projection onto l_{2,1} Ball - - min 1/2 ||X- V||_2^2 - s.t. ||X||_{2,1} <= z - - which is converted to the following zero finding problem - - f(lambda)= \sum_i ( max( |v^i|-lambda,0) )-z=0 - - v^i denotes the i-th row of V - -Usage: -[x, lambda, iter_step]=ep21d(y, n, k, z, lambda0); - -*/ - - -void ep21d(double * x, double *root, int * steps, double * v, int n, int k, double z, double lambda0) -{ - int i, j, tn=n*k; - double *vnorm=(double *)malloc(sizeof(double)*n); - double *vproj=(double *)malloc(sizeof(double)*n); - double t; - - /* compute the 2 norm of each group - */ - - for(j=0;j. - * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include - -void eppMatrix(double *X, double * V, int k, int n, double rho, double p) -{ - int i, j, *iter_step; - double *v, *x; - double c0, c; - - v=(double *)malloc(sizeof(double)*n); - x=(double *)malloc(sizeof(double)*n); - iter_step=(int *)malloc(sizeof(int)*2); - - /* - *X and V are k x n matrices in matlab, stored in column priority manner - *x corresponds a row of X - */ - - - c0=0; - for(i=0; i. 
- * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - - -#ifndef EPPMATRIXQ1_SLEP -#define EPPMATRIXQ1_SLEP - -#include -#ifdef USE_GPL_SHOGUN - -#include /* This is the head file that contains the implementation of the used functions*/ - -/* - Lp Norm Regularized Euclidean Projection - - min 1/2 ||x- v||_2^2 + rho * ||x||_p - - Usage (in Matlab): - [x, c, iter_step]=epp(v, n, rho, p, c0); - - Usage in C: - epp(x, c, iter_step, v, n, rho, p, c0); - - The function epp implements the following three functions - epp1(x, v, n, rho) for p=1 - epp2(x, v, n, rho) for p=2 - eppInf(x, c, iter_step, v, n, rho, c0) for p=inf - eppO(x, c, iter_step, v, n, rho, p) for other p - ------------------------------------------------------------- - - Here, the input and output are of matrix form. Each row corresponds a group - - - Written by Jun Liu, May 18th, 2009 - For any problem, please contact: j.liu@asu.edu - - */ -void eppMatrix(double *X, double * V, int k, int n, double rho, double p); -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef EPPMATRIXQ1_SLEP ----- */ - diff --git a/src/shogun/lib/slep/q1/eppVector.h b/src/shogun/lib/slep/q1/eppVector.h deleted file mode 100644 index 94a6a98a793..00000000000 --- a/src/shogun/lib/slep/q1/eppVector.h +++ /dev/null @@ -1,78 +0,0 @@ -/* This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - - -#ifndef EPPVECTOR_SLEP -#define EPPVECTOR_SLEP - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include -#include /* This is the head file that contains the implementation of the used functions*/ - - -/* - Lp Norm Regularized Euclidean Projection - - min 1/2 ||x- v||_2^2 + rho * ||x||_p - - Usage (in Matlab): - [x, c, iter_step]=epp(v, n, rho, p, c0); - - Usage in C: - epp(x, c, iter_step, v, n, rho, p, c0); - - The function epp implements the following three functions - epp1(x, v, n, rho) for p=1 - epp2(x, v, n, rho) for p=2 - eppInf(x, c, iter_step, v, n, rho, c0) for p=inf - eppO(x, c, iter_step, v, n, rho, p) for other p - - ------------------------------------------------------------ - - Here, the input and output are of Vector form. - - - Written by Jun Liu, May 18th, 2009 - For any problem, please contact: j.liu@asu.edu - -*/ - -void eppVector(double *x, double * v, int* ind, int k, int n, double * rho, double rho_multiplier, double p){ - int i, *iter_step; - double c0, c; - double *px, *pv; - - iter_step=(int *)malloc(sizeof(int)*2); - - c0=0; - for(i=0; i. - * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - - -#ifndef EPPVECTORR_SLEP -#define EPPVECTORR_SLEP - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include - -/* - min 1/2 ( ||x- u||_2^2 + ||t-v||_2^2 ) - s.t. 
||x_j||_2 <= t_j - - */ - -void eppVectorR(double *x, double * t, double * u, double * v, double * ind, int n, int k){ - int i, j; - double temp; - - /* compute the 2 norm of each group - */ - - for(j=0;j fabs(v[j])){ - t[j]=(temp + v[j])/2; - - for(i=(int) (ind[j]); i< (int) (ind[j+1]); i++) - x[i]= t[j] / temp * u[i]; - } - else - if(temp <= v[j]){ - t[j]=v[j]; - - for(i=(int) (ind[j]); i< (int) (ind[j+1]); i++) - x[i]= u[i]; - } - else{ - t[j]=0; - - for(i=(int) (ind[j]); i< (int) (ind[j+1]); i++) - x[i]=0; - } - - } -} -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef EPPVECTORR_SLEP ----- */ diff --git a/src/shogun/lib/slep/q1/epph.cpp b/src/shogun/lib/slep/q1/epph.cpp deleted file mode 100644 index 07b986c5a2f..00000000000 --- a/src/shogun/lib/slep/q1/epph.cpp +++ /dev/null @@ -1,690 +0,0 @@ -/* This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include - -#define delta 1e-8 - -#define innerIter 1000 -#define outerIter 1000 - -void eplb(double * x, double *root, int * steps, double * v,int n, double z, double lambda0) -{ - - int i, j, flag=0; - int rho_1, rho_2, rho, rho_T, rho_S; - int V_i_b, V_i_e, V_i; - double lambda_1, lambda_2, lambda_T, lambda_S, lambda; - double s_1, s_2, s, s_T, s_S, v_max, temp; - double f_lambda_1, f_lambda_2, f_lambda, f_lambda_T, f_lambda_S; - int iter_step=0; - - /* find the maximal absolute value in v - * and copy the (absolute) values from v to x - */ - - if (z< 0){ - printf("\n z should be nonnegative!"); - return; - } - - V_i=0; - if (v[0] !=0){ - rho_1=1; - s_1=x[V_i]=v_max=fabs(v[0]); - V_i++; - } - else{ - rho_1=0; - s_1=v_max=0; - } - - for (i=1;i v_max) - v_max=x[V_i]; - V_i++; - } - } - - /* If ||v||_1 <= z, then v is the solution */ - if (s_1 <= z){ - flag=1; lambda=0; - for(i=0;i lambda_1) ){ - /*------------------------------------------------------------------- - Initialization with the root - *------------------------------------------------------------------- - */ - - i=V_i_b; j=V_i_e; rho=0; s=0; - while (i <= j){ - while( (i <= V_i_e) && (x[i] <= lambda) ){ - i++; - } - while( (j>=V_i_b) && (x[j] > lambda) ){ - s+=x[j]; - j--; - } - if (i lambda_T) - lambda_T=lambda_2 + f_lambda_2 /rho_2; - } - - /* compute lambda_S */ - lambda_S=lambda_2 - f_lambda_2 *(lambda_2-lambda_1)/(f_lambda_2-f_lambda_1); - - if (fabs(lambda_T-lambda_S) <= delta){ - lambda=lambda_T; flag=1; - break; - } - - /* set lambda as the middle point of lambda_T and lambda_S */ - lambda=(lambda_T+lambda_S)/2; - - s_T=s_S=s=0; - rho_T=rho_S=rho=0; - i=V_i_b; j=V_i_e; - while (i <= j){ - while( (i <= V_i_e) && (x[i] <= lambda) ){ - if (x[i]> lambda_T){ - s_T+=x[i]; rho_T++; - } - i++; - } - while( (j>=V_i_b) && (x[j] > lambda) ){ - if (x[j] > lambda_S){ - s_S+=x[j]; 
rho_S++; - } - else{ - s+=x[j]; rho++; - } - j--; - } - if (i lambda_S){ - s_S+=x[i]; rho_S++; - } - else{ - s+=x[i]; rho++; - } - - if (x[j]> lambda_T){ - s_T+=x[j]; rho_T++; - } - - temp=x[i]; x[i]=x[j]; x[j]=temp; - i++; j--; - } - } - - s_S+=s_2; rho_S+=rho_2; - s+=s_S; rho+=rho_S; - s_T+=s; rho_T+=rho; - f_lambda_S=s_S-rho_S*lambda_S-z; - f_lambda=s-rho*lambda-z; - f_lambda_T=s_T-rho_T*lambda_T-z; - - /*printf("\n %d & %d & %5.6f & %5.6f & %5.6f & %5.6f & %5.6f \\\\ \n \\hline ", iter_step, V_i, lambda_1, lambda_T, lambda, lambda_S, lambda_2);*/ - - if ( fabs(f_lambda)< delta ){ - /*printf("\n lambda");*/ - flag=1; - break; - } - if ( fabs(f_lambda_S)< delta ){ - /* printf("\n lambda_S");*/ - lambda=lambda_S; flag=1; - break; - } - if ( fabs(f_lambda_T)< delta ){ - /* printf("\n lambda_T");*/ - lambda=lambda_T; flag=1; - break; - } - - /* - printf("\n\n f_lambda_1=%5.6f, f_lambda_2=%5.6f, f_lambda=%5.6f",f_lambda_1,f_lambda_2, f_lambda); - printf("\n lambda_1=%5.6f, lambda_2=%5.6f, lambda=%5.6f",lambda_1, lambda_2, lambda); - printf("\n rho_1=%d, rho_2=%d, rho=%d ",rho_1, rho_2, rho); - */ - - if (f_lambda <0){ - lambda_2=lambda; s_2=s; rho_2=rho; - f_lambda_2=f_lambda; - - lambda_1=lambda_T; s_1=s_T; rho_1=rho_T; - f_lambda_1=f_lambda_T; - - V_i_e=j; i=V_i_b; - while (i <= j){ - while( (i <= V_i_e) && (x[i] <= lambda_T) ){ - i++; - } - while( (j>=V_i_b) && (x[j] > lambda_T) ){ - j--; - } - if (i=V_i_b) && (x[j] > lambda_S) ){ - j--; - } - if (i lambda) - x[i]=v[i]-lambda; - else - if (v[i]< -lambda) - x[i]=v[i]+lambda; - else - x[i]=0; - } - *root=lambda; - *steps=iter_step; -} - -void epp1(double *x, double *v, int n, double rho) -{ - int i; - - /* - we assume rho>=0 - */ - - for(i=0;i=0 - */ - - for(i=0; i< n; i++){ - v2+=v[i]*v[i]; - } - v2=sqrt(v2); - - if (rho >= v2) - for(i=0;i=0 - */ - - eplb(x, c, &steps, v, n, rho, c0); - - for(i=0; i< n; i++){ - x[i]=v[i]-x[i]; - } - iter_step[0]=steps; - iter_step[1]=0; -} - -void zerofind(double *root, int * 
iterStep, double v, double p, double c, double x0) -{ - double x, f, fprime, p1=p-1, pp; - int step=0; - - - if (v==0){ - *root=0; *iterStep=0; return; - } - - if (c==0){ - *root=v; * iterStep=0; return; - } - - - if ( (x0 0) ) - x=x0; - else - x=v; - - - pp=pow(x, p1); - f= x + c* pp -v; - - - /* - We apply the Newton's method for solving the root - */ - while (1){ - step++; - - fprime=1 + c* p1 * pp / x; - /* - The derivative at the current solution x - */ - - x = x- f/fprime; /* - The new solution is computed by the Newton method - */ - - - - if (p>2){ - if (x>v){ - x=v; - } - } - else{ - if ( (x<0) || (x>v)){ - x=1e-30; - - f= x+c* pow(x,p1)-v; - - if (f>0){ /* - If f(x) = x + c x^{p-1} - v <0 at x=1e-30, - this shows that the real root is between (0, 1e-30). - For numerical reason, we just set x=0 - */ - - *root=x; - * iterStep=step; - - break; - } - } - } - /* - This makes sure that x lies in the interval [0, v] - */ - - pp=pow(x, p1); - f= x + c* pp -v; - /* - The function value at the new solution - */ - - if ( fabs(f) <= delta){ - *root=x; - * iterStep=step; - break; - } - - if (step>=innerIter){ - printf("\n The number of steps exceed %d, in finding the root for f(x)= x + c x^{p-1} - v, 0< x< v.", innerIter); - printf("\n If you meet with this problem, please contact Jun Liu (j.liu@asu.edu). Thanks!"); - return; - } - - } - - /* - printf("\n x=%e, f=%e, step=%d\n",x, f, step); - */ -} - -double norm(double * v, double p, int n) -{ - int i; - double t=0; - - - /* - we assume that v[i]>=0 - p>1 - */ - - for(i=0;i0) - flag[i]=0; - else - { - flag[i]=1; - v[i]=-v[i];/* - we set v[i] to its absolute value - */ - } - - vq+=pow(v[i], q); - - - if (v[i]>vmax) - vmax=v[i]; - - if (v[i]= vq){ - *cc=0; - iter_step[0]=iter_step[1]=0; - - - for(i=0;i2){ - - if ( log((1-epsilon) * vmin) - (p-1) * log( epsilon* vmin ) >= 709 ) - { - /* If this contition holds, we have c2 >= 1e308, exceeding the machine precision. 
- - In this case, the reason is that p is too large - and meanwhile epsilon * vmin is typically small. - - For numerical stablity, we just regard p=inf, and run eppInf - */ - - - for(i=0;i0){ - if ( (x_diff <=delta) && (p_n==0) ) - break; - - c1=c; p_n=1; - } - else{ - - if ( (x_diff <=delta) && (p_n==1) ) - break; - - c2=c; p_n=0; - } - } - c=(c1+c2)/2; - - if (bisStep>=outerIter){ - - - if ( fabs(c1-c2) <=delta * c2 ) - break; - else{ - printf("\n The number of bisection steps exceed %d.", outerIter); - printf("\n c1=%e, c2=%e, x_diff=%e, f=%e",c1,c2,x_diff,f); - printf("\n If you meet with this problem, please contact Jun Liu (j.liu@asu.edu). Thanks!"); - free(flag); - - return; - } - } - - /* - printf("\n c1=%e, c2=%e, f=%e, newtonStep=%d", c1, c2, f, newtonStep); - */ - } - - /* - printf("\n c1=%e, c2=%e, x_diff=%e, f=%e, bisStep=%d, totoalStep=%d",c1,c2, x_diff, f,bisStep,totoalStep); - */ - - for(i=0;i=1e6) /* when p >=1e6, we treat it as infity*/ - eppInf(x, c, iter_step, v, n, rho, c0); - else - eppO(x, c, iter_step, v, n, rho, p); -} - -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/slep/q1/epph.h b/src/shogun/lib/slep/q1/epph.h deleted file mode 100644 index cd3d36e25d4..00000000000 --- a/src/shogun/lib/slep/q1/epph.h +++ /dev/null @@ -1,157 +0,0 @@ -/* This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - - -#ifndef EPPHQ1_SLEP -#define EPPHQ1_SLEP - -#include -#ifdef USE_GPL_SHOGUN - -/* -------------------------- Function eplb ----------------------------- - - Euclidean Projection onto l1 Ball (eplb) - - min 1/2 ||x- v||_2^2 - s.t. ||x||_1 <= z - - which is converted to the following zero finding problem - - f(lambda)= sum( max( |v|-lambda,0) )-z=0 - - For detail, please refer to our paper: - - Jun Liu and Jieping Ye. Efficient Euclidean Projections in Linear Time, - ICML 2009. - - Usage (in matlab): - [x, lambda, iter_step]=eplb(v, n, z, lambda0); - - -------------------------- Function eplb ----------------------------- - */ -void eplb(double * x, double *root, int * steps, double * v,int n, double z, double lambda0); - -/* -------------------------- Function epp1 ----------------------------- - - The L1-norm Regularized Euclidean Projection (epp1) - - min 1/2 ||x- v||_2^2 + rho ||x||_1 - - which has the closed form solution - - x= sign(v) max( |v|- rho, 0) - - Usage (in matlab) - x=epp1(v, n, rho); - - -------------------------- Function epp1 ----------------------------- - */ -void epp1(double *x, double *v, int n, double rho); - -/* -------------------------- Function epp2 ----------------------------- - - The L2-norm Regularized Euclidean Projection (epp2) - - min 1/2 ||x- v||_2^2 + rho ||x||_2 - - which has the closed form solution - - x= max( ||v||_2- rho, 0) / ||v||_2 * v - - Usage (in matlab) - x=epp2(v, n, rho); - - -------------------------- Function epp2 ----------------------------- - */ -void epp2(double *x, double *v, int n, double rho); - -/* -------------------------- Function eppInf ----------------------------- - - The LInf-norm Regularized Euclidean Projection (eppInf) - - min 1/2 ||x- v||_2^2 + rho ||x||_Inf - - which is can be solved by using eplb - - Usage (in matlab) - [x, lambda, iter_step]=eppInf(v, n, rho, rho0); - - -------------------------- Function eppInf 
----------------------------- - */ -void eppInf(double *x, double * c, int * iter_step, double *v, int n, double rho, double c0); - -/* -------------------------- Function zerofind ----------------------------- - - Find the root for the function: f(x) = x + c x^{p-1} - v, - 0 <= x <= v, v>=0 - 1< p < infty, p \neq 2 - - Property: when p>2, f(x) is a convex function - when 1=1 - - min 1/2 ||x- v||_2^2 + rho ||x||_p - - This function uses the previously defined functions. - - Usage (in matlab) - [x, c, iter_step]=eppO(v, n, rho, p, c0); - - -------------------------- Function epp ----------------------------- - */ -void epp(double *x, double * c, int * iter_step, double * v, int n, double rho, double p, double c0); -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef EPPHQ1_SLEP ----- */ - diff --git a/src/shogun/lib/slep/q1/epsgLasso.h b/src/shogun/lib/slep/q1/epsgLasso.h deleted file mode 100644 index 30fc0e3f98a..00000000000 --- a/src/shogun/lib/slep/q1/epsgLasso.h +++ /dev/null @@ -1,203 +0,0 @@ -/* This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - - -#ifndef EPSGLASSO_SLEP -#define EPSGLASSO_SLEP - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include -#include /* This is the head file that contains the implementation of the used functions*/ - - -/* - Projection for sgLasso - - min 1/2 \|X - V\|_F^2 + \lambda_1 \|X\|_1 + \lambda_2 \|X\|_{p,1} - - Written by Jun Liu, January 15, 2010 - For any problem, please contact: j.liu@asu.edu - - */ - -void epsgLasso(double *X, double * normx, double * V, int k, int n, double lambda1, double lambda2, int pflag){ - int i, j, *iter_step, nn=n*k, m; - double *v, *x; - double normValue,c0=0, c; - - v=(double *)malloc(sizeof(double)*n); - x=(double *)malloc(sizeof(double)*n); - iter_step=(int *)malloc(sizeof(int)*2); - - /* - initialize normx - */ - normx[0]=normx[1]=0; - - - /* - X and V are k x n matrices in matlab, stored in column priority manner - x corresponds a row of X - - pflag=2: p=2 - pflag=0: p=inf - */ - - /* - soft thresholding - by lambda1 - - the results are stored in X - */ - for (i=0;i lambda1) - X[i]=V[i] - lambda1; - else - X[i]=0; - } - - /* - Shrinkage or Truncating - by lambda2 - */ - if (pflag==2){ - for(i=0; i c) - X[i + j*k]=c; - else - if (X[i + j*k]<-c) - X[i + j*k]=-c; - } - } - } - } - - - free(v); - free(x); - free(iter_step); -} -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef EPSGLASSO_SLEP ----- */ - diff --git a/src/shogun/lib/slep/q1/epsp.h b/src/shogun/lib/slep/q1/epsp.h deleted file mode 100644 index 7a14b0981fe..00000000000 --- a/src/shogun/lib/slep/q1/epsp.h +++ /dev/null @@ -1,305 +0,0 @@ -/* This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - -#ifdef USE_GPL_SHOGUN - -#ifndef EPSP_SLEP -#define EPSP_SLEP - -#include - -#include -#include -#include -#include - -#define delta 1e-12 - -/* - Euclidean Projection onto the simplex (epsp) - - min 1/2 ||x- y||_2^2 - s.t. ||x||_1 = z, x >=0 - -which is converted to the following zero finding problem - - f(lambda)= sum( max( x-lambda,0) )-z=0 - - Usage: - [x, lambda, iter_step]=epsp(y, n, z, lambda0); - - */ - -void epsp(double * x, double *root, int * stepsp, double * v, -int n, double z, double lambda0) -{ - - int i, j, flag=0; - int rho_1, rho_2, rho, rho_T, rho_S; - int V_i_b, V_i_e, V_i; - double lambda_1, lambda_2, lambda_T, lambda_S, lambda; - double s_1, s_2, s, s_T, s_S, v_max, temp; - double f_lambda_1, f_lambda_2, f_lambda, f_lambda_T, f_lambda_S; - int iter_step=0; - - - - if (z< 0){ - printf("\n z should be nonnegative!"); - exit(-1); - } - - - /* - * find the maximal value in v - */ - v_max=v[0]; - for (i=1;i v_max) - v_max=v[i]; - } - - - lambda_1=v_max - z; lambda_2=v_max; - /* - * copy v to x - * compute f_lambda_1, rho_1, s_1 - */ - V_i=0;s_1=0; rho_1=0; - for (i=0;i lambda_1){ - x[V_i]=v[i]; - - s_1+=x[V_i]; rho_1++; - - V_i++; - } - } - f_lambda_1=s_1-rho_1* lambda_1 -z; - - rho_2=0; s_2=0; f_lambda_2=-z; - V_i_b=0; V_i_e=V_i-1; - - lambda=lambda0; - if ( (lambda lambda_1) ){ - /*------------------------------------------------------------------- - Initialization with the root - *------------------------------------------------------------------- - */ - - i=V_i_b; j=V_i_e; rho=0; s=0; - while (i <= j){ - while( (i <= 
V_i_e) && (x[i] <= lambda) ){ - i++; - } - while( (j>=V_i_b) && (x[j] > lambda) ){ - s+=x[j]; - j--; - } - if (i lambda_T) - lambda_T=lambda_2 + f_lambda_2 /rho_2; - } - - /* compute lambda_S */ - lambda_S=lambda_2 - f_lambda_2 *(lambda_2-lambda_1)/(f_lambda_2-f_lambda_1); - - if (fabs(lambda_T-lambda_S) <= delta){ - lambda=lambda_T; flag=1; - break; - } - - /* set lambda as the middle point of lambda_T and lambda_S */ - lambda=(lambda_T+lambda_S)/2; - - s_T=s_S=s=0; - rho_T=rho_S=rho=0; - i=V_i_b; j=V_i_e; - while (i <= j){ - while( (i <= V_i_e) && (x[i] <= lambda) ){ - if (x[i]> lambda_T){ - s_T+=x[i]; rho_T++; - } - i++; - } - while( (j>=V_i_b) && (x[j] > lambda) ){ - if (x[j] > lambda_S){ - s_S+=x[j]; rho_S++; - } - else{ - s+=x[j]; rho++; - } - j--; - } - if (i lambda_S){ - s_S+=x[i]; rho_S++; - } - else{ - s+=x[i]; rho++; - } - - if (x[j]> lambda_T){ - s_T+=x[j]; rho_T++; - } - - temp=x[i]; x[i]=x[j]; x[j]=temp; - i++; j--; - } - } - - s_S+=s_2; rho_S+=rho_2; - s+=s_S; rho+=rho_S; - s_T+=s; rho_T+=rho; - f_lambda_S=s_S-rho_S*lambda_S-z; - f_lambda=s-rho*lambda-z; - f_lambda_T=s_T-rho_T*lambda_T-z; - - /*printf("\n %d & %d & %5.6f & %5.6f & %5.6f & %5.6f & %5.6f \\\\ \n \\hline ", iter_step, V_i, lambda_1, lambda_T, lambda, lambda_S, lambda_2);*/ - - if ( fabs(f_lambda)< delta ){ - /*printf("\n lambda");*/ - flag=1; - break; - } - if ( fabs(f_lambda_S)< delta ){ - /* printf("\n lambda_S");*/ - lambda=lambda_S; flag=1; - break; - } - if ( fabs(f_lambda_T)< delta ){ - /* printf("\n lambda_T");*/ - lambda=lambda_T; flag=1; - break; - } - - /* - printf("\n\n f_lambda_1=%5.6f, f_lambda_2=%5.6f, f_lambda=%5.6f",f_lambda_1,f_lambda_2, f_lambda); - printf("\n lambda_1=%5.6f, lambda_2=%5.6f, lambda=%5.6f",lambda_1, lambda_2, lambda); - printf("\n rho_1=%d, rho_2=%d, rho=%d ",rho_1, rho_2, rho); - */ - - if (f_lambda <0){ - lambda_2=lambda; s_2=s; rho_2=rho; - f_lambda_2=f_lambda; - - lambda_1=lambda_T; s_1=s_T; rho_1=rho_T; - f_lambda_1=f_lambda_T; - - V_i_e=j; 
i=V_i_b; - while (i <= j){ - while( (i <= V_i_e) && (x[i] <= lambda_T) ){ - i++; - } - while( (j>=V_i_b) && (x[j] > lambda_T) ){ - j--; - } - if (i=V_i_b) && (x[j] > lambda_S) ){ - j--; - } - if (i lambda) - x[i]=v[i]-lambda; - else - x[i]=0; - } - *root=lambda; - *stepsp=iter_step; -} -#endif /* ----- #ifndef EPSP_SLEP ----- */ - -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/slep/slep_mc_plain_lr.cpp b/src/shogun/lib/slep/slep_mc_plain_lr.cpp deleted file mode 100644 index f10592f2e48..00000000000 --- a/src/shogun/lib/slep/slep_mc_plain_lr.cpp +++ /dev/null @@ -1,235 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2012 Sergey Lisitsyn - * Copyright (C) 2010-2012 Jun Liu, Jieping Ye - */ - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include -#include -#include - -using namespace shogun; -using namespace Eigen; -using namespace std; - -namespace shogun -{ - -slep_result_t slep_mc_plain_lr( - CDotFeatures* features, - CMulticlassLabels* labels, - float64_t z, - const slep_options& options) -{ - int i,j; - // obtain problem parameters - int n_feats = features->get_dim_feature_space(); - int n_vecs = features->get_num_vectors(); - int n_classes = labels->get_num_classes(); - - // labels vector containing values in range (0 .. 
n_classes) - SGVector labels_vector = labels->get_labels(); - - // initialize matrices and vectors to be used - // weight vector - MatrixXd w = MatrixXd::Zero(n_feats, n_classes); - // intercepts (biases) - VectorXd c = VectorXd::Zero(n_classes); - - if (options.last_result) - { - SGMatrix last_w = options.last_result->w; - SGVector last_c = options.last_result->c; - for (i=0; idense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0); - MatrixXd As = MatrixXd::Zero(n_vecs, n_classes); - MatrixXd Awp = MatrixXd::Zero(n_vecs, n_classes); - // gradients - MatrixXd g = MatrixXd::Zero(n_feats, n_classes); - VectorXd gc = VectorXd::Zero(n_classes); - // projection - MatrixXd v = MatrixXd::Zero(n_feats, n_classes); - - // Lipschitz continuous gradient parameter for line search - double L = 1.0/(n_vecs*n_classes); - // coefficients for search point computation - double alphap = 0, alpha = 1; - - // lambda regularization parameter - double lambda = z; - // objective values - double objective = 0.0; - double objective_p = 0.0; - - int iter = 0; - bool done = false; - CTime time; - //internal::set_is_malloc_allowed(false); - while ((!done) && (iter 0.0 ? aa : 0.0; - // avoid underflow via log-sum-exp trick - fun_s += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb; - double prob = 1.0/(1+CMath::exp(aa)); - double b = ((vec_class == j) ? 
-1.0 : 1.0)*(1-prob);///(n_vecs*n_classes); - // update gradient of intercepts - gc[j] += b; - // update gradient of weight vectors - features->add_to_dense_vec(b, i, g.col(j).data(), n_feats); - } - } - //fun_s /= (n_vecs*n_classes); - - wp = w; - Awp = Aw; - cp = c; - - int inner_iter = 0; - double fun_x = 0; - - // line search process - while (inner_iter<5000) - { - // compute line search point - v = search_w - g/L; - c = search_c - gc/L; - - // compute projection of gradient - eppMatrix(w.data(),v.data(),n_feats,n_classes,lambda/L,options.q); - - v = w - search_w; - - // update dot products - for (j=0; jdense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0); - - // compute objective at search point - fun_x = 0; - for (i=0; i 0.0 ? aa : 0.0; - fun_x += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb; - } - } - //fun_x /= (n_vecs*n_classes); - - // check for termination of line search - double r_sum = (v.squaredNorm() + (c-search_c).squaredNorm())/2; - double l_sum = fun_x - fun_s - v.cwiseProduct(g).sum() - (c-search_c).dot(gc); - - // stop if projected gradient is less than 1e-20 - if (r_sum <= 1e-20) - { - SG_SINFO("Gradient step makes little improvement (%f)\n",r_sum) - done = true; - break; - } - - if (l_sum <= r_sum*L) - break; - else - L = CMath::max(2*L, l_sum/r_sum); - - inner_iter++; - } - - // update alpha coefficients - alphap = alpha; - alpha = (1+CMath::sqrt(4*alpha*alpha+1))/2; - - // update wwp and ccp - wwp = w - wp; - ccp = c - cp; - - // update objectives - objective_p = objective; - objective = fun_x; - - // regularize objective with tree norm - double L1q_norm = 0.0; - for (int m=0; m r_w(n_feats,n_classes); - for (j=0; j r_c(n_classes); - for (j=0; j -#ifdef USE_GPL_SHOGUN -#include -#include -#include - -namespace shogun -{ - -/** Accelerated projected gradient solver for multiclass - * logistic regression problem with feature tree regularization. 
- * - * @param features features to be used - * @param labels labels to be used - * @param z regularization ratio - * @param options options of solver - */ -slep_result_t slep_mc_plain_lr( - CDotFeatures* features, - CMulticlassLabels* labels, - float64_t z, - const slep_options& options); - -}; -#endif //USE_GPL_SHOGUN -#endif /* SLEP_MC_PLAIN_LR_H_ */ - diff --git a/src/shogun/lib/slep/slep_mc_tree_lr.cpp b/src/shogun/lib/slep/slep_mc_tree_lr.cpp deleted file mode 100644 index fc8bb4c0878..00000000000 --- a/src/shogun/lib/slep/slep_mc_tree_lr.cpp +++ /dev/null @@ -1,249 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2012 Sergey Lisitsyn - * Copyright (C) 2010-2012 Jun Liu, Jieping Ye - */ - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace shogun; -using namespace Eigen; -using namespace std; - -namespace shogun -{ - -slep_result_t slep_mc_tree_lr( - CDotFeatures* features, - CMulticlassLabels* labels, - float64_t z, - const slep_options& options) -{ - int i,j; - // obtain problem parameters - int n_feats = features->get_dim_feature_space(); - int n_vecs = features->get_num_vectors(); - int n_classes = labels->get_num_classes(); - - // labels vector containing values in range (0 .. 
n_classes) - SGVector labels_vector = labels->get_labels(); - - // initialize matrices and vectors to be used - // weight vector - MatrixXd w = MatrixXd::Zero(n_feats, n_classes); - // intercepts (biases) - VectorXd c = VectorXd::Zero(n_classes); - - if (options.last_result) - { - SGMatrix last_w = options.last_result->w; - SGVector last_c = options.last_result->c; - for (i=0; idense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0); - MatrixXd As = MatrixXd::Zero(n_vecs, n_classes); - MatrixXd Awp = MatrixXd::Zero(n_vecs, n_classes); - // gradients - MatrixXd g = MatrixXd::Zero(n_feats, n_classes); - VectorXd gc = VectorXd::Zero(n_classes); - // projection - MatrixXd v = MatrixXd::Zero(n_feats, n_classes); - - // Lipschitz continuous gradient parameter for line search - double L = 1.0/(n_vecs*n_classes); - // coefficients for search point computation - double alphap = 0, alpha = 1; - - // lambda regularization parameter - double lambda = z; - // objective values - double objective = 0.0; - double objective_p = 0.0; - - int iter = 0; - bool done = false; - CTime time; - //internal::set_is_malloc_allowed(false); - while ((!done) && (iter 0.0 ? aa : 0.0; - // avoid underflow via log-sum-exp trick - fun_s += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb; - double prob = 1.0/(1+CMath::exp(aa)); - double b = ((vec_class == j) ? 
-1.0 : 1.0)*(1-prob);///(n_vecs*n_classes); - // update gradient of intercepts - gc[j] += b; - // update gradient of weight vectors - features->add_to_dense_vec(b, i, g.col(j).data(), n_feats); - } - } - //fun_s /= (n_vecs*n_classes); - - wp = w; - Awp = Aw; - cp = c; - - int inner_iter = 0; - double fun_x = 0; - - // line search process - while (inner_iter<5000) - { - // compute line search point - v = search_w - g/L; - c = search_c - gc/L; - - // compute projection of gradient - if (options.general) - general_altra_mt(w.data(),v.data(),n_classes,n_feats,options.G,options.ind_t,options.n_nodes,lambda/L); - else - altra_mt(w.data(),v.data(),n_classes,n_feats,options.ind_t,options.n_nodes,lambda/L); - v = w - search_w; - - // update dot products - for (j=0; jdense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0); - - // compute objective at search point - fun_x = 0; - for (i=0; i 0.0 ? aa : 0.0; - fun_x += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb; - } - } - //fun_x /= (n_vecs*n_classes); - - // check for termination of line search - double r_sum = (v.squaredNorm() + (c-search_c).squaredNorm())/2; - double l_sum = fun_x - fun_s - v.cwiseProduct(g).sum() - (c-search_c).dot(gc); - - // stop if projected gradient is less than 1e-20 - if (r_sum <= 1e-20) - { - SG_SINFO("Gradient step makes little improvement (%f)\n",r_sum) - done = true; - break; - } - - if (l_sum <= r_sum*L) - break; - else - L = CMath::max(2*L, l_sum/r_sum); - - inner_iter++; - } - - // update alpha coefficients - alphap = alpha; - alpha = (1+CMath::sqrt(4*alpha*alpha+1))/2; - - // update wwp and ccp - wwp = w - wp; - ccp = c - cp; - - // update objectives - objective_p = objective; - objective = fun_x; - - // compute tree norm - double tree_norm = 0.0; - if (options.general) - { - for (i=0; i r_w(n_feats,n_classes); - for (j=0; j r_c(n_classes); - for (j=0; j -#ifdef USE_GPL_SHOGUN -#include -#include -#include - -namespace shogun -{ - -/** Accelerated projected 
gradient solver for multiclass - * logistic regression problem with feature tree regularization. - * - * @param features features to be used - * @param labels labels to be used - * @param z regularization ratio - * @param options options of solver - */ -slep_result_t slep_mc_tree_lr( - CDotFeatures* features, - CMulticlassLabels* labels, - float64_t z, - const slep_options& options); - -}; -#endif //USE_GPL_SHOGUN -#endif /* SLEP_MC_TREE_LR_H_ */ - diff --git a/src/shogun/lib/slep/slep_options.h b/src/shogun/lib/slep/slep_options.h deleted file mode 100644 index 0db0cb975c7..00000000000 --- a/src/shogun/lib/slep/slep_options.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2012 Sergey Lisitsyn - * Copyright (C) 2012 Sergey Lisitsyn - */ - -#ifndef SLEP_OPTIONS_H_ -#define SLEP_OPTIONS_H_ - - -#define IGNORE_IN_CLASSLIST - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include - -namespace shogun -{ - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -IGNORE_IN_CLASSLIST enum slep_mode -{ - MULTITASK_GROUP, - MULTITASK_TREE, - FEATURE_GROUP, - FEATURE_TREE, - PLAIN, - FUSED -}; - -IGNORE_IN_CLASSLIST enum slep_loss -{ - LOGISTIC, - LEAST_SQUARES -}; - -IGNORE_IN_CLASSLIST struct slep_result_t -{ - SGMatrix w; - SGVector c; - - slep_result_t(SGMatrix w_, SGVector c_) - { - w = w_; - c = c_; - } -}; - -IGNORE_IN_CLASSLIST struct slep_options -{ - bool general; - int termination; - double tolerance; - int max_iter; - int restart_num; - int n_nodes; - int n_tasks; - int regularization; - int n_feature_blocks; - int* ind; - double rsL2; - double* ind_t; - double* G; - double* gWeight; - double q; - SGVector* tasks_indices; - slep_loss loss; - slep_mode mode; - slep_result_t* last_result; - - static slep_options 
default_options() - { - slep_options opts; - opts.general = false; - opts.termination = 0; - opts.tolerance = 1e-3; - opts.max_iter = 1000; - opts.restart_num = 100; - opts.regularization = 0; - opts.q = 2.0; - opts.gWeight = NULL; - opts.ind = NULL; - opts.ind_t = NULL; - opts.G = NULL; - opts.rsL2 = 0.0; - opts.last_result = NULL; - opts.tasks_indices = NULL; - opts.loss = LOGISTIC; - opts.mode = MULTITASK_GROUP; - return opts; - } -}; -#endif -} -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef SLEP_OPTIONS_H_ ----- */ - diff --git a/src/shogun/lib/slep/slep_solver.cpp b/src/shogun/lib/slep/slep_solver.cpp deleted file mode 100644 index 47adda59485..00000000000 --- a/src/shogun/lib/slep/slep_solver.cpp +++ /dev/null @@ -1,746 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * Written (W) 2012 Sergey Lisitsyn - * Copyright (C) 2010-2012 Jun Liu, Jieping Ye - */ - - -#include -#ifdef USE_GPL_SHOGUN -#include -#include -#include -#include -#include -#include -#include - -namespace shogun -{ - -double compute_regularizer(double* w, double lambda, double lambda2, int n_vecs, int n_feats, - int n_blocks, const slep_options& options) -{ - double regularizer = 0.0; - switch (options.mode) - { - case MULTITASK_GROUP: - { - for (int i=0; i1) - SG_SERROR("z is not in range [0,1]") - - double q_bar = 0.0; - if (options.q==1) - q_bar = CMath::ALMOST_INFTY; - else if (options.q>1e6) - q_bar = 1; - else - q_bar = options.q/(options.q-1); - - SG_SINFO("q bar = %f \n",q_bar) - - switch (options.mode) - { - case MULTITASK_GROUP: - case MULTITASK_TREE: - { - for (int t=0; t task_idx = options.tasks_indices[t]; - int n_vecs_task = task_idx.vlen; - - switch (options.loss) - { - case LOGISTIC: - { - double b = 0.0; - int m1 = 0, m2 = 0; - for (int i=0; i0) - m1++; - else - m2++; - } - for (int i=0; i0) - b = double(m1)/(m1+m2); - else - b = -double(m2)/(m1+m2); - - features->add_to_dense_vec(b,task_idx[i],ATx+t*n_feats,n_feats); - } - } - break; - case LEAST_SQUARES: - { - for (int i=0; iadd_to_dense_vec(y[task_idx[i]],task_idx[i],ATx+t*n_feats,n_feats); - } - } - } - } - break; - case FEATURE_GROUP: - case FEATURE_TREE: - case PLAIN: - case FUSED: - { - switch (options.loss) - { - case LOGISTIC: - { - int m1 = 0, m2 = 0; - double b = 0.0; - for (int i=0; i0 ? m1++ : m2++; - - SG_SDEBUG("# pos = %d , # neg = %d\n",m1,m2) - - for (int i=0; i0 ? 
b=double(m2) / CMath::sq(n_vecs) : b=-double(m1) / CMath::sq(n_vecs); - features->add_to_dense_vec(b,i,ATx,n_feats); - } - } - break; - case LEAST_SQUARES: - { - for (int i=0; iadd_to_dense_vec(y[i],i,ATx,n_feats); - } - break; - } - } - break; - } - - switch (options.mode) - { - case MULTITASK_GROUP: - { - for (int i=0; ilambda_max) - lambda_max = sum; - } - } - break; - case FEATURE_TREE: - { - if (options.general) - lambda_max = general_findLambdaMax(ATx, n_feats, options.G, options.ind_t, options.n_nodes); - else - lambda_max = findLambdaMax(ATx, n_feats, options.ind_t, options.n_nodes); - } - break; - case PLAIN: - case FUSED: - { - double max = 0.0; - for (int i=0; i max) - max = CMath::abs(ATx[i]); - } - lambda_max = max; - } - break; - } - - SG_SINFO("Computed lambda = %f * %f = %f\n",z,lambda_max,z*lambda_max) - return z*lambda_max; -} - -void projection(double* w, double* v, int n_feats, int n_blocks, double lambda, double lambda2, - double L, double* z, double* z0, const slep_options& options) -{ - switch (options.mode) - { - case MULTITASK_GROUP: - eppMatrix(w, v, n_feats, n_blocks, lambda/L, options.q); - break; - case MULTITASK_TREE: - if (options.general) - general_altra_mt(w, v, n_feats, n_blocks, options.G, options.ind_t, options.n_nodes, lambda/L); - else - altra_mt(w, v, n_feats, n_blocks, options.ind_t, options.n_nodes, lambda/L); - break; - case FEATURE_GROUP: - eppVector(w, v, options.ind, n_blocks, n_feats, options.gWeight, lambda/L, options.q > 1e6 ? 
1e6 : options.q); - break; - case FEATURE_TREE: - if (options.general) - general_altra(w, v, n_feats, options.G, options.ind_t, options.n_nodes, lambda/L); - else - altra(w, v, n_feats, options.ind_t, options.n_nodes, lambda/L); - break; - case PLAIN: - for (int i=0; i task_idx = options.tasks_indices[t]; - int n_vecs_task = task_idx.vlen; - switch (options.loss) - { - case LOGISTIC: - gc[t] = 0.0; - for (int i=0; iadd_to_dense_vec(b,task_idx[i],g+t*n_feats,n_feats); - } - break; - case LEAST_SQUARES: - for (int i=0; iadd_to_dense_vec(As[task_idx[i]],task_idx[i],g+t*n_feats,n_feats); - break; - } - } - break; - case FEATURE_GROUP: - case FEATURE_TREE: - case PLAIN: - case FUSED: - switch (options.loss) - { - case LOGISTIC: - gc[0] = 0.0; - - for (int i=0; i0) - fun_s += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)*pos_weight; - else - fun_s += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)*neg_weight; - */ - double prob = 1.0/(1.0+CMath::exp(aa)); - //double b = 0; - double b = -y[i]*(1.0-prob)/n_vecs; - /* - if (y[i]>0) - b = -y[i]*(1.0-prob)*pos_weight; - else - b = -y[i]*(1.0-prob)*neg_weight; - */ - gc[0] += b; - features->add_to_dense_vec(b,i,g,n_feats); - } - fun_s /= n_vecs; - break; - case LEAST_SQUARES: - for (int i=0; iadd_to_dense_vec(As[i],i,g,n_feats); - break; - } - break; - } - SG_SDEBUG("G=%f\n", CMath::dot(g,g,n_feats*n_tasks)) - - return fun_s; -} - -slep_result_t slep_solver( - CDotFeatures* features, - double* y, - double z, - const slep_options& options) -{ - int i,t; - int n_feats = features->get_dim_feature_space(); - int n_vecs = features->get_num_vectors(); - double lambda, beta; - double funcp = 0.0, func = 0.0; - - int n_blocks = 0; - int n_tasks = 0; - - switch (options.mode) - { - case MULTITASK_GROUP: - case MULTITASK_TREE: - n_tasks = options.n_tasks; - n_blocks = options.n_tasks; - break; - case FEATURE_GROUP: - case FEATURE_TREE: - n_tasks = 1; - n_blocks = options.n_feature_blocks; - break; - case PLAIN: - case 
FUSED: - n_tasks = 1; - n_blocks = 1; - break; - } - SG_SDEBUG("n_tasks = %d, n_blocks = %d\n",n_tasks,n_blocks) - SG_SDEBUG("n_nodes = %d\n",options.n_nodes) - - int iter = 1; - bool done = false; - bool gradient_break = false; - - double rsL2 = options.rsL2; - - double* ATx = SG_CALLOC(double, n_feats*n_tasks); - if (options.regularization!=0) - { - lambda = compute_lambda(ATx, z, features, y, n_vecs, n_feats, n_blocks, options); - rsL2*= lambda; - } - else - lambda = z; - - double lambda2 = 0.0; - - SGMatrix w(n_feats,n_tasks); - w.zero(); - SGVector c(n_tasks); - c.zero(); - - if (options.last_result) - { - w = options.last_result->w; - c = options.last_result->c; - } - - double* s = SG_CALLOC(double, n_feats*n_tasks); - double* sc = SG_CALLOC(double, n_tasks); - double* g = SG_CALLOC(double, n_feats*n_tasks); - double* v = SG_CALLOC(double, n_feats*n_tasks); - double* z_flsa = SG_CALLOC(double, n_feats); - double* z0_flsa = SG_CALLOC(double, n_feats); - - double* Aw = SG_CALLOC(double, n_vecs); - switch (options.mode) - { - case MULTITASK_GROUP: - case MULTITASK_TREE: - { - for (t=0; t task_idx = options.tasks_indices[t]; - //task_idx.display_vector("task"); - int n_vecs_task = task_idx.vlen; - for (i=0; idense_dot(task_idx[i],w.matrix+t*n_feats,n_feats); - } - } - break; - case FEATURE_GROUP: - case FEATURE_TREE: - case PLAIN: - case FUSED: - { - for (i=0; idense_dot(i,w.matrix,n_feats); - } - break; - } - - double* Av = SG_MALLOC(double, n_vecs); - double* As = SG_MALLOC(double, n_vecs); - - double L = 1.0/n_vecs; - - if (options.mode==FUSED) - L += rsL2; - - double* wp = SG_CALLOC(double, n_feats*n_tasks); - for (i=0; i task_idx = options.tasks_indices[t]; - int n_vecs_task = task_idx.vlen; - for (i=0; idense_dot(task_idx[i],w.matrix+t*n_feats,n_feats); - if (options.loss==LOGISTIC) - { - double aa = -y[task_idx[i]]*(Aw[task_idx[i]]+c[t]); - double bb = CMath::max(aa,0.0); - fun_x += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb); - } - } - } - 
break; - case FEATURE_GROUP: - case FEATURE_TREE: - case PLAIN: - case FUSED: - for (i=0; idense_dot(i, w.matrix, n_feats); - if (options.loss==LOGISTIC) - { - double aa = -y[i]*(Aw[i]+c[0]); - double bb = CMath::max(aa,0.0); - if (y[i]>0) - fun_x += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb);//*pos_weight; - else - fun_x += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb);//*neg_weight; - } - } - break; - } - if (options.loss==LOGISTIC) - fun_x /= n_vecs; - if (options.mode==PLAIN || options.mode==FUSED) - fun_x += rsL2/2 * CMath::dot(w.matrix,w.matrix,n_feats); - - double l_sum = 0.0, r_sum = 0.0; - switch (options.loss) - { - case LOGISTIC: - r_sum = CMath::dot(v,v,n_feats*n_tasks); - l_sum = fun_x - fun_s - CMath::dot(v,g,n_feats*n_tasks); - for (t=0; t=2) - { - step = CMath::abs(func-funcp); - if (step <= options.tolerance) - { - SG_SINFO("Objective changes less than tolerance\n") - done = true; - } - } - break; - case 1: - if (iter>=2) - { - step = CMath::abs(func-funcp); - if (step <= step*options.tolerance) - { - SG_SINFO("Objective changes relatively less than tolerance\n") - done = true; - } - } - break; - case 2: - if (func <= options.tolerance) - { - SG_SINFO("Objective is less than tolerance\n") - done = true; - } - break; - case 3: - norm_wwp = CMath::sqrt(CMath::dot(wwp,wwp,n_feats*n_tasks)); - if (norm_wwp <= options.tolerance) - done = true; - break; - case 4: - norm_wp = CMath::sqrt(CMath::dot(wp,wp,n_feats*n_tasks)); - norm_wwp = CMath::sqrt(CMath::dot(wwp,wwp,n_feats*n_tasks)); - if (norm_wwp <= options.tolerance*CMath::max(norm_wp,1.0)) - done = true; - break; - default: - done = true; - } - - iter++; - } - SG_SINFO("Finished %d iterations, objective = %f\n", iter, func) - - SG_FREE(ATx); - SG_FREE(wp); - SG_FREE(wwp); - SG_FREE(s); - SG_FREE(sc); - SG_FREE(cp); - SG_FREE(ccp); - SG_FREE(g); - SG_FREE(v); - SG_FREE(Aw); - SG_FREE(Awp); - SG_FREE(Av); - SG_FREE(As); - SG_FREE(gc); - SG_FREE(z_flsa); - SG_FREE(z0_flsa); - - 
return slep_result_t(w,c); -}; -}; - -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/slep/slep_solver.h b/src/shogun/lib/slep/slep_solver.h deleted file mode 100644 index cf77ffa0670..00000000000 --- a/src/shogun/lib/slep/slep_solver.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2012 Sergey Lisitsyn - * Copyright (C) 2010-2012 Jun Liu, Jieping Ye - */ - -#ifndef SLEP_MT_LOGISTIC_H_ -#define SLEP_MT_LOGISTIC_H_ - - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include - -namespace shogun -{ - -/** Learning optimization task solver ported from the - * SLEP (Sparse LEarning Package) library. - * - * Based on accelerated projected gradient method. - * - * Supports two types of losses: logistic and least squares. - * - * Supports multitask problems (task group [MULTITASK_GROUP] - * and task tree [MULTITASK_TREE] relations), - * problems with feature relations (feature group [FEATURE_GROUP] - * and feature tree [FEATURE_TREE]), - * basic regularized problems [PLAIN] and fused formulation. - * - */ -slep_result_t slep_solver( - CDotFeatures* features, - double* y, - double z, - const slep_options& options); - -}; -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef SLEP_LOGISTIC_H_ ----- */ - diff --git a/src/shogun/lib/slep/tree/altra.cpp b/src/shogun/lib/slep/tree/altra.cpp deleted file mode 100644 index 5c4e5453233..00000000000 --- a/src/shogun/lib/slep/tree/altra.cpp +++ /dev/null @@ -1,402 +0,0 @@ -/* This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - - -#include -#ifdef USE_GPL_SHOGUN -#include -#include -#include - -void altra(double *x, double *v, int n, double *ind, int nodes, double mult) -{ - int i, j; - double lambda,twoNorm, ratio; - - /* - * test whether the first node is special - */ - if ((int) ind[0]==-1){ - - /* - *Recheck whether ind[1] equals to zero - */ - if ((int) ind[1]!=-1){ - printf("\n Error! \n Check ind"); - exit(1); - } - - lambda=mult*ind[2]; - - for(j=0;jlambda) - x[j]=v[j]-lambda; - else - if (v[j]<-lambda) - x[j]=v[j]+lambda; - else - x[j]=0; - } - - i=1; - } - else{ - shogun::sg_memcpy(x, v, sizeof(double) * n); - i=0; - } - - /* - * sequentially process each node - * - */ - for(;i < nodes; i++){ - /* - * compute the L2 norm of this group - */ - twoNorm=0; - for(j=(int) ind[3*i]-1;j< (int) ind[3*i+1];j++) - twoNorm += x[j] * x[j]; - twoNorm=sqrt(twoNorm); - - lambda=mult*ind[3*i+2]; - if (twoNorm>lambda){ - ratio=(twoNorm-lambda)/twoNorm; - - /* - * shrinkage this group by ratio - */ - for(j=(int) ind[3*i]-1;j<(int) ind[3*i+1];j++) - x[j]*=ratio; - } - else{ - /* - * threshold this group to zero - */ - for(j=(int) ind[3*i]-1;j<(int) ind[3*i+1];j++) - x[j]=0; - } - } -} - -void altra_mt(double *X, double *V, int n, int k, double *ind, int nodes, double mult) -{ - int i, j; - - double *x=(double *)malloc(sizeof(double)*k); - double *v=(double *)malloc(sizeof(double)*k); - - for (i=0;i*lambda2_max ) - *lambda2_max=twoNorm; - } -} - -double treeNorm(double *x, int ldx, int n, double *ind, int nodes){ - - int i, j; - double twoNorm, lambda; - - double tree_norm 
= 0; - - /* - * test whether the first node is special - */ - if ((int) ind[0]==-1){ - - /* - *Recheck whether ind[1] equals to zero - */ - if ((int) ind[1]!=-1){ - printf("\n Error! \n Check ind"); - exit(1); - } - - lambda=ind[2]; - - for(j=0;j=n) { - /*x is a zero vector*/ - lambda2=lambda; - lambda1=lambda; - - num=0; - - while(1){ - num++; - - lambda2=lambda; - lambda1=lambda1/2; - /* update ind2 - */ - for(i=0;i=n){ - break; - /*x is a zero vector - *we have found lambda2 - */ - } - } - } - - /* - printf("\n num=%d, lambda1=%2.5f, lambda2=%2.5f",num, lambda1,lambda2); - */ - - while ( fabs(lambda2-lambda1) > lambda2 * 1e-10 ){ - - num++; - - lambda=(lambda1+lambda2)/2; - - /* update ind2 - */ - for(i=0;i=n){ - lambda2=lambda; - } - else{ - lambda1=lambda; - } - - /* - printf("\n lambda1=%2.5f, lambda2=%2.5f",lambda1,lambda2); - */ - } - - /* - printf("\n num=%d",num); - - printf(" lambda1=%2.5f, lambda2=%2.5f",lambda1,lambda2); - -*/ - - free(x); - free(ind2); - - return lambda2; -} - -double findLambdaMax_mt(double *V, int n, int k, double *ind, int nodes) -{ - int i, j; - - double *v=(double *)malloc(sizeof(double)*k); - double lambda; - - double lambdaMax=0; - - for (i=0;ilambdaMax) - lambdaMax=lambda; - } - - /* - printf("\n *lambdaMax=%5.2f",*lambdaMax); - */ - - free(v); - return lambdaMax; -} - -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/slep/tree/altra.h b/src/shogun/lib/slep/tree/altra.h deleted file mode 100644 index 904b8bbde81..00000000000 --- a/src/shogun/lib/slep/tree/altra.h +++ /dev/null @@ -1,183 +0,0 @@ -/* This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - - -#ifndef ALTRA_SLEP -#define ALTRA_SLEP - -#include -#ifdef USE_GPL_SHOGUN - - - -/* - * Important Notice: September 20, 2010 - * - * In this head file, we assume that the features in the tree strucutre - * are well ordered. That is to say, the indices of the left nodes is always less - * than the right nodes. Ideally, this can be achieved by reordering the features. - * - * The advantage of this ordered features is that, we donot need to use an explicit - * variable for recording the indices. - * - * To deal with the more general case when the features might not be well ordered, - * we provide the functions in the head file "general_altra.h". Compared with the files in this head file, - * we need an additional parameter G, which contains the indices of the nodes. - * - * - */ - -/* - * ------------------------------------------------------------------- - * Functions and parameter - * ------------------------------------------------------------------- - * - * altra solves the following problem - * - * 1/2 \|x-v\|^2 + \sum \lambda_i \|x_{G_i}\|, - * - * where x and v are of dimension n, - * \lambda_i >=0, and G_i's follow the tree structure - * - * It is implemented in Matlab as follows: - * - * x=altra(v, n, ind, nodes); - * - * ind is a 3 x nodes matrix. - * Each column corresponds to a node. - * - * The first element of each column is the starting index, - * the second element of each column is the ending index - * the third element of each column corrreponds to \lambbda_i. 
- * - * ------------------------------------------------------------------- - * Notices: - * ------------------------------------------------------------------- - * - * 1. The nodes in the parameter "ind" should be given in the - * either - * the postordering of depth-first traversal - * or - * the reverse breadth-first traversal. - * - * 2. When each elements of x are penalized via the same L1 - * (equivalent to the L2 norm) parameter, one can simplify the input - * by specifying - * the "first" column of ind as (-1, -1, lambda) - * - * In this case, we treat it as a single "super" node. Thus in the value - * nodes, we only count it once. - * - * 3. The values in "ind" are in [1,n]. - * - * 4. The third element of each column should be positive. The program does - * not check the validity of the parameter. - * - * It is still valid to use the zero regularization parameter. - * In this case, the program does not change the values of - * correponding indices. - * - * - * ------------------------------------------------------------------- - * History: - * ------------------------------------------------------------------- - * - * Composed by Jun Liu on April 20, 2010 - * - * For any question or suggestion, please email j.liu@asu.edu. - * - */ -void altra(double *x, double *v, int n, double *ind, int nodes, double mult=1.0); - -/* - * altra_mt is a generalization of altra to the - * - * multi-task learning scenario (or equivalently the multi-class case) - * - * altra_mt(X, V, n, k, ind, nodes); - * - * It applies altra for each row (1xk) of X and V - * - */ -void altra_mt(double *X, double *V, int n, int k, double *ind, int nodes, double mult=1.0); - -/* - * compute - * lambda2_max=computeLambda2Max(x,n,ind,nodes); - * - * compute the 2 norm of each group, which is divided by the ind(3,:), - * then the maximum value is returned - */ -/* - *This function does not consider the case ind={[-1, -1, 100]',...} - * - *This functions is not used currently. 
- */ -void computeLambda2Max(double *lambda2_max, double *x, int n, double *ind, int nodes); - -/* - * ------------------------------------------------------------------- - * Function and parameter - * ------------------------------------------------------------------- - * - * treeNorm compute - * - * \sum \lambda_i \|x_{G_i}\|, - * - * where x is of dimension n, - * \lambda_i >=0, and G_i's follow the tree structure - * - * The file is implemented in the following in Matlab: - * - * tree_norm=treeNorm(x, n, ind,nodes); - */ -double treeNorm(double *x, int ldx, int n, double *ind, int nodes); - -/* - * ------------------------------------------------------------------- - * Function and parameter - * ------------------------------------------------------------------- - * - * findLambdaMax compute - * - * the lambda_{max} that achieves a zero solution for - * - * min 1/2 \|x-v\|^2 + \lambda_{\max} * \sum w_i \|x_{G_i}\|, - * - * where x is of dimension n, - * w_i >=0, and G_i's follow the tree structure - * - * The file is implemented in the following in Matlab: - * - * lambdaMax=findLambdaMax(v, n, ind,nodes); - */ -double findLambdaMax(double *v, int n, double *ind, int nodes); - -/* - * findLambdaMax_mt is a generalization of findLambdaMax to the - * - * multi-task learning scenario (or equivalently the multi-class case) - * - * lambdaMax=findLambdaMax_mt(X, V, n, k, ind, nodes); - * - * It applies findLambdaMax for each row (1xk) of X and V - * - */ -double findLambdaMax_mt(double *V, int n, int k, double *ind, int nodes); -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef ALTRA_SLEP ----- */ - - diff --git a/src/shogun/lib/slep/tree/general_altra.cpp b/src/shogun/lib/slep/tree/general_altra.cpp deleted file mode 100644 index feab1ae62bf..00000000000 --- a/src/shogun/lib/slep/tree/general_altra.cpp +++ /dev/null @@ -1,405 +0,0 @@ -/* This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as 
published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include -#include - -void general_altra(double *x, double *v, int n, double *G, double *ind, int nodes, double mult) -{ - - int i, j; - double lambda,twoNorm, ratio; - - /* - * test whether the first node is special - */ - if ((int) ind[0]==-1){ - - /* - *Recheck whether ind[1] equals to zero - */ - if ((int) ind[1]!=-1){ - printf("\n Error! \n Check ind"); - exit(1); - } - - lambda=mult*ind[2]; - - for(j=0;jlambda) - x[j]=v[j]-lambda; - else - if (v[j]<-lambda) - x[j]=v[j]+lambda; - else - x[j]=0; - } - - i=1; - } - else{ - shogun::sg_memcpy(x, v, sizeof(double) * n); - i=0; - } - - /* - * sequentially process each node - * - */ - for(;i < nodes; i++){ - /* - * compute the L2 norm of this group - */ - twoNorm=0; - for(j=(int) ind[3*i]-1;j< (int) ind[3*i+1];j++) - twoNorm += x[(int) G[j]-1 ] * x[(int) G[j]-1 ]; - twoNorm=sqrt(twoNorm); - - lambda=mult*ind[3*i+2]; - if (twoNorm>lambda){ - ratio=(twoNorm-lambda)/twoNorm; - - /* - * shrinkage this group by ratio - */ - for(j=(int) ind[3*i]-1;j<(int) ind[3*i+1];j++) - x[(int) G[j]-1 ]*=ratio; - } - else{ - /* - * threshold this group to zero - */ - for(j=(int) ind[3*i]-1;j<(int) ind[3*i+1];j++) - x[(int) G[j]-1 ]=0; - } - } -} - -void general_altra_mt(double *X, double *V, int n, int k, double *G, double *ind, int nodes, double mult) -{ - int i, j; - - double *x=(double *)malloc(sizeof(double)*k); - double *v=(double 
*)malloc(sizeof(double)*k); - - for (i=0;i*lambda2_max ) - *lambda2_max=twoNorm; - } -} - -double general_treeNorm(double *x, int ldx, int n, double *G, double *ind, int nodes) -{ - - int i, j; - double twoNorm, lambda; - - double tree_norm=0; - - /* - * test whether the first node is special - */ - if ((int) ind[0]==-1){ - - /* - *Recheck whether ind[1] equals to zero - */ - if ((int) ind[1]!=-1){ - printf("\n Error! \n Check ind"); - exit(1); - } - - lambda=ind[2]; - - for(j=0;j=n) { - /*x is a zero vector*/ - lambda2=lambda; - lambda1=lambda; - - num=0; - - while(1){ - num++; - - lambda2=lambda; - lambda1=lambda1/2; - /* update ind2 - */ - for(i=0;i=n){ - break; - /*x is a zero vector - *we have found lambda2 - */ - } - } - } - - /* - printf("\n num=%d, lambda1=%2.5f, lambda2=%2.5f",num, lambda1,lambda2); - */ - - while ( fabs(lambda2-lambda1) > lambda2 * 1e-10 ){ - - num++; - - lambda=(lambda1+lambda2)/2; - - /* update ind2 - */ - for(i=0;i=n){ - lambda2=lambda; - } - else{ - lambda1=lambda; - } - - /* - printf("\n lambda1=%2.5f, lambda2=%2.5f",lambda1,lambda2); - */ - } - - /* - printf("\n num=%d",num); - - printf(" lambda1=%2.5f, lambda2=%2.5f",lambda1,lambda2); - */ - - free(x); - free(ind2); - - return lambda2; -} - -double general_findLambdaMax_mt(double *V, int n, int k, double *G, double *ind, int nodes) -{ - int i, j; - - double *v=(double *)malloc(sizeof(double)*k); - double lambda; - - double lambdaMax=0; - - for (i=0;ilambdaMax) - lambdaMax=lambda; - } - - /* - printf("\n *lambdaMax=%5.2f",*lambdaMax); - */ - - free(v); - return lambdaMax; -} - -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/lib/slep/tree/general_altra.h b/src/shogun/lib/slep/tree/general_altra.h deleted file mode 100644 index 75970bcb685..00000000000 --- a/src/shogun/lib/slep/tree/general_altra.h +++ /dev/null @@ -1,197 +0,0 @@ -/* This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the 
Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye - */ - - -#ifndef GENERAL_ALTRA_SLEP -#define GENERAL_ALTRA_SLEP - -#include -#ifdef USE_GPL_SHOGUN - - -/* - * Important Notice: September 20, 2010 - * - * In this head file, we deal with the case that the features might not be well ordered. - * - * If the features in the tree strucutre are well ordered, i.e., the indices of the left nodes is always less - * than the right nodes, please refer to "altra.h". - * - * The advantage of "altra.h" is that, we donot need to use an explicit - * variable for recording the indices. - * - * - */ - -/* - * ------------------------------------------------------------------- - * Functions and parameter - * ------------------------------------------------------------------- - * - * general_altra solves the following problem - * - * 1/2 \|x-v\|^2 + \sum \lambda_i \|x_{G_i}\|, - * - * where x and v are of dimension n, - * \lambda_i >=0, and G_i's follow the tree structure - * - * It is implemented in Matlab as follows: - * - * x=general_altra(v, n, G, ind, nodes); - * - * G contains the indices of the groups. - * It is a row vector. Its length equals to \sum_i \|G_i\|. - * If all the entries are penalized with L1 norm, - * its length is \sum_i \|G_i\| - n. - * - * ind is a 3 x nodes matrix. - * Each column corresponds to a node. - * - * The first element of each column is the starting index, - * the second element of each column is the ending index - * the third element of each column corrreponds to \lambbda_i. 
- * - * - * - * The following example shows how G and ind works: - * - * G={ {1, 2}, {4, 5}, {3, 6}, {7, 8}, - * {1, 2, 3, 6}, {4, 5, 7, 8}, - * {1, 2, 3, 4, 5, 6, 7, 8} }. - * - * ind={ [1, 2, 100]', [3, 4, 100]', [5, 6, 100]', [7, 8, 100]', - * [9, 12, 100]', [13, 16, 100]', [17, 24, 100]' } - * - * where "100" denotes the weight for the nodes. - * - * - * - * ------------------------------------------------------------------- - * Notices: - * ------------------------------------------------------------------- - * - * 1. The features in the tree might not be well ordered. Otherwise, you are - * suggested to use "altra.h". - * - * 2. When each elements of x are penalized via the same L1 - * (equivalent to the L2 norm) parameter, one can simplify the input - * by specifying - * the "first" column of ind as (-1, -1, lambda) - * - * In this case, we treat it as a single "super" node. Thus in the value - * nodes, we only count it once. - * - * 3. The values in "ind" are in [1,length(G)]. - * - * 4. The third element of each column should be positive. The program does - * not check the validity of the parameter. - * - * 5. The values in G should be within [1, n]. - * - * It is still valid to use the zero regularization parameter. - * In this case, the program does not change the values of - * correponding indices. - * - * - * ------------------------------------------------------------------- -* History: -* ------------------------------------------------------------------- -* -* Composed by Jun Liu on April 20, 2010 -* -* For any question or suggestion, please email j.liu@asu.edu. 
-* -*/ -void general_altra(double *x, double *v, int n, double *G, double *ind, int nodes, double mult=1.0); - -/* - * altra_mt is a generalization of altra to the - * - * multi-task learning scenario (or equivalently the multi-class case) - * - * altra_mt(X, V, n, k, G, ind, nodes); - * - * It applies altra for each row (1xk) of X and V - * - */ -void general_altra_mt(double *X, double *V, int n, int k, double *G, double *ind, int nodes, double mult=1.0); - -/* - * compute - * lambda2_max=general_computeLambda2Max(x,n,G, ind,nodes); - * - * compute the 2 norm of each group, which is divided by the ind(3,:), - * then the maximum value is returned - */ -/* - *This function does not consider the case ind={[-1, -1, 100]',...} - * - *This functions is not used currently. - */ -void general_computeLambda2Max(double *lambda2_max, double *x, int n, double *G, double *ind, int nodes); - -/* - * ------------------------------------------------------------------- - * Function and parameter - * ------------------------------------------------------------------- - * - * treeNorm compute - * - * \sum \lambda_i \|x_{G_i}\|, - * - * where x is of dimension n, - * \lambda_i >=0, and G_i's follow the tree structure - * - * The file is implemented in the following in Matlab: - * - * tree_norm=general_treeNorm(x, n, G, ind,nodes); - */ -double general_treeNorm(double *x, int ldx, int n, double *G, double *ind, int nodes); - -/* - * ------------------------------------------------------------------- - * Function and parameter - * ------------------------------------------------------------------- - * - * findLambdaMax compute - * - * the lambda_{max} that achieves a zero solution for - * - * min 1/2 \|x-v\|^2 + \lambda_{\max} * \sum w_i \|x_{G_i}\|, - * - * where x is of dimension n, - * w_i >=0, and G_i's follow the tree structure - * - * The file is implemented in the following in Matlab: - * - * lambdaMax=general_findLambdaMax(v, n, G, ind,nodes); - */ -double 
general_findLambdaMax(double *v, int n, double *G, double *ind, int nodes); - -/* - * findLambdaMax_mt is a generalization of findLambdaMax to the - * - * multi-task learning scenario (or equivalently the multi-class case) - * - * lambdaMax=general_findLambdaMax_mt(X, V, n, k, G, ind, nodes); - * - * It applies findLambdaMax for each row (1xk) of X and V - * - */ -double general_findLambdaMax_mt(double *V, int n, int k, double *G, double *ind, int nodes); -#endif //USE_GPL_SHOGUN -#endif /* ----- #ifndef GENERAL_ALTRA_SLEP ----- */ - diff --git a/src/shogun/lib/tapkee/neighbors/covertree.hpp b/src/shogun/lib/tapkee/neighbors/covertree.hpp deleted file mode 100644 index 640a3fdb081..00000000000 --- a/src/shogun/lib/tapkee/neighbors/covertree.hpp +++ /dev/null @@ -1,873 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Copyright (c) 2009-2013 John Langford, Dinoj Surendran, Fernando José Iglesias García - */ - -#ifndef COVERTREE_H_ -#define COVERTREE_H_ - -#ifdef USE_GPL_SHOGUN - -/* Tapkee includes */ -#include -/* End of Tapkee includes */ - -#include -#include -#include -#include - -#ifdef _WIN32 -#undef far -#undef near -#endif -/* First written by John Langford jl@hunch.net - Templatization by Dinoj Surendran dinojs@gmail.com - Adaptation to Shogun by Fernando José Iglesias García - */ -namespace tapkee -{ -namespace tapkee_internal -{ - -/** - * Cover tree node TODO better doc - */ -template -struct node -{ - node() : p(), max_dist(0.0), parent_dist(0.0), - children(NULL), num_children(0), scale(0) - { - } - - node(P _p, ScalarType _max_dist, ScalarType _parent_dist, node

    * _children, - unsigned short int _num_children, short int _scale) : p(_p), - max_dist(_max_dist), parent_dist(_parent_dist), children(_children), - num_children(_num_children), scale(_scale) - { - } - - /** Point */ - P p; - - /** The maximum distance to any grandchild */ - ScalarType max_dist; - - /** The distance to the parent */ - ScalarType parent_dist; - - /** Pointer to the list of children of this node */ - node

    * children; - - /** The number of children nodes of this node */ - unsigned short int num_children; - - /** Essentially, an upper bound on the distance to any child */ - short int scale; -}; - -template -void free_children(const node

    & n) -{ - for (int i=0; i(n.children[i]); - n.children[i].~node

    (); - } - free(n.children); -} - - -/** - * Cover tree node with an associated set of distances TODO better doc - */ -template -struct ds_node { - - ds_node() : dist(), p() {} - - /** Vector of distances TODO better doc*/ - v_array dist; - - /** Point TODO better doc */ - P p; -}; - -static ScalarType base = COVERTREE_BASE; -static ScalarType il2 = 1. / log(base); - -inline ScalarType dist_of_scale (int s) -{ - return pow(base, s); -} - -inline int get_scale(ScalarType d) -{ - return (int)ceil(il2 * log(d)); -} - - template -node

    new_node(const P &p) -{ - node

    new_node; - new_node.p = p; - return new_node; -} - - template -node

    new_leaf(const P &p) -{ - node

    new_leaf(p,0.,0.,NULL,0,100); - return new_leaf; -} - - template -ScalarType max_set(v_array > &v) -{ - ScalarType max = 0.; - for (int i = 0; i < v.index; i++) - if ( max < v[i].dist.last()) - max = v[i].dist.last(); - return max; -} - -void print_space(int s) -{ - for (int i = 0; i < s; i++) - printf(" "); -} - -template -void print(int depth, node

    &top_node) -{ - print_space(depth); - print(top_node.p); - if ( top_node.num_children > 0 ) - { - print_space(depth); - printf("scale = %i\n",top_node.scale); - print_space(depth); - printf("max_dist = %f\n",top_node.max_dist); - print_space(depth); - printf("num children = %i\n",top_node.num_children); - for (int i = 0; i < top_node.num_children;i++) - print(depth+1, top_node.children[i]); - } -} - -template -void split(v_array >& point_set, v_array >& far_set, int max_scale) -{ - IndexType new_index = 0; - ScalarType fmax = dist_of_scale(max_scale); - for (int i = 0; i < point_set.index; i++) - { - if (point_set[i].dist.last() <= fmax) - { - point_set[new_index++] = point_set[i]; - } - else - push(far_set,point_set[i]); - } - point_set.index=new_index; -} - -template -void dist_split(DistanceCallback& dcb, v_array >& point_set, - v_array >& new_point_set, - P new_point, - int max_scale) -{ - IndexType new_index = 0; - ScalarType fmax = dist_of_scale(max_scale); - for(int i = 0; i < point_set.index; i++) - { - ScalarType new_d; - new_d = distance(dcb, new_point, point_set[i].p, fmax); - if (new_d <= fmax ) - { - push(point_set[i].dist, new_d); - push(new_point_set,point_set[i]); - } - else - point_set[new_index++] = point_set[i]; - } - point_set.index = new_index; -} - -/* - max_scale is the maximum scale of the node we might create here. - point_set contains points which are 2*max_scale or less away. - */ -template -node

    batch_insert(DistanceCallback& dcb, const P& p, - int max_scale, - int top_scale, - v_array >& point_set, - v_array >& consumed_set, - v_array > >& stack) -{ - if (point_set.index == 0) - return new_leaf(p); - else { - ScalarType max_dist = max_set(point_set); //O(|point_set|) - int next_scale = std::min(max_scale - 1, get_scale(max_dist)); - if (next_scale == -2147483647-1) // We have points with distance 0. - { - v_array > children; - push(children,new_leaf(p)); - while (point_set.index > 0) - { - push(children,new_leaf(point_set.last().p)); - push(consumed_set,point_set.last()); - point_set.decr(); - } - node

    n = new_node(p); - n.scale = 100; // A magic number meant to be larger than all scales. - n.max_dist = 0; - alloc(children,children.index); - n.num_children = children.index; - n.children = children.elements; - return n; - } - else - { - v_array > far = pop(stack); - split(point_set,far,max_scale); //O(|point_set|) - - node

    child = batch_insert(dcb, p, next_scale, top_scale, point_set, consumed_set, stack); - - if (point_set.index == 0) - { - push(stack,point_set); - point_set=far; - return child; - } - else { - node

    n = new_node(p); - v_array > children; - push(children, child); - v_array > new_point_set = pop(stack); - v_array > new_consumed_set = pop(stack); - while (point_set.index != 0) { //O(|point_set| * num_children) - P new_point = point_set.last().p; - ScalarType new_dist = point_set.last().dist.last(); - push(consumed_set, point_set.last()); - point_set.decr(); - - dist_split(dcb,point_set,new_point_set,new_point,max_scale); //O(|point_saet|) - dist_split(dcb,far,new_point_set,new_point,max_scale); //O(|far|) - - node

    new_child = - batch_insert(dcb, new_point, next_scale, top_scale, new_point_set, new_consumed_set, stack); - new_child.parent_dist = new_dist; - - push(children, new_child); - - ScalarType fmax = dist_of_scale(max_scale); - for(int i = 0; i< new_point_set.index; i++) //O(|new_point_set|) - { - new_point_set[i].dist.decr(); - if (new_point_set[i].dist.last() <= fmax) - push(point_set, new_point_set[i]); - else - push(far, new_point_set[i]); - } - for(int i = 0; i< new_consumed_set.index; i++) //O(|new_point_set|) - { - new_consumed_set[i].dist.decr(); - push(consumed_set, new_consumed_set[i]); - } - new_point_set.index = 0; - new_consumed_set.index = 0; - } - push(stack,new_point_set); - push(stack,new_consumed_set); - push(stack,point_set); - point_set=far; - n.scale = top_scale - max_scale; - n.max_dist = max_set(consumed_set); - alloc(children,children.index); - n.num_children = children.index; - n.children = children.elements; - return n; - } - } - } -} - -template -node

    batch_create(DistanceCallback& dcb, v_array

    points) -{ - assert(points.index > 0); - v_array > point_set; - v_array > > stack; - - for (int i = 1; i < points.index; i++) { - ds_node

    temp; - push(temp.dist, distance(dcb, points[0], points[i], std::numeric_limits::max())); - temp.p = points[i]; - push(point_set,temp); - } - - v_array > consumed_set; - - ScalarType max_dist = max_set(point_set); - - node

    top = batch_insert (dcb, points[0], - get_scale(max_dist), - get_scale(max_dist), - point_set, - consumed_set, - stack); - for (int i = 0; i &heights) -{ - if (heights.index <= d) - for(;heights.index <= d;) - push(heights,0); - heights[d] = heights[d] + 1; -} - -template -int height_dist(const node

    top_node,v_array &heights) -{ - if (top_node.num_children == 0) - { - add_height(0,heights); - return 0; - } - else - { - int max_v=0; - for (int i = 0; i max_v) - max_v = d; - } - add_height(1 + max_v, heights); - return (1 + max_v); - } -} - -template -void depth_dist(int top_scale, const node

    top_node,v_array &depths) -{ - if (top_node.num_children > 0) - for (int i = 0; i -void breadth_dist(const node

    top_node,v_array &breadths) -{ - if (top_node.num_children == 0) - add_height(0,breadths); - else - { - for (int i = 0; i -struct d_node -{ - /** Distance TODO better doc*/ - ScalarType dist; - - /** List of nodes TODO better doc*/ - const node

    *n; -}; - -template -inline ScalarType compare(const d_node

    *p1, const d_node

    * p2) -{ - return p1 -> dist - p2 -> dist; -} - -template -void halfsort (v_array > cover_set) -{ - if (cover_set.index <= 1) - return; - register d_node

    *base_ptr = cover_set.elements; - - d_node

    *hi = &base_ptr[cover_set.index - 1]; - d_node

    *right_ptr = hi; - d_node

    *left_ptr; - - while (right_ptr > base_ptr) - { - d_node

    *mid = base_ptr + ((hi - base_ptr) >> 1); - - if (compare ( mid, base_ptr) < 0.) - std::swap(*mid, *base_ptr); - if (compare ( hi, mid) < 0.) - std::swap(*mid, *hi); - else - goto jump_over; - if (compare ( mid, base_ptr) < 0.) - std::swap(*mid, *base_ptr); -jump_over:; - - left_ptr = base_ptr + 1; - right_ptr = hi - 1; - - do - { - while (compare (left_ptr, mid) < 0.) - left_ptr++; - - while (compare (mid, right_ptr) < 0.) - right_ptr--; - - if (left_ptr < right_ptr) - { - std::swap(*left_ptr, *right_ptr); - if (mid == left_ptr) - mid = right_ptr; - else if (mid == right_ptr) - mid = left_ptr; - left_ptr++; - right_ptr--; - } - else if (left_ptr == right_ptr) - { - left_ptr ++; - right_ptr --; - break; - } - } - while (left_ptr <= right_ptr); - hi = right_ptr; - } -} - -template -v_array > > get_cover_sets(v_array > > > &spare_cover_sets) -{ - v_array > > ret = pop(spare_cover_sets); - while (ret.index < 101) - { - v_array > temp; - push(ret, temp); - } - return ret; -} - -inline bool shell(ScalarType parent_query_dist, ScalarType child_parent_dist, ScalarType upper_bound) -{ - return parent_query_dist - child_parent_dist <= upper_bound; - // && child_parent_dist - parent_query_dist <= upper_bound; -} - -int internal_k =1; -void update_k(ScalarType *k_upper_bound, ScalarType upper_bound) -{ - ScalarType *end = k_upper_bound + internal_k-1; - ScalarType *begin = k_upper_bound; - for (;end != begin; begin++) - { - if (upper_bound < *(begin+1)) - *begin = *(begin+1); - else { - *begin = upper_bound; - break; - } - } - if (end == begin) - *begin = upper_bound; -} -ScalarType *alloc_k() -{ - return (ScalarType*)malloc(sizeof(ScalarType) * internal_k); -} -void set_k(ScalarType* begin, ScalarType max) -{ - for(ScalarType *end = begin+internal_k;end != begin; begin++) - *begin = max; -} - -ScalarType internal_epsilon =0.; -//void update_epsilon(ScalarType *upper_bound, ScalarType new_dist) {} -ScalarType *alloc_epsilon() -{ - return (ScalarType 
*)malloc(sizeof(ScalarType)); -} -void set_epsilon(ScalarType* begin) -{ - *begin = internal_epsilon; -} - -void update_unequal(ScalarType *upper_bound, ScalarType new_dist) -{ - if (new_dist != 0.) - *upper_bound = new_dist; -} -ScalarType* (*alloc_unequal)() = alloc_epsilon; -void set_unequal(ScalarType* begin, ScalarType max) -{ - *begin = max; -} - -void (*update)(ScalarType *foo, ScalarType bar) = update_k; -void (*setter)(ScalarType *foo, ScalarType bar) = set_k; -ScalarType* (*alloc_upper)() = alloc_k; - -template -inline void copy_zero_set(DistanceCallback& dcb, node

    * query_chi, - ScalarType* new_upper_bound, v_array > &zero_set, - v_array > &new_zero_set) -{ - new_zero_set.index = 0; - d_node

    *end = zero_set.elements + zero_set.index; - for (d_node

    *ele = zero_set.elements; ele != end ; ele++) - { - ScalarType upper_dist = *new_upper_bound + query_chi->max_dist; - if (shell(ele->dist, query_chi->parent_dist, upper_dist)) - { - ScalarType d = distance(dcb, query_chi->p, ele->n->p, upper_dist); - - if (d <= upper_dist) - { - if (d < *new_upper_bound) - update(new_upper_bound, d); - d_node

    temp = {d, ele->n}; - push(new_zero_set,temp); - } - } - } -} - -template -inline void copy_cover_sets(DistanceCallback& dcb, node

    * query_chi, - ScalarType* new_upper_bound, - v_array > > &cover_sets, - v_array > > &new_cover_sets, - int current_scale, int max_scale) -{ - for (; current_scale <= max_scale; current_scale++) - { - d_node

    * ele = cover_sets[current_scale].elements; - d_node

    * end = cover_sets[current_scale].elements + cover_sets[current_scale].index; - for (; ele != end; ele++) - { - ScalarType upper_dist = *new_upper_bound + query_chi->max_dist + ele->n->max_dist; - if (shell(ele->dist, query_chi->parent_dist, upper_dist)) - { - ScalarType d = distance(dcb, query_chi->p, ele->n->p, upper_dist); - - if (d <= upper_dist) - { - if (d < *new_upper_bound) - update(new_upper_bound,d); - d_node

    temp = {d, ele->n}; - push(new_cover_sets[current_scale],temp); - } - } - } - } -} - -template -void print_query(const node

    *top_node) -{ - printf("query = \n"); - print(top_node->p); - if ( top_node->num_children > 0 ) { - printf("scale = %i\n",top_node->scale); - printf("max_dist = %f\n",top_node->max_dist); - printf("num children = %i\n",top_node->num_children); - } -} - -template -void print_cover_sets(v_array > > &cover_sets, - v_array > &zero_set, - int current_scale, int max_scale) -{ - printf("cover set = \n"); - for (; current_scale <= max_scale; current_scale++) - { - d_node

    *ele = cover_sets[current_scale].elements; - d_node

    *end = cover_sets[current_scale].elements + cover_sets[current_scale].index; - printf("%i\n", current_scale); - for (; ele != end; ele++) - { - node

    *n = (node

    *)ele->n; - print(n->p); - } - } - d_node

    *end = zero_set.elements + zero_set.index; - printf("infinity\n"); - for (d_node

    *ele = zero_set.elements; ele != end ; ele++) - { - node

    *n = (node

    *)ele->n; - print(n->p); - } -} - -/* - An optimization to consider: - Make all distance evaluations occur in descend. - - Instead of passing a cover_set, pass a stack of cover sets. The - last element holds d_nodes with your distance. The next lower - element holds a d_node with the distance to your query parent, - next = query grand parent, etc.. - - Compute distances in the presence of the tighter upper bound. - */ -template -inline -void descend(DistanceCallback& dcb, const node

    * query, ScalarType* upper_bound, - int current_scale,int &max_scale, v_array > > &cover_sets, - v_array > &zero_set) -{ - d_node

    *end = cover_sets[current_scale].elements + cover_sets[current_scale].index; - for (d_node

    *parent = cover_sets[current_scale].elements; parent != end; parent++) - { - const node

    *par = parent->n; - ScalarType upper_dist = *upper_bound + query->max_dist + query->max_dist; - if (parent->dist <= upper_dist + par->max_dist) - { - node

    *chi = par->children; - if (parent->dist <= upper_dist + chi->max_dist) - { - if (chi->num_children > 0) - { - if (max_scale < chi->scale) - max_scale = chi->scale; - d_node

    temp = {parent->dist, chi}; - push(cover_sets[chi->scale], temp); - } - else if (parent->dist <= upper_dist) - { - d_node

    temp = {parent->dist, chi}; - push(zero_set, temp); - } - } - node

    *child_end = par->children + par->num_children; - for (chi++; chi != child_end; chi++) - { - ScalarType upper_chi = *upper_bound + chi->max_dist + query->max_dist + query->max_dist; - if (shell(parent->dist, chi->parent_dist, upper_chi)) - { - ScalarType d = distance(dcb, query->p, chi->p, upper_chi); - if (d <= upper_chi) - { - if (d < *upper_bound) - update(upper_bound, d); - if (chi->num_children > 0) - { - if (max_scale < chi->scale) - max_scale = chi->scale; - d_node

    temp = {d, chi}; - push(cover_sets[chi->scale],temp); - } - else - if (d <= upper_chi - chi->max_dist) - { - d_node

    temp = {d, chi}; - push(zero_set, temp); - } - } - } - } - } - } -} - -template -void brute_nearest(DistanceCallback& dcb, const node

    * query, - v_array > zero_set, ScalarType* upper_bound, - v_array > &results, - v_array > > &spare_zero_sets) -{ - if (query->num_children > 0) - { - v_array > new_zero_set = pop(spare_zero_sets); - node

    * query_chi = query->children; - brute_nearest(dcb, query_chi, zero_set, upper_bound, results, spare_zero_sets); - ScalarType* new_upper_bound = alloc_upper(); - - node

    *child_end = query->children + query->num_children; - for (query_chi++;query_chi != child_end; query_chi++) - { - setter(new_upper_bound,*upper_bound + query_chi->parent_dist); - copy_zero_set(dcb, query_chi, new_upper_bound, zero_set, new_zero_set); - brute_nearest(dcb, query_chi, new_zero_set, new_upper_bound, results, spare_zero_sets); - } - free (new_upper_bound); - new_zero_set.index = 0; - push(spare_zero_sets, new_zero_set); - } - else - { - v_array

    temp; - push(temp, query->p); - d_node

    *end = zero_set.elements + zero_set.index; - for (d_node

    *ele = zero_set.elements; ele != end ; ele++) - if (ele->dist <= *upper_bound) - push(temp, ele->n->p); - push(results,temp); - } -} - -template -void internal_batch_nearest_neighbor(DistanceCallback& dcb, const node

    *query, - v_array > > &cover_sets, - v_array > &zero_set, - int current_scale, - int max_scale, - ScalarType* upper_bound, - v_array > &results, - v_array > > > &spare_cover_sets, - v_array > > &spare_zero_sets) -{ - if (current_scale > max_scale) // All remaining points are in the zero set. - brute_nearest(dcb, query, zero_set, upper_bound, results, spare_zero_sets); - else - if (query->scale <= current_scale && query->scale != 100) - // Our query has too much scale. Reduce. - { - node

    *query_chi = query->children; - v_array > new_zero_set = pop(spare_zero_sets); - v_array > > new_cover_sets = get_cover_sets(spare_cover_sets); - ScalarType* new_upper_bound = alloc_upper(); - - node

    *child_end = query->children + query->num_children; - for (query_chi++; query_chi != child_end; query_chi++) - { - setter(new_upper_bound,*upper_bound + query_chi->parent_dist); - copy_zero_set(dcb, query_chi, new_upper_bound, zero_set, new_zero_set); - copy_cover_sets(dcb, query_chi, new_upper_bound, cover_sets, new_cover_sets, - current_scale, max_scale); - internal_batch_nearest_neighbor(dcb, query_chi, new_cover_sets, new_zero_set, - current_scale, max_scale, new_upper_bound, - results, spare_cover_sets, spare_zero_sets); - } - free (new_upper_bound); - new_zero_set.index = 0; - push(spare_zero_sets, new_zero_set); - push(spare_cover_sets, new_cover_sets); - internal_batch_nearest_neighbor(dcb, query->children, cover_sets, zero_set, - current_scale, max_scale, upper_bound, results, - spare_cover_sets, spare_zero_sets); - } - else // reduce cover set scale - { - halfsort(cover_sets[current_scale]); - descend(dcb, query, upper_bound, current_scale, max_scale,cover_sets, zero_set); - cover_sets[current_scale++].index = 0; - internal_batch_nearest_neighbor(dcb, query, cover_sets, zero_set, - current_scale, max_scale, upper_bound, results, - spare_cover_sets, spare_zero_sets); - } -} - -template -void batch_nearest_neighbor(DistanceCallback &dcb, const node

    &top_node, - const node

    &query, v_array > &results) -{ - v_array > > > spare_cover_sets; - v_array > > spare_zero_sets; - - v_array > > cover_sets = get_cover_sets(spare_cover_sets); - v_array > zero_set = pop(spare_zero_sets); - - ScalarType* upper_bound = alloc_upper(); - setter(upper_bound, std::numeric_limits::max()); - - ScalarType top_dist = distance(dcb, query.p, top_node.p, std::numeric_limits::max()); - update(upper_bound, top_dist); - - d_node

    temp = {top_dist, &top_node}; - push(cover_sets[0], temp); - - internal_batch_nearest_neighbor(dcb, &query,cover_sets,zero_set,0,0,upper_bound,results, - spare_cover_sets,spare_zero_sets); - - free(upper_bound); - push(spare_cover_sets, cover_sets); - - for (int i = 0; i < spare_cover_sets.index; i++) - { - v_array > > cover_sets2 = spare_cover_sets[i]; - for (int j = 0; j < cover_sets2.index; j++) - free (cover_sets2[j].elements); - free(cover_sets2.elements); - } - free(spare_cover_sets.elements); - - push(spare_zero_sets, zero_set); - - for (int i = 0; i < spare_zero_sets.index; i++) - free(spare_zero_sets[i].elements); - free(spare_zero_sets.elements); -} - -template -void k_nearest_neighbor(DistanceCallback &dcb, const node

    &top_node, - const node

    &query, v_array > &results, int k) -{ - internal_k = k; - update = update_k; - setter = set_k; - alloc_upper = alloc_k; - - batch_nearest_neighbor(dcb, top_node, query, results); -} -/* -template -void epsilon_nearest_neighbor(DistanceCallback &dcb, const node

    &top_node, - const node

    &query, v_array > &results, - ScalarType epsilon) -{ - internal_epsilon = epsilon; - // update = update_epsilon; - setter = set_epsilon; - alloc_upper = alloc_epsilon; - - batch_nearest_neighbor(dcb, top_node, query, results); -} - -template -void unequal_nearest_neighbor(DistanceCallback &dcb, const node

    &top_node, - const node

    &query, v_array > &results) -{ - update = update_unequal; - setter = set_unequal; - alloc_upper = alloc_unequal; - - batch_nearest_neighbor(dcb, top_node, query, results); -} -*/ - -} -} -#endif //USE_GPL_SHOGUN -#endif diff --git a/src/shogun/lib/tapkee/neighbors/covertree_point.hpp b/src/shogun/lib/tapkee/neighbors/covertree_point.hpp deleted file mode 100644 index a346b9a76d1..00000000000 --- a/src/shogun/lib/tapkee/neighbors/covertree_point.hpp +++ /dev/null @@ -1,177 +0,0 @@ -/* * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2012 Fernando José Iglesias García - * Written (W) John Langford and Dinoj Surendran, v_array and its templatization - * Copyright (C) 2012 Fernando José Iglesias García - */ - -#ifndef _JL_COVERTREE_POINT_H_ -#define _JL_COVERTREE_POINT_H_ - -#ifdef USE_GPL_SHOGUN - -/* Tapkee includes */ -#include -/* End of Tapkee includes */ - -#include -#include - -namespace tapkee -{ -namespace tapkee_internal -{ - -/** @brief Class v_array taken directly from JL's implementation */ -template -class v_array{ - - public: - /** Getter for the the last element of the v_array - * @return the last element of the array */ - T last() { return elements[index-1];} - - /** Decrement the pointer to the last element */ - void decr() { index--;} - - /** Create an empty v_array */ - v_array() : index(0), length(0), elements(NULL) {} - - /** Element access operator - * @param i of the element to be read - * @return the corresponding element */ - T& operator[](IndexType i) { return elements[i]; } - - public: - /** Pointer to the last element of the v_array */ - int index; - - /** Length of the v_array */ - int length; - - /** Pointer to the beginning of the v_array elements */ - T* elements; -}; - -/** - * Insert a new element at the end 
of the vector - * - * @param v vector - * @param new_ele element to insert - */ -template -void push(v_array& v, const T &new_ele) -{ - while(v.index >= v.length) - { - v.length = 2*v.length + 3; - v.elements = (T *)realloc(v.elements,sizeof(T) * v.length); - } - v[v.index++] = new_ele; -} - -/** - * Used to modify the capacity of the vector - * - * @param v vector - * @param length the new length of the vector - */ -template -void alloc(v_array& v, int length) -{ - v.elements = (T *)realloc(v.elements, sizeof(T) * length); - v.length = length; -} - -/** - * Returns the vector previous to the pointed one in the stack of - * vectors and decrements the index of the stack. No memory is - * freed here. If there are no vectors stored in the stack, create - * and return a new empty vector - * - * @param stack of vectors - * @return the adequate vector according to the previous conditions - */ -template -v_array pop(v_array > &stack) -{ - if (stack.index > 0) - return stack[--stack.index]; - else - return v_array(); -} - -/** @brief Class Point to use with John Langford's CoverTree. This - * class must have some associated functions defined (distance, - * and print, see below) so it can be used with the CoverTree - * implementation. 
- */ -template -struct CoverTreePoint -{ - CoverTreePoint() : iter_(), norm_(0.0) - { - }; - CoverTreePoint(const RandomAccessIterator& iter, ScalarType norm) : - iter_(iter), norm_(norm) - { - }; - - RandomAccessIterator iter_; - ScalarType norm_; -}; /* struct JLCoverTreePoint */ - -template -struct distance_impl; - -/** Functions declared out of the class definition to respect CoverTree - * structure */ -template -inline ScalarType distance(Callback& cb, const CoverTreePoint& l, - const CoverTreePoint& r, ScalarType upper_bound) -{ - //assert(upper_bound>=0); - - if (l.iter_==r.iter_) - return 0.0; - - return distance_impl()(cb,l,r,upper_bound); -} - -struct KernelType; - -template -struct distance_impl -{ - inline ScalarType operator()(Callback& cb, const CoverTreePoint& l, - const CoverTreePoint& r, ScalarType /*upper_bound*/) - { - return std::sqrt(l.norm_ + r.norm_ - 2*cb(r.iter_,l.iter_)); - } -}; - -struct DistanceType; - -template -struct distance_impl -{ - inline ScalarType operator()(Callback& cb, const CoverTreePoint& l, - const CoverTreePoint& r, ScalarType /*upper_bound*/) - { - return cb(l.iter_,r.iter_); - } -}; - -/** Print the information of the CoverTree point */ -template -void print(const CoverTreePoint&) -{ -} - -} -} -#endif //USE_GPL_SHOGUN -#endif /* _JL_COVERTREE_POINT_H_*/ diff --git a/src/shogun/lib/tfhistogram/histogram.cpp b/src/shogun/lib/tfhistogram/histogram.cpp new file mode 100644 index 00000000000..1b43b77e242 --- /dev/null +++ b/src/shogun/lib/tfhistogram/histogram.cpp @@ -0,0 +1,230 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// clang-format off +#include +#ifdef HAVE_TFLOGGER + +#include "histogram.h" +#include +#include +#include +#include +#include + +namespace tensorflow { +namespace histogram { + +static std::vector* InitDefaultBucketsInner() { + std::vector buckets; + std::vector neg_buckets; + // Make buckets whose range grows by 10% starting at 1.0e-12 up to 1.0e20 + double v = 1.0e-12; + while (v < 1.0e20) { + buckets.push_back(v); + neg_buckets.push_back(-v); + v *= 1.1; + } + buckets.push_back(DBL_MAX); + neg_buckets.push_back(-DBL_MAX); + std::reverse(neg_buckets.begin(), neg_buckets.end()); + std::vector* result = new std::vector; + result->insert(result->end(), neg_buckets.begin(), neg_buckets.end()); + result->push_back(0.0); + result->insert(result->end(), buckets.begin(), buckets.end()); + return result; +} + +static std::vector InitDefaultBuckets() { + static std::vector* default_bucket_limits = InitDefaultBucketsInner(); + return *default_bucket_limits; +} + +Histogram::Histogram() : bucket_limits_(InitDefaultBuckets()) { Clear(); } + +// Create a histogram with a custom set of bucket limits, +// specified in "custom_buckets[0..custom_buckets.size()-1]" +Histogram::Histogram(std::vector custom_bucket_limits) + : custom_bucket_limits_(custom_bucket_limits.begin(), + custom_bucket_limits.end()), + bucket_limits_(custom_bucket_limits_) { + Clear(); +} + +bool Histogram::DecodeFromProto(const HistogramProto& proto) { + if ((proto.bucket_size() != proto.bucket_limit_size()) || + 
(proto.bucket_size() == 0)) { + return false; + } + min_ = proto.min(); + max_ = proto.max(); + num_ = proto.num(); + sum_ = proto.sum(); + sum_squares_ = proto.sum_squares(); + custom_bucket_limits_.clear(); + custom_bucket_limits_.insert(custom_bucket_limits_.end(), + proto.bucket_limit().begin(), + proto.bucket_limit().end()); + bucket_limits_ = custom_bucket_limits_; + buckets_.clear(); + buckets_.insert(buckets_.end(), proto.bucket().begin(), proto.bucket().end()); + return true; +} + +void Histogram::Clear() { + min_ = bucket_limits_[bucket_limits_.size() - 1]; + max_ = -DBL_MAX; + num_ = 0; + sum_ = 0; + sum_squares_ = 0; + buckets_.resize(bucket_limits_.size()); + for (size_t i = 0; i < bucket_limits_.size(); i++) { + buckets_[i] = 0; + } +} + +void Histogram::Add(double value) { + int b = + std::upper_bound(bucket_limits_.begin(), bucket_limits_.end(), value) - + bucket_limits_.begin(); + + buckets_[b] += 1.0; + if (min_ > value) min_ = value; + if (max_ < value) max_ = value; + num_++; + sum_ += value; + sum_squares_ += (value * value); +} + +double Histogram::Median() const { return Percentile(50.0); } + +// Linearly map the variable x from [x0, x1] unto [y0, y1] +double Histogram::Remap(double x, double x0, double x1, double y0, + double y1) const { + return y0 + (x - x0) / (x1 - x0) * (y1 - y0); +} + +// Pick tight left-hand-side and right-hand-side bounds and then +// interpolate a histogram value at percentile p +double Histogram::Percentile(double p) const { + if (num_ == 0.0) return 0.0; + + double threshold = num_ * (p / 100.0); + double cumsum_prev = 0; + for (size_t i = 0; i < buckets_.size(); i++) { + double cumsum = cumsum_prev + buckets_[i]; + + // Find the first bucket whose cumsum >= threshold + if (cumsum >= threshold) { + // Prevent divide by 0 in remap which happens if cumsum == cumsum_prev + // This should only get hit when p == 0, cumsum == 0, and cumsum_prev == 0 + if (cumsum == cumsum_prev) { + continue; + } + + // Calculate the 
lower bound of interpolation + double lhs = (i == 0 || cumsum_prev == 0) ? min_ : bucket_limits_[i - 1]; + lhs = std::max(lhs, min_); + + // Calculate the upper bound of interpolation + double rhs = bucket_limits_[i]; + rhs = std::min(rhs, max_); + + double weight = Remap(threshold, cumsum_prev, cumsum, lhs, rhs); + return weight; + } + + cumsum_prev = cumsum; + } + return max_; +} + +double Histogram::Average() const { + if (num_ == 0.0) return 0; + return sum_ / num_; +} + +double Histogram::StandardDeviation() const { + if (num_ == 0.0) return 0; + double variance = (sum_squares_ * num_ - sum_ * sum_) / (num_ * num_); + return sqrt(variance); +} + +std::string Histogram::ToString() const { + std::string r; + char buf[200]; + snprintf(buf, sizeof(buf), "Count: %.0f Average: %.4f StdDev: %.2f\n", num_, + Average(), StandardDeviation()); + r.append(buf); + snprintf(buf, sizeof(buf), "Min: %.4f Median: %.4f Max: %.4f\n", + (num_ == 0.0 ? 0.0 : min_), Median(), max_); + r.append(buf); + r.append("------------------------------------------------------\n"); + const double mult = num_ > 0 ? 100.0 / num_ : 0.0; + double sum = 0; + for (size_t b = 0; b < buckets_.size(); b++) { + if (buckets_[b] <= 0.0) continue; + sum += buckets_[b]; + snprintf(buf, sizeof(buf), "[ %10.2g, %10.2g ) %7.0f %7.3f%% %7.3f%% ", + ((b == 0) ? -DBL_MAX : bucket_limits_[b - 1]), // left + bucket_limits_[b], // right + buckets_[b], // count + mult * buckets_[b], // percentage + mult * sum); // cum percentage + r.append(buf); + + // Add hash marks based on percentage; 20 marks for 100%. 
+ int marks = static_cast(20 * (buckets_[b] / num_) + 0.5); + r.append(marks, '#'); + r.push_back('\n'); + } + return r; +} + +void Histogram::EncodeToProto(HistogramProto* proto, + bool preserve_zero_buckets) const { + proto->Clear(); + proto->set_min(min_); + proto->set_max(max_); + proto->set_num(num_); + proto->set_sum(sum_); + proto->set_sum_squares(sum_squares_); + for (size_t i = 0; i < buckets_.size();) { + double end = bucket_limits_[i]; + double count = buckets_[i]; + i++; + if (!preserve_zero_buckets && count <= 0.0) { + // Find run of empty buckets and collapse them into one + while (i < buckets_.size() && buckets_[i] <= 0.0) { + end = bucket_limits_[i]; + count = buckets_[i]; + i++; + } + } + proto->add_bucket_limit(end); + proto->add_bucket(count); + } + if (proto->bucket_size() == 0.0) { + // It's easier when we restore if we always have at least one bucket entry + proto->add_bucket_limit(DBL_MAX); + proto->add_bucket(0.0); + } +} + +} // namespace histogram +} // namespace tensorflow + +#endif //HAVE_TFLOGGER +// clang-format on diff --git a/src/shogun/lib/tfhistogram/histogram.h b/src/shogun/lib/tfhistogram/histogram.h new file mode 100644 index 00000000000..7fddd2e7eeb --- /dev/null +++ b/src/shogun/lib/tfhistogram/histogram.h @@ -0,0 +1,107 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +// clang-format off +#include +#ifdef HAVE_TFLOGGER + +#ifndef TENSORFLOW_LIB_HISTOGRAM_HISTOGRAM_H_ +#define TENSORFLOW_LIB_HISTOGRAM_HISTOGRAM_H_ + +#include +#include + +namespace tensorflow { + +class HistogramProto; + +namespace histogram { + +class Histogram { + public: + // Create a histogram with a default set of bucket boundaries. + // Buckets near zero cover very small ranges (e.g. 10^-12), and each + // bucket range grows by ~10% as we head away from zero. The + // buckets cover the range from -DBL_MAX to DBL_MAX. + Histogram(); + + // Create a histogram with a custom set of bucket boundaries, + // specified in "custom_bucket_limits[0..custom_bucket_limits.size()-1]" + // REQUIRES: custom_bucket_limits[i] values are monotonically increasing. + // REQUIRES: custom_bucket_limits is not empty() + explicit Histogram(std::vector custom_bucket_limits); + + // Restore the state of a histogram that was previously encoded + // via Histogram::EncodeToProto. Note that only the bucket boundaries + // generated by EncodeToProto will be restored. + bool DecodeFromProto(const HistogramProto& proto); + + ~Histogram() {} + + void Clear(); + void Add(double value); + + // Save the current state of the histogram to "*proto". If + // "preserve_zero_buckets" is false, only non-zero bucket values and + // ranges are saved, and the bucket boundaries of zero-valued buckets + // are lost. 
+ void EncodeToProto(HistogramProto* proto, bool preserve_zero_buckets) const; + + // Return the median of the values in the histogram + double Median() const; + + // Return the "p"th percentile [0.0..100.0] of the values in the + // distribution + double Percentile(double p) const; + + // Return the average value of the distribution + double Average() const; + + // Return the standard deviation of values in the distribution + double StandardDeviation() const; + + // Returns a multi-line human-readable string representing the histogram + // contents. Example output: + // Count: 4 Average: 251.7475 StdDev: 432.02 + // Min: -3.0000 Median: 5.0000 Max: 1000.0000 + // ------------------------------------------------------ + // [ -5, 0 ) 1 25.000% 25.000% ##### + // [ 0, 5 ) 1 25.000% 50.000% ##### + // [ 5, 10 ) 1 25.000% 75.000% ##### + // [ 1000, 10000 ) 1 25.000% 100.000% ##### + std::string ToString() const; + + private: + double min_; + double max_; + double num_; + double sum_; + double sum_squares_; + + std::vector custom_bucket_limits_; + std::vector bucket_limits_; + std::vector buckets_; + + double Remap(double x, double x0, double x1, double y0, double y1) const; + + Histogram(const Histogram&) = delete; + void operator=(const Histogram&) = delete; +}; + +} // namespace histogram +} // namespace tensorflow + +#endif // TENSORFLOW_LIB_HISTOGRAM_HISTOGRAM_H_ +#endif // HAVE_TFLOGGER +// clang-format on diff --git a/src/shogun/machine/BaggingMachine.cpp b/src/shogun/machine/BaggingMachine.cpp index 4a9802680bc..0f81fbbf8e9 100644 --- a/src/shogun/machine/BaggingMachine.cpp +++ b/src/shogun/machine/BaggingMachine.cpp @@ -8,8 +8,11 @@ * Copyright (C) 2013 Viktor Gal */ -#include #include +#include +#include +#include + #include using namespace shogun; @@ -44,39 +47,86 @@ CBaggingMachine::~CBaggingMachine() CBinaryLabels* CBaggingMachine::apply_binary(CFeatures* data) { - SGVector combined_vector = apply_get_outputs(data); + SGMatrix output = 
apply_outputs_without_combination(data); + + CMeanRule* mean_rule = new CMeanRule(); + + SGVector labels = m_combination_rule->combine(output); + SGVector probabilities = mean_rule->combine(output); + + float64_t threshold = 0.5; + CBinaryLabels* pred = new CBinaryLabels(probabilities, threshold); + + SG_UNREF(mean_rule); - CBinaryLabels* pred = new CBinaryLabels(combined_vector); return pred; } CMulticlassLabels* CBaggingMachine::apply_multiclass(CFeatures* data) { - SGVector combined_vector = apply_get_outputs(data); + SGMatrix bagged_outputs = + apply_outputs_without_combination(data); + + REQUIRE(m_labels, "Labels not set.\n"); + REQUIRE( + m_labels->get_label_type() == LT_MULTICLASS, + "Labels (%s) are not compatible with multiclass.\n", + m_labels->get_name()); + + auto labels_multiclass = dynamic_cast(m_labels); + auto num_samples = bagged_outputs.size() / m_num_bags; + auto num_classes = labels_multiclass->get_num_classes(); + + CMulticlassLabels* pred = new CMulticlassLabels(num_samples); + pred->allocate_confidences_for(num_classes); + + SGMatrix class_probabilities(num_classes, num_samples); + class_probabilities.zero(); + + for (auto i = 0; i < num_samples; ++i) + { + for (auto j = 0; j < m_num_bags; ++j) + { + int32_t class_idx = bagged_outputs(i, j); + class_probabilities(class_idx, i) += 1; + } + } + + class_probabilities = linalg::scale(class_probabilities, 1.0 / m_num_bags); + + for (auto i = 0; i < num_samples; ++i) + pred->set_multiclass_confidences(i, class_probabilities.get_column(i)); + + SGVector combined = m_combination_rule->combine(bagged_outputs); + pred->set_labels(combined); - CMulticlassLabels* pred = new CMulticlassLabels(combined_vector); return pred; } CRegressionLabels* CBaggingMachine::apply_regression(CFeatures* data) { - SGVector combined_vector = apply_get_outputs(data); - - CRegressionLabels* pred = new CRegressionLabels(combined_vector); - - return pred; + return new CRegressionLabels(apply_get_outputs(data)); } SGVector 
CBaggingMachine::apply_get_outputs(CFeatures* data) { ASSERT(data != NULL); REQUIRE(m_combination_rule != NULL, "Combination rule is not set!"); + + SGMatrix output = apply_outputs_without_combination(data); + SGVector combined = m_combination_rule->combine(output); + + return combined; +} + +SGMatrix +CBaggingMachine::apply_outputs_without_combination(CFeatures* data) +{ ASSERT(m_num_bags == m_bags->get_num_elements()); SGMatrix output(data->get_num_vectors(), m_num_bags); output.zero(); - #pragma omp parallel for for (int32_t i = 0; i < m_num_bags; ++i) { @@ -95,9 +145,7 @@ SGVector CBaggingMachine::apply_get_outputs(CFeatures* data) SG_UNREF(m); } - SGVector combined = m_combination_rule->combine(output); - - return combined; + return output; } bool CBaggingMachine::train_machine(CFeatures* data) @@ -318,7 +366,7 @@ float64_t CBaggingMachine::get_oob_error(CEvaluation* eval) const SG_UNREF(l); } - DynArray idx; + std::vector idx; for (index_t i = 0; i < m_features->get_num_vectors(); i++) { if (m_all_oob_idx[i]) @@ -326,9 +374,9 @@ float64_t CBaggingMachine::get_oob_error(CEvaluation* eval) const } SGVector combined = m_combination_rule->combine(output); - SGVector lab(idx.get_num_elements()); + SGVector lab(idx.size()); for (int32_t i=0;iget_label_type()) @@ -349,7 +397,7 @@ float64_t CBaggingMachine::get_oob_error(CEvaluation* eval) const SG_ERROR("Unsupported label type\n"); } - m_labels->add_subset(SGVector(idx.get_array(), idx.get_num_elements(), false)); + m_labels->add_subset(SGVector(idx.data(), idx.size(), false)); float64_t res = eval->evaluate(predicted, m_labels); m_labels->remove_subset(); @@ -363,15 +411,8 @@ CDynamicArray* CBaggingMachine::get_oob_indices(const SGVector out_of_bag.set_const(true); // mark the ones that are in_bag - index_t oob_count = m_features->get_num_vectors(); for (index_t i = 0; i < in_bag.vlen; i++) - { - if (out_of_bag[in_bag[i]]) - { - out_of_bag[in_bag[i]] = false; - oob_count--; - } - } + out_of_bag[in_bag[i]] &= false; 
CDynamicArray* oob = new CDynamicArray(); // store the indicies of vectors that are out of the bag diff --git a/src/shogun/machine/BaggingMachine.h b/src/shogun/machine/BaggingMachine.h index 8a1d3ee25ff..25388a4f974 100644 --- a/src/shogun/machine/BaggingMachine.h +++ b/src/shogun/machine/BaggingMachine.h @@ -139,20 +139,30 @@ namespace shogun */ SGVector apply_get_outputs(CFeatures* data); - /** Register paramaters */ - void register_parameters(); - - /** Initialize the members with default values */ - void init(); - - /** - * get the vector of indices for feature vectors that are out of bag - * - * @param in_bag vector of indices that are in bag. - * NOTE: in_bag is a randomly generated with replacement - * @return the vector of indices - */ - CDynamicArray* get_oob_indices(const SGVector& in_bag); + /** helper function for the apply_{binary,..} functions that + * computes the output probabilities without combination rules + * + * @param data the data to compute the output for + * @return predictions + */ + SGMatrix + apply_outputs_without_combination(CFeatures* data); + + /** Register paramaters */ + void register_parameters(); + + /** Initialize the members with default values */ + void init(); + + /** + * get the vector of indices for feature vectors that are out of bag + * + * @param in_bag vector of indices that are in bag. 
+ * NOTE: in_bag is a randomly generated with replacement + * @return the vector of indices + */ + CDynamicArray* + get_oob_indices(const SGVector& in_bag); protected: /** bags array */ diff --git a/src/shogun/machine/KernelMachine.cpp b/src/shogun/machine/KernelMachine.cpp index 06335587a5e..65f4d0d9178 100644 --- a/src/shogun/machine/KernelMachine.cpp +++ b/src/shogun/machine/KernelMachine.cpp @@ -9,15 +9,20 @@ * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society */ -#include -#include -#include +#include #include +#include +#include #include #include #include +#ifdef HAVE_OPENMP +#include + +#endif + using namespace shogun; #ifndef DOXYGEN_SHOULD_SKIP_THIS @@ -292,8 +297,6 @@ SGVector CKernelMachine::apply_get_outputs(CFeatures* data) { SG_DEBUG("computing output on %d test examples\n", num_vectors) - CSignal::clear_cancel(); - if (io->get_show_progress()) io->enable_progress(); else @@ -333,72 +336,64 @@ SGVector CKernelMachine::apply_get_outputs(CFeatures* data) } else { - // TODO: port to use OpenMP backend instead of pthread -#ifdef HAVE_PTHREAD - int32_t num_threads=parallel->get_num_threads(); -#else - int32_t num_threads=1; -#endif - ASSERT(num_threads>0) - - if (num_threads < 2) - { - S_THREAD_PARAM_KERNEL_MACHINE params; - params.kernel_machine=this; - params.result = output.vector; - params.start=0; - params.end=num_vectors; - params.verbose=true; - params.indices = NULL; - params.indices_len = 0; - apply_helper((void*) ¶ms); - } -#ifdef HAVE_PTHREAD - else + auto pb = progress(range(num_vectors)); + int32_t num_threads; + int64_t step; +#pragma omp parallel shared(num_threads, step) { - pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); - S_THREAD_PARAM_KERNEL_MACHINE* params = SG_MALLOC(S_THREAD_PARAM_KERNEL_MACHINE, num_threads); - int32_t step= num_vectors/num_threads; - - int32_t t; - for (t=0; thas_property(KP_LINADD) && + (kernel->get_is_initialized())) + { + float64_t score = kernel->compute_optimized(vec); + 
output[vec] = score + get_bias(); + } + else + { + float64_t score = 0; + for (int32_t i = 0; i < get_num_support_vectors(); i++) + score += + kernel->kernel(get_support_vector(i), vec) * + get_alpha(i); + output[vec] = score + get_bias(); + } + } + } + pb.complete(); } #ifndef WIN32 - if ( CSignal::cancel_computations() ) + if (cancel_computation()) SG_INFO("prematurely stopped. \n") - else #endif - SG_DONE() } SG_DEBUG("leaving %s::apply_get_outputs(%s at %p)\n", @@ -407,54 +402,6 @@ SGVector CKernelMachine::apply_get_outputs(CFeatures* data) return output; } -float64_t CKernelMachine::apply_one(int32_t num) -{ - ASSERT(kernel) - - if (kernel->has_property(KP_LINADD) && (kernel->get_is_initialized())) - { - float64_t score = kernel->compute_optimized(num); - return score+get_bias(); - } - else - { - float64_t score=0; - for(int32_t i=0; ikernel(get_support_vector(i), num)*get_alpha(i); - - return score+get_bias(); - } -} - -void* CKernelMachine::apply_helper(void* p) -{ - S_THREAD_PARAM_KERNEL_MACHINE* params = (S_THREAD_PARAM_KERNEL_MACHINE*) p; - float64_t* result = params->result; - CKernelMachine* kernel_machine = params->kernel_machine; - -#ifdef WIN32 - for (int32_t vec=params->start; vecend; vec++) -#else - for (int32_t vec=params->start; vecend && - !CSignal::cancel_computations(); vec++) -#endif - { - if (params->verbose) - { - int32_t num_vectors=params->end - params->start; - int32_t v=vec-params->start; - if ( (v% (num_vectors/100+1))== 0) - SG_SPROGRESS(v, 0.0, num_vectors-1) - } - - /* eventually use index mapping if exists */ - index_t idx=params->indices ? 
params->indices[vec] : vec; - result[vec] = kernel_machine->apply_one(idx); - } - - return NULL; -} - void CKernelMachine::store_model_features() { if (!kernel) @@ -545,91 +492,89 @@ SGVector CKernelMachine::apply_locked_get_output( int32_t num_inds=indices.vlen; SGVector output(num_inds); - CSignal::clear_cancel(); - if (io->get_show_progress()) io->enable_progress(); else io->disable_progress(); /* custom kernel never has batch evaluation property so dont do this here */ - // TODO: port to use OpenMP backend instead of pthread -#ifdef HAVE_PTHREAD - int32_t num_threads=parallel->get_num_threads(); -#else - int32_t num_threads=1; -#endif - ASSERT(num_threads>0) - - if (num_threads<2) + auto pb = progress(range(0, num_inds)); + int32_t num_threads; + int64_t step; +#pragma omp parallel shared(num_threads, step) { - S_THREAD_PARAM_KERNEL_MACHINE params; - params.kernel_machine=this; - params.result=output.vector; - - /* use the parameter index vector */ - params.start=0; - params.end=num_inds; - params.indices=indices.vector; - params.indices_len=indices.vlen; - - params.verbose=true; - apply_helper((void*) ¶ms); - } -#ifdef HAVE_PTHREAD - else - { - pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); - S_THREAD_PARAM_KERNEL_MACHINE* params=SG_MALLOC(S_THREAD_PARAM_KERNEL_MACHINE, num_threads); - int32_t step= num_inds/num_threads; - - int32_t t; - for (t=0; thas_property(KP_LINADD) && + (kernel->get_is_initialized())) + { + float64_t score = kernel->compute_optimized(index); + output[vec] = score + get_bias(); + } + else + { + float64_t score = 0; + for (int32_t i = 0; i < get_num_support_vectors(); i++) + score += kernel->kernel(get_support_vector(i), index) * + get_alpha(i); - params[t].kernel_machine=this; - params[t].result=output.vector; - - /* use the parameter index vector */ - params[t].start=t*step; - params[t].end=num_inds; - params[t].indices=indices.vector; - params[t].indices_len=indices.vlen; - - params[t].verbose=true; - apply_helper((void*) 
¶ms[t]); - - for (t=0; thas_property(KP_LINADD) && (kernel->get_is_initialized())) + { + float64_t score = kernel->compute_optimized(num); + return score + get_bias(); + } + else + { + float64_t score = 0; + for (int32_t i = 0; i < get_num_support_vectors(); i++) + score += kernel->kernel(get_support_vector(i), num) * get_alpha(i); + + return score + get_bias(); + } +} + void CKernelMachine::data_lock(CLabels* labs, CFeatures* features) { if ( !kernel ) @@ -710,4 +655,3 @@ bool CKernelMachine::supports_locking() const { return true; } - diff --git a/src/shogun/machine/KernelMachine.h b/src/shogun/machine/KernelMachine.h index f2e2017e277..d4e5e1f293f 100644 --- a/src/shogun/machine/KernelMachine.h +++ b/src/shogun/machine/KernelMachine.h @@ -228,13 +228,6 @@ class CKernelMachine : public CMachine */ virtual float64_t apply_one(int32_t num); - /** apply example helper, used in threads - * - * @param p params of the thread - * @return nothing really - */ - static void* apply_helper(void* p); - #ifndef SWIG // SWIG should skip this part /** Trains a locked machine on a set of indices. 
Error if machine is * not locked @@ -305,7 +298,7 @@ class CKernelMachine : public CMachine */ virtual void store_model_features(); - private: + private: /** register parameters and do misc init */ void init(); diff --git a/src/shogun/machine/LinearMachine.cpp b/src/shogun/machine/LinearMachine.cpp index eee71ba8ba1..be95ea6cdc1 100644 --- a/src/shogun/machine/LinearMachine.cpp +++ b/src/shogun/machine/LinearMachine.cpp @@ -44,7 +44,7 @@ void CLinearMachine::init() features = NULL; m_compute_bias = true; - SG_ADD(&w, "w", "Parameter vector w.", MS_NOT_AVAILABLE); + SG_ADD(&m_w, "w", "Parameter vector w.", MS_NOT_AVAILABLE); SG_ADD(&bias, "bias", "Bias b.", MS_NOT_AVAILABLE); SG_ADD((CSGObject**) &features, "features", "Feature object.", MS_NOT_AVAILABLE); @@ -58,7 +58,7 @@ CLinearMachine::~CLinearMachine() float64_t CLinearMachine::apply_one(int32_t vec_idx) { - return features->dense_dot(vec_idx, w.vector, w.vlen) + bias; + return features->dense_dot(vec_idx, m_w.vector, m_w.vlen) + bias; } CRegressionLabels* CLinearMachine::apply_regression(CFeatures* data) @@ -88,21 +88,21 @@ SGVector CLinearMachine::apply_get_outputs(CFeatures* data) int32_t num=features->get_num_vectors(); ASSERT(num>0) - ASSERT(w.vlen==features->get_dim_feature_space()) + ASSERT(m_w.vlen==features->get_dim_feature_space()) float64_t* out=SG_MALLOC(float64_t, num); - features->dense_dot_range(out, 0, num, NULL, w.vector, w.vlen, bias); + features->dense_dot_range(out, 0, num, NULL, m_w.vector, m_w.vlen, bias); return SGVector(out,num); } SGVector CLinearMachine::get_w() const { - return w; + return m_w; } -void CLinearMachine::set_w(const SGVector src_w) +void CLinearMachine::set_w(const SGVector w) { - w = src_w; + m_w = w; } void CLinearMachine::set_bias(float64_t b) diff --git a/src/shogun/machine/LinearMachine.h b/src/shogun/machine/LinearMachine.h index a9371a18516..457b4e76b1a 100644 --- a/src/shogun/machine/LinearMachine.h +++ b/src/shogun/machine/LinearMachine.h @@ -188,7 +188,7 @@ 
class CLinearMachine : public CMachine private: /** w */ - SGVector w; + SGVector m_w; protected: /** bias */ diff --git a/src/shogun/machine/Machine.cpp b/src/shogun/machine/Machine.cpp index 44362db11c9..9cbc218bbcf 100644 --- a/src/shogun/machine/Machine.cpp +++ b/src/shogun/machine/Machine.cpp @@ -9,12 +9,16 @@ * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society */ +#include +#include +#include #include using namespace shogun; -CMachine::CMachine() : CSGObject(), m_max_train_time(0), m_labels(NULL), - m_solver_type(ST_AUTO) +CMachine::CMachine() + : CSGObject(), m_max_train_time(0), m_labels(NULL), m_solver_type(ST_AUTO), + m_cancel_computation(false), m_pause_computation_flag(false) { m_data_locked=false; m_store_model_features=false; @@ -54,7 +58,10 @@ bool CMachine::train(CFeatures* data) m_labels->ensure_valid(get_name()); } + auto sub = connect_to_signal_handler(); bool result = train_machine(data); + sub.unsubscribe(); + reset_computation_variables(); if (m_store_model_features) store_model_features(); @@ -269,3 +276,17 @@ CLatentLabels* CMachine::apply_locked_latent(SGVector indices) "for %s\n", get_name()); return NULL; } + +rxcpp::subscription CMachine::connect_to_signal_handler() +{ + // Subscribe this algorithm to the signal handler + auto subscriber = rxcpp::make_subscriber( + [this](int i) { + if (i == SG_PAUSE_COMP) + this->on_pause(); + else + this->on_next(); + }, + [this]() { this->on_complete(); }); + return get_global_signal()->get_observable()->subscribe(subscriber); +} diff --git a/src/shogun/machine/Machine.h b/src/shogun/machine/Machine.h index aa8a1d3b940..b53785a9833 100644 --- a/src/shogun/machine/Machine.h +++ b/src/shogun/machine/Machine.h @@ -23,6 +23,9 @@ #include #include +#include +#include + namespace shogun { @@ -123,6 +126,11 @@ enum EProblemType */ \ virtual EProblemType get_machine_problem_type() const { return PT; } +#define COMPUTATION_CONTROLLERS \ + if (cancel_computation()) \ + continue; \ + 
pause_computation(); + /** @brief A generic learning machine interface. * * A machine takes as input CFeatures and CLabels (by default). @@ -306,6 +314,37 @@ class CMachine : public CSGObject return PT_BINARY; } +#ifndef SWIG + /** @return whether the algorithm needs to be stopped */ + SG_FORCED_INLINE bool cancel_computation() const + { + return m_cancel_computation.load(); + } +#endif + +#ifndef SWIG + /** Pause the algorithm if the flag is set */ + SG_FORCED_INLINE void pause_computation() + { + if (m_pause_computation_flag.load()) + { + std::unique_lock lck(m_mutex); + while (m_pause_computation_flag.load()) + m_pause_computation.wait(lck); + } + } +#endif + +#ifndef SWIG + /** Resume current computation (sets the flag) */ + SG_FORCED_INLINE void resume_computation() + { + std::unique_lock lck(m_mutex); + m_pause_computation_flag = false; + m_pause_computation.notify_all(); + } +#endif + virtual const char* get_name() const { return "Machine"; } protected: @@ -357,6 +396,38 @@ class CMachine : public CSGObject /** returns whether machine require labels for training */ virtual bool train_require_labels() const { return true; } + /** connect the machine instance to the signal handler */ + rxcpp::subscription connect_to_signal_handler(); + + /** reset the computation variables */ + void reset_computation_variables() + { + m_cancel_computation = false; + m_pause_computation_flag = false; + } + + /** The action which will be done when the user decides to + * premature stop the CMachine execution */ + virtual void on_next() + { + m_cancel_computation.store(true); + } + + /** The action which will be done when the user decides to + * pause the CMachine execution */ + virtual void on_pause() + { + m_pause_computation_flag.store(true); + /* Here there should be the actual code*/ + resume_computation(); + } + + /** The action which will be done when the user decides to + * return to prompt and terminate the program execution */ + virtual void on_complete() + { + } + 
protected: /** maximum training time */ float64_t m_max_train_time; @@ -372,6 +443,18 @@ class CMachine : public CSGObject /** whether data is locked */ bool m_data_locked; + + /** Cancel computation */ + std::atomic m_cancel_computation; + + /** Pause computation flag */ + std::atomic m_pause_computation_flag; + + /** Conditional variable to make threads wait */ + std::condition_variable m_pause_computation; + + /** Mutex used to pause threads */ + std::mutex m_mutex; }; } #endif // _MACHINE_H__ diff --git a/src/shogun/machine/OnlineLinearMachine.cpp b/src/shogun/machine/OnlineLinearMachine.cpp index 47688e49e44..cb7c2a0d37d 100644 --- a/src/shogun/machine/OnlineLinearMachine.cpp +++ b/src/shogun/machine/OnlineLinearMachine.cpp @@ -12,13 +12,16 @@ #include #include #include +#include + +#include using namespace shogun; COnlineLinearMachine::COnlineLinearMachine() -: CMachine(), w_dim(0), w(NULL), bias(0), features(NULL) +: CMachine(), bias(0), features(NULL) { - m_parameters->add_vector(&w, &w_dim, "w", "Parameter vector w."); + SG_ADD(&m_w, "m_w", "Parameter vector w.", MS_NOT_AVAILABLE); SG_ADD(&bias, "bias", "Bias b.", MS_NOT_AVAILABLE); SG_ADD((CSGObject**) &features, "features", "Feature object.", MS_NOT_AVAILABLE); @@ -26,10 +29,6 @@ COnlineLinearMachine::COnlineLinearMachine() COnlineLinearMachine::~COnlineLinearMachine() { - // It is possible that a derived class may have already - // called SG_FREE() on the weight vector - if (w != NULL) - SG_FREE(w); SG_UNREF(features); } @@ -58,37 +57,32 @@ SGVector COnlineLinearMachine::apply_get_outputs(CFeatures* data) ASSERT(features) ASSERT(features->has_property(FP_STREAMING_DOT)) - DynArray* labels_dynarray=new DynArray(); - int32_t num_labels=0; - + std::vector labels; features->start_parser(); while (features->get_next_example()) { - float64_t current_lab=features->dense_dot(w, w_dim) + bias; - - labels_dynarray->append_element(current_lab); - num_labels++; + float64_t 
current_lab=features->dense_dot(m_w.vector, m_w.vlen) + bias; + labels.push_back(current_lab); features->release_example(); } features->end_parser(); - SGVector labels_array(num_labels); - for (int32_t i=0; i labels_array(labels.size()); + sg_memcpy(labels_array.vector, labels.data(), sizeof(float64_t)*labels.size()); - delete labels_dynarray; return labels_array; } float32_t COnlineLinearMachine::apply_one(float32_t* vec, int32_t len) { - return CMath::dot(vec, w, len)+bias; + SGVector wrap(vec, len, false); + return linalg::dot(wrap, m_w)+bias; } float32_t COnlineLinearMachine::apply_to_current_example() { - return features->dense_dot(w, w_dim)+bias; + return features->dense_dot(m_w.vector, m_w.vlen)+bias; } bool COnlineLinearMachine::train_machine(CFeatures *data) diff --git a/src/shogun/machine/OnlineLinearMachine.h b/src/shogun/machine/OnlineLinearMachine.h index c8f3e625017..6cc6d6f0f94 100644 --- a/src/shogun/machine/OnlineLinearMachine.h +++ b/src/shogun/machine/OnlineLinearMachine.h @@ -57,18 +57,6 @@ class COnlineLinearMachine : public CMachine COnlineLinearMachine(); virtual ~COnlineLinearMachine(); - /** get w - * - * @param dst_w store w in this argument - * @param dst_dims dimension of w - */ - virtual void get_w(float32_t*& dst_w, int32_t& dst_dims) - { - ASSERT(w && w_dim>0) - dst_w=w; - dst_dims=w_dim; - } - /** * Get w as a _new_ float64_t array * @@ -77,23 +65,20 @@ class COnlineLinearMachine : public CMachine */ virtual void get_w(float64_t*& dst_w, int32_t& dst_dims) { - ASSERT(w && w_dim>0) - dst_w=SG_MALLOC(float64_t, w_dim); - for (int32_t i=0; i 0) + dst_w=SG_MALLOC(float64_t, m_w.vlen); + for (int32_t i=0; i get_w() + virtual SGVector get_w() const { - float32_t * dst_w = SG_MALLOC(float32_t, w_dim); - for (int32_t i=0; i(dst_w, w_dim); + return m_w; } /** set w @@ -101,12 +86,9 @@ class COnlineLinearMachine : public CMachine * @param src_w new w * @param src_w_dim dimension of new w */ - virtual void set_w(float32_t* src_w, int32_t 
src_w_dim) + virtual void set_w(const SGVector w) { - SG_FREE(w); - w=SG_MALLOC(float32_t, src_w_dim); - sg_memcpy(w, src_w, size_t(src_w_dim)*sizeof(float32_t)); - w_dim=src_w_dim; + m_w = w; } /** @@ -117,11 +99,9 @@ class COnlineLinearMachine : public CMachine */ virtual void set_w(float64_t* src_w, int32_t src_w_dim) { - SG_FREE(w); - w=SG_MALLOC(float32_t, src_w_dim); + m_w = SGVector(src_w_dim); for (int32_t i=0; i m_w; /** bias */ float32_t bias; /** features */ diff --git a/src/shogun/machine/StructuredOutputMachine.cpp b/src/shogun/machine/StructuredOutputMachine.cpp index 1604cb99dff..cf3426440e2 100644 --- a/src/shogun/machine/StructuredOutputMachine.cpp +++ b/src/shogun/machine/StructuredOutputMachine.cpp @@ -15,6 +15,8 @@ #include #include +#include + using namespace shogun; CStructuredOutputMachine::CStructuredOutputMachine() @@ -94,7 +96,7 @@ CLossFunction* CStructuredOutputMachine::get_surrogate_loss() const return m_surrogate_loss; } -float64_t CStructuredOutputMachine::risk_nslack_margin_rescale(float64_t* subgrad, float64_t* W, TMultipleCPinfo* info) +float64_t CStructuredOutputMachine::risk_nslack_margin_rescale(SGVector& subgrad, SGVector& W, TMultipleCPinfo* info) { int32_t dim = m_model->get_dim(); @@ -113,16 +115,15 @@ float64_t CStructuredOutputMachine::risk_nslack_margin_rescale(float64_t* subgra SG_UNREF(features); float64_t R = 0.0; - for (int32_t i=0; iargmax(SGVector(W,dim,false), i, true); + CResultSet* result = m_model->argmax(SGVector(W.vector,dim,false), i, true); SGVector psi_pred = result->psi_pred; SGVector psi_truth = result->psi_truth; - SGVector::vec1_plus_scalar_times_vec2(subgrad, 1.0, psi_pred.vector, dim); - SGVector::vec1_plus_scalar_times_vec2(subgrad, -1.0, psi_truth.vector, dim); + SGVector::vec1_plus_scalar_times_vec2(subgrad.vector, 1.0, psi_pred.vector, dim); + SGVector::vec1_plus_scalar_times_vec2(subgrad.vector, -1.0, psi_truth.vector, dim); R += result->score; SG_UNREF(result); } @@ -130,31 +131,31 @@ float64_t 
CStructuredOutputMachine::risk_nslack_margin_rescale(float64_t* subgra return R; } -float64_t CStructuredOutputMachine::risk_nslack_slack_rescale(float64_t* subgrad, float64_t* W, TMultipleCPinfo* info) +float64_t CStructuredOutputMachine::risk_nslack_slack_rescale(SGVector& subgrad, SGVector& W, TMultipleCPinfo* info) { SG_ERROR("%s::risk_nslack_slack_rescale() has not been implemented!\n", get_name()); return 0.0; } -float64_t CStructuredOutputMachine::risk_1slack_margin_rescale(float64_t* subgrad, float64_t* W, TMultipleCPinfo* info) +float64_t CStructuredOutputMachine::risk_1slack_margin_rescale(SGVector& subgrad, SGVector& W, TMultipleCPinfo* info) { SG_ERROR("%s::risk_1slack_margin_rescale() has not been implemented!\n", get_name()); return 0.0; } -float64_t CStructuredOutputMachine::risk_1slack_slack_rescale(float64_t* subgrad, float64_t* W, TMultipleCPinfo* info) +float64_t CStructuredOutputMachine::risk_1slack_slack_rescale(SGVector& subgrad, SGVector& W, TMultipleCPinfo* info) { SG_ERROR("%s::risk_1slack_slack_rescale() has not been implemented!\n", get_name()); return 0.0; } -float64_t CStructuredOutputMachine::risk_customized_formulation(float64_t* subgrad, float64_t* W, TMultipleCPinfo* info) +float64_t CStructuredOutputMachine::risk_customized_formulation(SGVector& subgrad, SGVector& W, TMultipleCPinfo* info) { SG_ERROR("%s::risk_customized_formulation() has not been implemented!\n", get_name()); return 0.0; } -float64_t CStructuredOutputMachine::risk(float64_t* subgrad, float64_t* W, +float64_t CStructuredOutputMachine::risk(SGVector& subgrad, SGVector& W, TMultipleCPinfo* info, EStructRiskType rtype) { float64_t ret = 0.0; diff --git a/src/shogun/machine/StructuredOutputMachine.h b/src/shogun/machine/StructuredOutputMachine.h index 308f549407e..37f0b8b432a 100644 --- a/src/shogun/machine/StructuredOutputMachine.h +++ b/src/shogun/machine/StructuredOutputMachine.h @@ -119,7 +119,7 @@ class CStructuredOutputMachine : public CMachine * @param rtype The 
type of structured risk * @return Value of the computed risk at given point W */ - virtual float64_t risk(float64_t* subgrad, float64_t* W, + virtual float64_t risk(SGVector& subgrad, SGVector& W, TMultipleCPinfo* info=0, EStructRiskType rtype = N_SLACK_MARGIN_RESCALING); /** @return training progress helper */ @@ -167,7 +167,7 @@ class CStructuredOutputMachine : public CMachine * @param info Helper info for multiple cutting plane models algorithm * @return Value of the computed risk at given point W */ - virtual float64_t risk_nslack_margin_rescale(float64_t* subgrad, float64_t* W, TMultipleCPinfo* info=0); + virtual float64_t risk_nslack_margin_rescale(SGVector& subgrad, SGVector& W, TMultipleCPinfo* info=0); /** n-slack formulation and slack rescaling * @@ -176,7 +176,7 @@ class CStructuredOutputMachine : public CMachine * @param info Helper info for multiple cutting plane models algorithm * @return Value of the computed risk at given point W */ - virtual float64_t risk_nslack_slack_rescale(float64_t* subgrad, float64_t* W, TMultipleCPinfo* info=0); + virtual float64_t risk_nslack_slack_rescale(SGVector& subgrad, SGVector& W, TMultipleCPinfo* info=0); /** 1-slack formulation and margin rescaling * @@ -185,7 +185,7 @@ class CStructuredOutputMachine : public CMachine * @param info Helper info for multiple cutting plane models algorithm * @return Value of the computed risk at given point W */ - virtual float64_t risk_1slack_margin_rescale(float64_t* subgrad, float64_t* W, TMultipleCPinfo* info=0); + virtual float64_t risk_1slack_margin_rescale(SGVector& subgrad, SGVector& W, TMultipleCPinfo* info=0); /** 1-slack formulation and slack rescaling * @@ -194,7 +194,7 @@ class CStructuredOutputMachine : public CMachine * @param info Helper info for multiple cutting plane models algorithm * @return Value of the computed risk at given point W */ - virtual float64_t risk_1slack_slack_rescale(float64_t* subgrad, float64_t* W, TMultipleCPinfo* info=0); + virtual float64_t 
risk_1slack_slack_rescale(SGVector& subgrad, SGVector& W, TMultipleCPinfo* info=0); /** customized risk type * @@ -203,7 +203,7 @@ class CStructuredOutputMachine : public CMachine * @param info Helper info for multiple cutting plane models algorithm * @return Value of the computed risk at given point W */ - virtual float64_t risk_customized_formulation(float64_t* subgrad, float64_t* W, TMultipleCPinfo* info=0); + virtual float64_t risk_customized_formulation(SGVector& subgrad, SGVector& W, TMultipleCPinfo* info=0); private: /** register class members */ diff --git a/src/shogun/machine/gp/KLDualInferenceMethod.cpp b/src/shogun/machine/gp/KLDualInferenceMethod.cpp index f126d96804d..3f9ab520f4c 100644 --- a/src/shogun/machine/gp/KLDualInferenceMethod.cpp +++ b/src/shogun/machine/gp/KLDualInferenceMethod.cpp @@ -540,10 +540,8 @@ float64_t CKLDualInferenceMethod::optimization() cost_fun->set_target(this); bool cleanup=false; -#ifdef USE_REFERENCE_COUNTING if(this->ref_count()>1) cleanup=true; -#endif minimizer->set_cost_function(cost_fun); float64_t nlml_opt = minimizer->minimize(); diff --git a/src/shogun/machine/gp/KLInference.cpp b/src/shogun/machine/gp/KLInference.cpp index c8cad41481c..b820d7a96d7 100644 --- a/src/shogun/machine/gp/KLInference.cpp +++ b/src/shogun/machine/gp/KLInference.cpp @@ -344,10 +344,9 @@ float64_t CKLInference::optimization() KLInferenceCostFunction *cost_fun=new KLInferenceCostFunction(); cost_fun->set_target(this); bool cleanup=false; -#ifdef USE_REFERENCE_COUNTING if(this->ref_count()>1) cleanup=true; -#endif + FirstOrderMinimizer* opt= dynamic_cast(m_minimizer); REQUIRE(opt, "FirstOrderMinimizer is required\n") diff --git a/src/shogun/machine/gp/LaplaceInference.cpp b/src/shogun/machine/gp/LaplaceInference.cpp index ddec6e77971..14a2fcec333 100644 --- a/src/shogun/machine/gp/LaplaceInference.cpp +++ b/src/shogun/machine/gp/LaplaceInference.cpp @@ -34,7 +34,6 @@ #include -#include #include using namespace shogun; diff --git 
a/src/shogun/machine/gp/LogitLikelihood.cpp b/src/shogun/machine/gp/LogitLikelihood.cpp index fb4924c0691..94884392df2 100644 --- a/src/shogun/machine/gp/LogitLikelihood.cpp +++ b/src/shogun/machine/gp/LogitLikelihood.cpp @@ -32,7 +32,9 @@ #include +#ifdef USE_GPL_SHOGUN #include +#endif //USE_GPL_SHOGUN #include #include @@ -388,8 +390,12 @@ SGVector CLogitLikelihood::get_log_zeroth_moments( g->set_a(y[i]); // evaluate integral on (-inf, inf) +#ifdef USE_GPL_SHOGUN r[i]=CIntegration::integrate_quadgk(h, -CMath::INFTY, mu[i])+ CIntegration::integrate_quadgk(h, mu[i], CMath::INFTY); +#else + SG_GPL_ONLY +#endif //USE_GPL_SHOGUN } SG_UNREF(h); @@ -430,15 +436,18 @@ float64_t CLogitLikelihood::get_first_moment(SGVector mu, // create an object of k(x)=x*N(x|mu,sigma^2)*sigmoid(x) CProductFunction* k=new CProductFunction(l, h); SG_REF(k); - + float64_t Ex=0; +#ifdef USE_GPL_SHOGUN // compute Z = \int N(x|mu,sigma)*sigmoid(a*x) dx float64_t Z=CIntegration::integrate_quadgk(h, -CMath::INFTY, mu[i])+ CIntegration::integrate_quadgk(h, mu[i], CMath::INFTY); // compute 1st moment: E[x] = Z^-1 * \int x*N(x|mu,sigma)*sigmoid(a*x)dx - float64_t Ex=(CIntegration::integrate_quadgk(k, -CMath::INFTY, mu[i])+ + Ex=(CIntegration::integrate_quadgk(k, -CMath::INFTY, mu[i])+ CIntegration::integrate_quadgk(k, mu[i], CMath::INFTY))/Z; - +#else + SG_GPL_ONLY +#endif //USE_GPL_SHOGUN SG_UNREF(k); return Ex; @@ -482,18 +491,23 @@ float64_t CLogitLikelihood::get_second_moment(SGVector mu, CProductFunction* p=new CProductFunction(q, h); SG_REF(p); + float64_t Ex=0; + float64_t Ex2=0; +#ifdef USE_GPL_SHOGUN // compute Z = \int N(x|mu,sigma)*sigmoid(a*x) dx float64_t Z=CIntegration::integrate_quadgk(h, -CMath::INFTY, mu[i])+ CIntegration::integrate_quadgk(h, mu[i], CMath::INFTY); // compute 1st moment: E[x] = Z^-1 * \int x*N(x|mu,sigma)*sigmoid(a*x)dx - float64_t Ex=(CIntegration::integrate_quadgk(k, -CMath::INFTY, mu[i])+ + Ex=(CIntegration::integrate_quadgk(k, -CMath::INFTY, mu[i])+ 
CIntegration::integrate_quadgk(k, mu[i], CMath::INFTY))/Z; // compute E[x^2] = Z^-1 * \int x^2*N(x|mu,sigma)*sigmoid(a*x)dx - float64_t Ex2=(CIntegration::integrate_quadgk(p, -CMath::INFTY, mu[i])+ + Ex2=(CIntegration::integrate_quadgk(p, -CMath::INFTY, mu[i])+ CIntegration::integrate_quadgk(p, mu[i], CMath::INFTY))/Z; - +#else + SG_GPL_ONLY +#endif //USE_GPL_SHOGUN SG_UNREF(k); SG_UNREF(p); diff --git a/src/shogun/machine/gp/LogitLikelihood.h b/src/shogun/machine/gp/LogitLikelihood.h index b611a16be18..57251547a3d 100644 --- a/src/shogun/machine/gp/LogitLikelihood.h +++ b/src/shogun/machine/gp/LogitLikelihood.h @@ -179,7 +179,6 @@ class CLogitLikelihood : public CLikelihoodModel */ virtual float64_t get_second_moment(SGVector mu, SGVector s2, const CLabels* lab, index_t i) const; - /** return whether logit likelihood function supports binary classification * * @return true diff --git a/src/shogun/machine/gp/MultiLaplaceInferenceMethod.cpp b/src/shogun/machine/gp/MultiLaplaceInferenceMethod.cpp index 0373305dbcb..2e27678d2b9 100644 --- a/src/shogun/machine/gp/MultiLaplaceInferenceMethod.cpp +++ b/src/shogun/machine/gp/MultiLaplaceInferenceMethod.cpp @@ -44,7 +44,9 @@ #include #include #include +#ifdef USE_GPL_SHOGUN #include +#endif //USE_GPL_SHOGUN using namespace shogun; using namespace Eigen; @@ -53,7 +55,7 @@ namespace shogun { #ifndef DOXYGEN_SHOULD_SKIP_THIS - +#ifdef USE_GPL_SHOGUN /** Wrapper class used for the Brent minimizer */ class CMultiPsiLine : public func_base { @@ -95,6 +97,7 @@ class CMultiPsiLine : public func_base return result; } }; +#endif //USE_GPL_SHOGUN #endif /* DOXYGEN_SHOULD_SKIP_THIS */ @@ -351,7 +354,7 @@ void CMultiLaplaceInferenceMethod::update_alpha() VectorXd &eigen_dalpha=eigen_b; eigen_dalpha+=eigen_E.transpose()*(eigen_M.triangularView().solve(tmp4))-eigen_c-eigen_alpha; - +#ifdef USE_GPL_SHOGUN // perform Brent's optimization CMultiPsiLine func; @@ -368,6 +371,9 @@ void CMultiLaplaceInferenceMethod::update_alpha() float64_t x; 
Psi_New=local_min(0, m_opt_max, m_opt_tolerance, func, x); +#else + SG_GPL_ONLY +#endif //USE_GPL_SHOGUN m_nlz+=Psi_New; } diff --git a/src/shogun/machine/gp/NumericalVGLikelihood.cpp b/src/shogun/machine/gp/NumericalVGLikelihood.cpp index 51bf1cf9818..7cfbeec7bba 100644 --- a/src/shogun/machine/gp/NumericalVGLikelihood.cpp +++ b/src/shogun/machine/gp/NumericalVGLikelihood.cpp @@ -44,7 +44,9 @@ #include #include #include +#ifdef USE_GPL_SHOGUN #include +#endif //USE_GPL_SHOGUN using namespace Eigen; @@ -254,7 +256,11 @@ bool CNumericalVGLikelihood::set_variational_distribution(SGVector mu { m_xgh=SGVector(m_GHQ_N); m_wgh=SGVector(m_GHQ_N); +#ifdef USE_GPL_SHOGUN CIntegration::generate_gauher(m_xgh, m_wgh); +#else + SG_GPL_ONLY +#endif //USE_GPL_SHOGUN m_is_init_GHQ=true; } diff --git a/src/shogun/machine/gp/SingleFITCLaplaceInferenceMethod.cpp b/src/shogun/machine/gp/SingleFITCLaplaceInferenceMethod.cpp index d0a19ee6ff9..8979f2ab3c9 100644 --- a/src/shogun/machine/gp/SingleFITCLaplaceInferenceMethod.cpp +++ b/src/shogun/machine/gp/SingleFITCLaplaceInferenceMethod.cpp @@ -33,7 +33,9 @@ #include #include +#ifdef USE_GPL_SHOGUN #include +#endif //USE_GPL_SHOGUN #include #include #include @@ -45,7 +47,7 @@ namespace shogun { #ifndef DOXYGEN_SHOULD_SKIP_THIS - +#ifdef USE_GPL_SHOGUN /** Wrapper class used for the Brent minimizer */ class CFITCPsiLine : public func_base { @@ -89,6 +91,7 @@ class CFITCPsiLine : public func_base return result; } }; +#endif //USE_GPL_SHOGUN class SingleFITCLaplaceInferenceMethodCostFunction: public FirstOrderCostFunction { @@ -271,7 +274,7 @@ float64_t CSingleFITCLaplaceNewtonOptimizer::minimize() MatrixXd eigen_RV=eigen_tmp2*eigen_V; //dalpha = dd.*b - (W.*dd).*(RV'*(RV*(dd.*b))) - alpha; % Newt dir + line search VectorXd dalpha=dd.cwiseProduct(b)-eigen_t.cwiseProduct(eigen_RV.transpose()*(eigen_RV*(dd.cwiseProduct(b))))-eigen_al; - +#ifdef USE_GPL_SHOGUN //perform Brent's optimization CFITCPsiLine func; @@ -289,6 +292,9 @@ float64_t 
CSingleFITCLaplaceNewtonOptimizer::minimize() float64_t x; Psi_New=local_min(0, m_opt_max, m_opt_tolerance, func, x); +#else + SG_GPL_ONLY +#endif //USE_GPL_SHOGUN } if (Psi_Old-Psi_New>m_tolerance && iter>=m_iter) @@ -563,10 +569,8 @@ void CSingleFITCLaplaceInferenceMethod::update_alpha() if (opt) { opt->set_target(this); -#ifdef USE_REFERENCE_COUNTING if(this->ref_count()>1) cleanup=true; -#endif opt->minimize(); opt->unset_target(cleanup); } @@ -577,10 +581,8 @@ void CSingleFITCLaplaceInferenceMethod::update_alpha() SingleFITCLaplaceInferenceMethodCostFunction *cost_fun=new SingleFITCLaplaceInferenceMethodCostFunction(); cost_fun->set_target(this); -#ifdef USE_REFERENCE_COUNTING if(this->ref_count()>1) cleanup=true; -#endif minimizer->set_cost_function(cost_fun); minimizer->minimize(); minimizer->unset_cost_function(false); diff --git a/src/shogun/machine/gp/SingleLaplaceInferenceMethod.cpp b/src/shogun/machine/gp/SingleLaplaceInferenceMethod.cpp index bb9417906df..a17e1470b20 100644 --- a/src/shogun/machine/gp/SingleLaplaceInferenceMethod.cpp +++ b/src/shogun/machine/gp/SingleLaplaceInferenceMethod.cpp @@ -19,7 +19,9 @@ #include #include +#ifdef USE_GPL_SHOGUN #include +#endif //USE_GPL_SHOGUN #include #include @@ -29,6 +31,7 @@ using namespace Eigen; namespace shogun { +#ifdef USE_GPL_SHOGUN #ifndef DOXYGEN_SHOULD_SKIP_THIS /** Wrapper class used for the Brent minimizer */ class PsiLine : public func_base @@ -68,6 +71,7 @@ class PsiLine : public func_base return result; } }; +#endif //USE_GPL_SHOGUN class SingleLaplaceInferenceMethodCostFunction: public FirstOrderCostFunction { @@ -236,6 +240,7 @@ float64_t CSingleLaplaceNewtonOptimizer::minimize() VectorXd dalpha=b-eigen_sW.cwiseProduct( L.solve(eigen_sW.cwiseProduct(eigen_ktrtr*b*CMath::exp((m_obj->m_log_scale)*2.0))))-eigen_alpha; +#ifdef USE_GPL_SHOGUN // perform Brent's optimization PsiLine func; @@ -253,6 +258,9 @@ float64_t CSingleLaplaceNewtonOptimizer::minimize() float64_t x; Psi_New=local_min(0, 
m_opt_max, m_opt_tolerance, func, x); +#else + SG_GPL_ONLY +#endif //USE_GPL_SHOGUN } if (Psi_Old-Psi_New>m_tolerance && iter>=m_iter) @@ -506,10 +514,8 @@ void CSingleLaplaceInferenceMethod::update_alpha() if (opt) { opt->set_target(this); -#ifdef USE_REFERENCE_COUNTING if(this->ref_count()>1) cleanup=true; -#endif opt->minimize(); opt->unset_target(cleanup); } @@ -517,18 +523,19 @@ void CSingleLaplaceInferenceMethod::update_alpha() { FirstOrderMinimizer* minimizer= dynamic_cast(m_minimizer); REQUIRE(minimizer, "The provided minimizer is not supported\n"); - +#ifdef USE_GPL_SHOGUN SingleLaplaceInferenceMethodCostFunction *cost_fun=new SingleLaplaceInferenceMethodCostFunction(); cost_fun->set_target(this); -#ifdef USE_REFERENCE_COUNTING if(this->ref_count()>1) cleanup=true; -#endif minimizer->set_cost_function(cost_fun); minimizer->minimize(); minimizer->unset_cost_function(false); cost_fun->unset_target(cleanup); SG_UNREF(cost_fun); +#else + SG_GPL_ONLY +#endif //USE_GPL_SHOGUN } // get mean vector and create eigen representation of it Map eigen_mean(m_mean_f.vector, m_mean_f.vlen); diff --git a/src/shogun/machine/gp/SingleSparseInference.cpp b/src/shogun/machine/gp/SingleSparseInference.cpp index f5fd2c4f3bf..2d50137f906 100644 --- a/src/shogun/machine/gp/SingleSparseInference.cpp +++ b/src/shogun/machine/gp/SingleSparseInference.cpp @@ -363,10 +363,8 @@ void CSingleSparseInference::optimize_inducing_features() SingleSparseInferenceCostFunction *cost_fun=new SingleSparseInferenceCostFunction(); cost_fun->set_target(this); bool cleanup=false; -#ifdef USE_REFERENCE_COUNTING if(this->ref_count()>1) cleanup=true; -#endif #ifdef USE_GPL_SHOGUN #ifdef HAVE_NLOPT diff --git a/src/shogun/machine/gp/StudentsTLikelihood.cpp b/src/shogun/machine/gp/StudentsTLikelihood.cpp index 4a9c2c56294..d031747350c 100644 --- a/src/shogun/machine/gp/StudentsTLikelihood.cpp +++ b/src/shogun/machine/gp/StudentsTLikelihood.cpp @@ -35,7 +35,9 @@ #include +#ifdef USE_GPL_SHOGUN #include 
+#endif //USE_GPL_SHOGUN #include #include #include @@ -617,9 +619,13 @@ SGVector CStudentsTLikelihood::get_log_zeroth_moments( // set Stundent's-t pdf parameters g->set_mu(y[i]); +#ifdef USE_GPL_SHOGUN // evaluate integral on (-inf, inf) r[i]=CIntegration::integrate_quadgk(h, -CMath::INFTY, mu[i])+ CIntegration::integrate_quadgk(h, mu[i], CMath::INFTY); +#else + SG_ERROR("StudentsT likelihood moments only supported under GPL.\n") +#endif //USE_GPL_SHOGUN } SG_UNREF(h); @@ -658,15 +664,19 @@ float64_t CStudentsTLikelihood::get_first_moment(SGVector mu, CProductFunction* k=new CProductFunction(new CLinearFunction(), h); SG_REF(k); + float64_t Ex=0; +#ifdef USE_GPL_SHOGUN // compute Z = \int N(x|mu,sigma)*t(x|mu,sigma,nu) dx float64_t Z=CIntegration::integrate_quadgk(h, -CMath::INFTY, mu[i])+ CIntegration::integrate_quadgk(h, mu[i], CMath::INFTY); // compute 1st moment: // E[x] = Z^-1 * \int x*N(x|mu,sigma)*t(x|mu,sigma,nu)dx - float64_t Ex=(CIntegration::integrate_quadgk(k, -CMath::INFTY, mu[i])+ + Ex=(CIntegration::integrate_quadgk(k, -CMath::INFTY, mu[i])+ CIntegration::integrate_quadgk(k, mu[i], CMath::INFTY))/Z; - +#else + SG_ERROR("StudentsT likelihood moments only supported under GPL.\n") +#endif //USE_GPL_SHOGUN SG_UNREF(k); return Ex; @@ -704,19 +714,24 @@ float64_t CStudentsTLikelihood::get_second_moment(SGVector mu, CProductFunction* p=new CProductFunction(new CQuadraticFunction(), h); SG_REF(p); + float64_t Ex=0; + float64_t Ex2=0; +#ifdef USE_GPL_SHOGUN // compute Z = \int N(x|mu,sigma)*t(x|mu,sigma,nu) dx float64_t Z=CIntegration::integrate_quadgk(h, -CMath::INFTY, mu[i])+ CIntegration::integrate_quadgk(h, mu[i], CMath::INFTY); // compute 1st moment: // E[x] = Z^-1 * \int x*N(x|mu,sigma)*t(x|mu,sigma,nu)dx - float64_t Ex=(CIntegration::integrate_quadgk(k, -CMath::INFTY, mu[i])+ + Ex=(CIntegration::integrate_quadgk(k, -CMath::INFTY, mu[i])+ CIntegration::integrate_quadgk(k, mu[i], CMath::INFTY))/Z; // compute E[x^2] = Z^-1 * \int 
x^2*N(x|mu,sigma)*t(x|mu,sigma,nu)dx - float64_t Ex2=(CIntegration::integrate_quadgk(p, -CMath::INFTY, mu[i])+ + Ex2=(CIntegration::integrate_quadgk(p, -CMath::INFTY, mu[i])+ CIntegration::integrate_quadgk(p, mu[i], CMath::INFTY))/Z; - +#else + SG_GPL_ONLY +#endif //USE_GPL_SHOGUN SG_UNREF(k); SG_UNREF(p); diff --git a/src/shogun/mathematics/Integration.cpp b/src/shogun/mathematics/Integration.cpp deleted file mode 100644 index 8633a623c87..00000000000 --- a/src/shogun/mathematics/Integration.cpp +++ /dev/null @@ -1,910 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (w) 2014 Wu Lin - * Written (W) 2013 Roman Votyakov - * - * The abscissae and weights for Gauss-Kronrod rules are taken form - * QUADPACK, which is in public domain. - * http://www.netlib.org/quadpack/ - * - * See header file for which functions are adapted from GNU Octave, - * file quadgk.m: Copyright (C) 2008-2012 David Bateman under GPLv3 - * http://www.gnu.org/software/octave/ - * - * See header file for which functions are adapted from - * Gaussian Process Machine Learning Toolbox, file util/gauher.m, - * http://www.gaussianprocess.org/gpml/code/matlab/doc/ - */ - -#include - - -#include -#include - -using namespace shogun; -using namespace Eigen; - -namespace shogun -{ - -#ifndef DOXYGEN_SHOULD_SKIP_THIS - -/** @brief Class of the function, which is used for standard infinite - * to finite integral transformation - * - * \f[ - * \int_{-\infty}^{\infty}f(x)dx = \int_{-1}^{1}f(g(t))g'(t)dt - * \f] - * - * where \f$g(t)=\frac{t}{1-t^2}\f$ and - * \f$g'(t)=\frac{1+t^2}{(1-t^2)^2}\f$. 
- */ -class CITransformFunction : public CFunction -{ -public: - /** constructor - * - * @param f function \f$f(x)\f$ - */ - CITransformFunction(CFunction* f) - { - SG_REF(f); - m_f=f; - } - - virtual ~CITransformFunction() { SG_UNREF(m_f); } - - /** return the real value of the function at given point - * - * @param x argument - * - * @return \f$f(g(x))*g'(x)\f$, where \f$g(x)=\frac{x}{1-x^2}\f$ - * and \f$g'(t)=\frac{1+t^2}{(1-t^2)^2}\f$ - */ - virtual float64_t operator() (float64_t x) - { - float64_t hx=1.0/(1.0-CMath::sq(x)); - float64_t gx=x*hx; - float64_t dgx=(1.0+CMath::sq(x))*CMath::sq(hx); - - return (*m_f)(gx)*dgx; - } - -private: - /** function \f$f(x)\f$ */ - CFunction* m_f; -}; - -/** @brief Class of the function, which is used for singularity - * weakening transform on \f$(-\infty, b]\f$ - * - * \f[ - * \int_{-\infty}^{b} f(x)dx=-\int_{-\infty}^{0} f(b-t^2)2tdt - * \f] - * - * and the finite interval transform - * - * \f[ - * \int_{-\infty}^{0} f(b-t^2)2tdt = \int_{-1}^{0} f(b-g(s)^2)2g(s)g'(s)ds - * \f] - * - * where \f$g(s)=\frac{s}{1+s}\f$ and \f$g'(s)=\frac{1}{(1+s)^2}\f$. 
- */ -class CILTransformFunction : public CFunction -{ -public: - /** constructor - * - * @param f function \f$f(x)\f$ - * @param b upper bound - */ - CILTransformFunction(CFunction* f, float64_t b) - { - SG_REF(f); - m_f=f; - m_b=b; - } - - virtual ~CILTransformFunction() { SG_UNREF(m_f); } - - /** return the real value of the function at given point - * - * @param x argument of a function - * - * @return \f$f(b-g(x)^2)2g(x)g'(x)dx\f$, where - * \f$g(x)=\frac{x}{1+x}\f$ and \f$g'(x)=\frac{1}{(1+x)^2}\f$ - */ - virtual float64_t operator() (float64_t x) - { - float64_t hx=1.0/(1.0+x); - float64_t gx=x*hx; - float64_t dgx=CMath::sq(hx); - - return -(*m_f)(m_b-CMath::sq(gx))*2*gx*dgx; - } - -private: - /** function \f$f(x)\f$ */ - CFunction* m_f; - - /** upper bound */ - float64_t m_b; -}; - -/** @brief Class of the function, which is used for singularity - * weakening transform on \f$[a, \infty)\f$ - * - * \f[ - * \int_{a}^{\infty} f(x)dx=\int_{0}^{\infty} f(a+t^2)2tdt - * \f] - * - * and the finite interval transform - * - * \f[ - * \int_{0}^{\infty} f(a+t^2)2tdt = \int_{0}^{1} f(a+g(s)^2)2g(s)g'(s)ds - * \f] - * - * where \f$g(s)=\frac{s}{1-s}\f$ and \f$g'(s)=\frac{1}{(1-s)^2}\f$. 
- */ -class CIUTransformFunction : public CFunction -{ -public: - /** constructor - * - * @param f function \f$f(x)\f$ - * @param a lower bound - */ - CIUTransformFunction(CFunction* f, float64_t a) - { - SG_REF(f); - m_f=f; - m_a=a; - } - - virtual ~CIUTransformFunction() { SG_UNREF(m_f); } - - /** return the real value of the function at given point - * - * @param x argument of a function - * - * @return \f$f(a+g(x)^2)2g(x)g'(x)\f$, where - * \f$g(x)=\frac{x}{1-x}\f$ and \f$g'(x)=\frac{1}{(1-x)^2}\f$ - */ - virtual float64_t operator() (float64_t x) - { - float64_t hx=1.0/(1.0-x); - float64_t gx=x*hx; - float64_t dgx=CMath::sq(hx); - - return (*m_f)(m_a+CMath::sq(gx))*2*gx*dgx; - } - -private: - /** function \f$f(x)\f$ */ - CFunction* m_f; - - /** lower bound */ - float64_t m_a; -}; - -/** @brief Class of a function, which is used for finite integral - * transformation - * - * \f[ - * \int_{a}^{b}f(x)dx = \int_{-1}^{1} f(g(t))g'(t)dt - * \f] - * - * where \f$g(t)=\frac{b-a}{2}(\frac{t}{2}(3-t^2))+\frac{b+a}{2}\f$ - * and \f$g'(t)=\frac{b-a}{4}(3-3t^2)\f$. 
- */ -class CTransformFunction : public CFunction -{ -public: - /** constructor - * - * @param f function \f$f(x)\f$ - * @param a lower bound - * @param b upper bound - */ - CTransformFunction(CFunction* f, float64_t a, float64_t b) - { - SG_REF(f); - m_f=f; - m_a=a; - m_b=b; - } - - virtual ~CTransformFunction() { SG_UNREF(m_f); } - - /** return the real value of the function at given point - * - * @param x argument of a function - * - * @return \f$f(g(x))g'(x)\f$, where - * \f$g(t)=\frac{b-a}{2}(\frac{t}{2}(3-t^2))+\frac{b+a}{2}\f$ and - * \f$g'(t)=\frac{b-a}{4}(3-3t^2)\f$ - */ - virtual float64_t operator() (float64_t x) - { - float64_t qw=(m_b-m_a)/4.0; - float64_t gx=qw*(x*(3.0-CMath::sq(x)))+(m_b+m_a)/2.0; - float64_t dgx=qw*3.0*(1.0-CMath::sq(x)); - - return (*m_f)(gx)*dgx; - } - -private: - /** function \f$f(x)\f$ */ - CFunction* m_f; - - /** lower bound */ - float64_t m_a; - - /** upper bound */ - float64_t m_b; -}; - -#endif /* DOXYGEN_SHOULD_SKIP_THIS */ - -float64_t CIntegration::integrate_quadgk(CFunction* f, float64_t a, - float64_t b, float64_t abs_tol, float64_t rel_tol, uint32_t max_iter, - index_t sn) -{ - // check the parameters - REQUIRE(f, "Integrable function should not be NULL\n") - REQUIRE(abs_tol>0.0, "Absolute tolerance must be positive, but is %f\n", - abs_tol) - REQUIRE(rel_tol>0.0, "Relative tolerance must be positive, but is %f\n", - rel_tol) - REQUIRE(max_iter>0, "Maximum number of iterations must be greater than 0, " - "but is %d\n", max_iter) - REQUIRE(sn>0, "Initial number of subintervals must be greater than 0, " - "but is %d\n", sn) - - // integral evaluation function - typedef void TQuadGKEvaluationFunction(CFunction* f, - CDynamicArray* subs, CDynamicArray* q, - CDynamicArray* err); - - TQuadGKEvaluationFunction* evaluate_quadgk; - - CFunction* tf; - float64_t ta; - float64_t tb; - float64_t q_sign; - - // negate integral value and swap a and b, if a>b - if (a>b) - { - ta=b; - tb=a; - q_sign=-1.0; - } - else - { - ta=a; - tb=b; 
- q_sign=1.0; - } - - // transform integrable function and domain of integration - if (a==-CMath::INFTY && b==CMath::INFTY) - { - tf=new CITransformFunction(f); - evaluate_quadgk=&evaluate_quadgk15; - ta=-1.0; - tb=1.0; - } - else if (a==-CMath::INFTY) - { - tf=new CILTransformFunction(f, b); - evaluate_quadgk=&evaluate_quadgk15; - ta=-1.0; - tb=0.0; - } - else if (b==CMath::INFTY) - { - tf=new CIUTransformFunction(f, a); - evaluate_quadgk=&evaluate_quadgk15; - ta=0.0; - tb=1.0; - } - else - { - tf=new CTransformFunction(f, a, b); - evaluate_quadgk=&evaluate_quadgk21; - ta=-1.0; - tb=1.0; - } - - // compute initial subintervals, by dividing domain [a, b] into sn - // parts - CDynamicArray* subs=new CDynamicArray(); - - // width of each subinterval - float64_t sw=(tb-ta)/sn; - - for (index_t i=0; ipush_back(ta+i*sw); - subs->push_back(ta+(i+1)*sw); - } - - // evaluate integrals on initial subintervals - CDynamicArray* q_subs=new CDynamicArray(); - CDynamicArray* err_subs=new CDynamicArray(); - - evaluate_quadgk(tf, subs, q_subs, err_subs); - - // compute value of integral and error on [a, b] - float64_t q=0.0; - float64_t err=0.0; - - for (index_t i=0; iget_num_elements(); i++) - q+=(*q_subs)[i]; - - for (index_t i=0; iget_num_elements(); i++) - err+=(*err_subs)[i]; - - // evaluate tolerance - float64_t tol=CMath::max(abs_tol, rel_tol*CMath::abs(q)); - - // number of iterations - uint32_t iter=1; - - CDynamicArray* new_subs=new CDynamicArray(); - - while (err>tol && iterget_num_elements()/2; i++) - { - if (CMath::abs((*err_subs)[i])>=tol*CMath::abs((*subs)[2*i+1]- - (*subs)[2*i])/(tb-ta)) - { - // bisect subinterval - float64_t mid=((*subs)[2*i]+(*subs)[2*i+1])/2.0; - - new_subs->push_back((*subs)[2*i]); - new_subs->push_back(mid); - new_subs->push_back(mid); - new_subs->push_back((*subs)[2*i+1]); - - // subtract value of the integral and error on this - // subinterval from total value and error - q-=(*q_subs)[i]; - err-=(*err_subs)[i]; - } - } - - 
subs->set_array(new_subs->get_array(), new_subs->get_num_elements(), - new_subs->get_num_elements()); - - new_subs->reset_array(); - - // break if no new subintervals - if (!subs->get_num_elements()) - break; - - // evaluate integrals on selected subintervals - evaluate_quadgk(tf, subs, q_subs, err_subs); - - for (index_t i=0; iget_num_elements(); i++) - q+=(*q_subs)[i]; - - for (index_t i=0; iget_num_elements(); i++) - err+=(*err_subs)[i]; - - // evaluate tolerance - tol=CMath::max(abs_tol, rel_tol*CMath::abs(q)); - - iter++; - } - - SG_UNREF(new_subs); - - if (err>tol) - { - SG_SWARNING("Error tolerance not met. Estimated error is equal to %g " - "after %d iterations\n", err, iter) - } - - // clean up - SG_UNREF(subs); - SG_UNREF(q_subs); - SG_UNREF(err_subs); - SG_UNREF(tf); - - return q_sign*q; -} - -float64_t CIntegration::integrate_quadgh(CFunction* f) -{ - SG_REF(f); - - // evaluate integral using Gauss-Hermite 64-point rule - float64_t q=evaluate_quadgh64(f); - - SG_UNREF(f); - - return q; -} - -float64_t CIntegration::integrate_quadgh_customized(CFunction* f, - SGVector xgh, SGVector wgh) -{ - REQUIRE(xgh.vlen == wgh.vlen, - "The length of node array (%d) and weight array (%d) should be the same\n", - xgh.vlen, wgh.vlen); - - SG_REF(f); - - float64_t q=evaluate_quadgh(f, xgh.vlen, xgh.vector, wgh.vector); - - SG_UNREF(f); - - return q; -} - -void CIntegration::evaluate_quadgk(CFunction* f, CDynamicArray* subs, - CDynamicArray* q, CDynamicArray* err, index_t n, - float64_t* xgk, float64_t* wg, float64_t* wgk) -{ - // check the parameters - REQUIRE(f, "Integrable function should not be NULL\n") - REQUIRE(subs, "Array of subintervals should not be NULL\n") - REQUIRE(!(subs->get_array_size()%2), "Size of the array of subintervals " - "should be even\n") - REQUIRE(q, "Array of values of integrals should not be NULL\n") - REQUIRE(err, "Array of errors should not be NULL\n") - REQUIRE(n%2, "Order of Gauss-Kronrod should be odd\n") - REQUIRE(xgk, "Gauss-Kronrod 
nodes should not be NULL\n") - REQUIRE(wgk, "Gauss-Kronrod weights should not be NULL\n") - REQUIRE(wg, "Gauss weights should not be NULL\n") - - // create eigen representation of subs, xgk, wg, wgk - Map eigen_subs(subs->get_array(), 2, subs->get_num_elements()/2); - Map eigen_xgk(xgk, n); - Map eigen_wg(wg, n/2); - Map eigen_wgk(wgk, n); - - // compute half width and centers of each subinterval - VectorXd eigen_hw=(eigen_subs.row(1)-eigen_subs.row(0))/2.0; - VectorXd eigen_center=eigen_subs.colwise().sum()/2.0; - - // compute Gauss-Kronrod nodes x for each subinterval: x=hw*xgk+center - MatrixXd x=eigen_hw*eigen_xgk.adjoint()+eigen_center* - (VectorXd::Ones(n)).adjoint(); - - // compute ygk=f(x) - MatrixXd ygk(x.rows(), x.cols()); - - for (index_t i=0; iset_array(eigen_q.data(), eigen_q.size()); - - // choose function values for Gauss nodes - MatrixXd yg(ygk.rows(), ygk.cols()/2); - - for (index_t i=1, j=0; iset_array(eigen_err.data(), eigen_err.size()); -} - -void CIntegration::generate_gauher(SGVector xgh, SGVector wgh) -{ - REQUIRE(xgh.vlen == wgh.vlen, - "The length of node array (%d) and weight array (%d) should be the same\n", - xgh.vlen, wgh.vlen); - - index_t n = xgh.vlen; - - if (n == 20) - { - generate_gauher20(xgh, wgh); - } - else - { - Map eigen_xgh(xgh.vector, xgh.vlen); - Map eigen_wgh(wgh.vector, wgh.vlen); - - eigen_xgh = MatrixXd::Zero(n,1); - eigen_wgh = MatrixXd::Ones(n,1); - - if (n > 1) - { - MatrixXd v = MatrixXd::Zero(n,n); - - //b = sqrt( (1:N-1)/2 )'; - //[V,D] = eig( diag(b,1) + diag(b,-1) ); - v.block(0, 1, n-1, n-1).diagonal() = (0.5*ArrayXd::LinSpaced(n-1,1,n-1)).sqrt(); - v.block(1, 0, n-1, n-1).diagonal() = v.block(0, 1, n-1, n-1).diagonal(); - EigenSolver eig(v); - - //w = V(1,:)'.^2 - eigen_wgh = eig.eigenvectors().row(0).transpose().real().array().pow(2); - - //x = sqrt(2)*diag(D) - eigen_xgh = eig.eigenvalues().real()*sqrt(2.0); - } - } -} - -void CIntegration::generate_gauher20(SGVector xgh, SGVector wgh) -{ - REQUIRE(xgh.vlen 
== wgh.vlen, - "The length of node array (%d) and weight array (%d) should be the same\n", - xgh.vlen, wgh.vlen); - REQUIRE(xgh.vlen == 20, "The length of xgh and wgh should be 20\n"); - - static const index_t n = 20; - static float64_t wgh_pre[n]= - { - 0.0000000000001257800672437920121938444754, - 0.0000000002482062362315158465220083577413, - 0.0000000612749025998290679114578012251502, - 0.0000044021210902308611768963750310312832, - 0.0001288262799619289543807260089991473251, - 0.0018301031310804880686271545187082665507, - 0.0139978374471010288959682554832397727296, - 0.0615063720639768204967445797137770568952, - 0.1617393339840000332507941038784338161349, - 0.2607930634495548849471902030927594751120, - 0.2607930634495547739248877405771054327488, - 0.1617393339840003108065502601675689220428, - 0.0615063720639767788633811562704067910090, - 0.0139978374471010080792865437615546397865, - 0.0018301031310804856833823750505985117343, - 0.0001288262799619298488475183095403053812, - 0.0000044021210902308865878847926600414553, - 0.0000000612749025998294252534824241331057, - 0.0000000002482062362315177593771748866178, - 0.0000000000001257800672437921636551382778 - }; - - static float64_t xgh_pre[n]= - { - -7.6190485416797573137159815814811736345291, - -6.5105901570136559541879250900819897651672, - -5.5787388058932032564030123467091470956802, - -4.7345813340460569662582201999612152576447, - -3.9439673506573176275935566081898286938667, - -3.1890148165533904744961546384729444980621, - -2.4586636111723669806394809711491689085960, - -1.7452473208141270344384565760265104472637, - -1.0429453488027506935509336472023278474808, - -0.3469641570813560282893206476728664711118, - 0.3469641570813561393116231101885205134749, - 1.0429453488027513596847484222962521016598, - 1.7452473208141265903492467259638942778111, - 2.4586636111723669806394809711491689085960, - 3.1890148165533904744961546384729444980621, - 3.9439673506573162953259270580019801855087, - 
4.7345813340460569662582201999612152576447, - 5.5787388058932014800461729464586824178696, - 6.5105901570136532896526659897062927484512, - 7.6190485416797573137159815814811736345291 - - }; - - for (index_t idx = 0; idx < n; idx++) - { - wgh[idx] = wgh_pre[idx]; - xgh[idx] = xgh_pre[idx]; - } - -} - -void CIntegration::evaluate_quadgk15(CFunction* f, CDynamicArray* subs, - CDynamicArray* q, CDynamicArray* err) -{ - static const index_t n=15; - - // Gauss-Kronrod nodes - static float64_t xgk[n]= - { - -0.991455371120812639206854697526329, - -0.949107912342758524526189684047851, - -0.864864423359769072789712788640926, - -0.741531185599394439863864773280788, - -0.586087235467691130294144838258730, - -0.405845151377397166906606412076961, - -0.207784955007898467600689403773245, - 0.000000000000000000000000000000000, - 0.207784955007898467600689403773245, - 0.405845151377397166906606412076961, - 0.586087235467691130294144838258730, - 0.741531185599394439863864773280788, - 0.864864423359769072789712788640926, - 0.949107912342758524526189684047851, - 0.991455371120812639206854697526329 - }; - - // Gauss weights - static float64_t wg[n/2]= - { - 0.129484966168869693270611432679082, - 0.279705391489276667901467771423780, - 0.381830050505118944950369775488975, - 0.417959183673469387755102040816327, - 0.381830050505118944950369775488975, - 0.279705391489276667901467771423780, - 0.129484966168869693270611432679082 - }; - - // Gauss-Kronrod weights - static float64_t wgk[n]= - { - 0.022935322010529224963732008058970, - 0.063092092629978553290700663189204, - 0.104790010322250183839876322541518, - 0.140653259715525918745189590510238, - 0.169004726639267902826583426598550, - 0.190350578064785409913256402421014, - 0.204432940075298892414161999234649, - 0.209482141084727828012999174891714, - 0.204432940075298892414161999234649, - 0.190350578064785409913256402421014, - 0.169004726639267902826583426598550, - 0.140653259715525918745189590510238, - 0.104790010322250183839876322541518, - 
0.063092092629978553290700663189204, - 0.022935322010529224963732008058970 - }; - - // evaluate definite integral on each subinterval using Gauss-Kronrod rule - evaluate_quadgk(f, subs, q, err, n, xgk, wg, wgk); -} - -void CIntegration::evaluate_quadgk21(CFunction* f, CDynamicArray* subs, - CDynamicArray* q, CDynamicArray* err) -{ - static const index_t n=21; - - // Gauss-Kronrod nodes - static float64_t xgk[n]= - { - -0.995657163025808080735527280689003, - -0.973906528517171720077964012084452, - -0.930157491355708226001207180059508, - -0.865063366688984510732096688423493, - -0.780817726586416897063717578345042, - -0.679409568299024406234327365114874, - -0.562757134668604683339000099272694, - -0.433395394129247190799265943165784, - -0.294392862701460198131126603103866, - -0.148874338981631210884826001129720, - 0.000000000000000000000000000000000, - 0.148874338981631210884826001129720, - 0.294392862701460198131126603103866, - 0.433395394129247190799265943165784, - 0.562757134668604683339000099272694, - 0.679409568299024406234327365114874, - 0.780817726586416897063717578345042, - 0.865063366688984510732096688423493, - 0.930157491355708226001207180059508, - 0.973906528517171720077964012084452, - 0.995657163025808080735527280689003 - }; - - // Gauss weights - static float64_t wg[n/2]= - { - 0.066671344308688137593568809893332, - 0.149451349150580593145776339657697, - 0.219086362515982043995534934228163, - 0.269266719309996355091226921569469, - 0.295524224714752870173892994651338, - 0.295524224714752870173892994651338, - 0.269266719309996355091226921569469, - 0.219086362515982043995534934228163, - 0.149451349150580593145776339657697, - 0.066671344308688137593568809893332 - }; - - // Gauss-Kronrod weights - static float64_t wgk[n]= - { - 0.011694638867371874278064396062192, - 0.032558162307964727478818972459390, - 0.054755896574351996031381300244580, - 0.075039674810919952767043140916190, - 0.093125454583697605535065465083366, - 0.109387158802297641899210590325805, - 
0.123491976262065851077958109831074, - 0.134709217311473325928054001771707, - 0.142775938577060080797094273138717, - 0.147739104901338491374841515972068, - 0.149445554002916905664936468389821, - 0.147739104901338491374841515972068, - 0.142775938577060080797094273138717, - 0.134709217311473325928054001771707, - 0.123491976262065851077958109831074, - 0.109387158802297641899210590325805, - 0.093125454583697605535065465083366, - 0.075039674810919952767043140916190, - 0.054755896574351996031381300244580, - 0.032558162307964727478818972459390, - 0.011694638867371874278064396062192 - }; - - evaluate_quadgk(f, subs, q, err, n, xgk, wg, wgk); -} - -float64_t CIntegration::evaluate_quadgh(CFunction* f, index_t n, float64_t* xgh, - float64_t* wgh) -{ - // check the parameters - REQUIRE(f, "Integrable function should not be NULL\n"); - REQUIRE(xgh, "Gauss-Hermite nodes should not be NULL\n"); - REQUIRE(wgh, "Gauss-Hermite weights should not be NULL\n"); - - float64_t q=0.0; - - for (index_t i=0; i - - -#include -#include -#include -#include - -namespace shogun -{ -template class SGVector; - -/** @brief Class that contains certain methods related to numerical - * integration - */ -class CIntegration : public CSGObject -{ -public: - /** numerically evaluate definite integral \f$\int_a^b f(x) dx\f$, - * where \f$f(x)\f$ - function of one variable, using adaptive - * Gauss-Kronrod quadrature formula - * - * \f[ - * \int_a^b f(x)\dx \approx \sum_{i=1}^n w_i f(x_i) - * \f] - * - * where x_i and w_i - Gauss-Kronrod nodes and weights - * respectively. - * - * This function applies the Gauss-Kronrod 21-point integration - * rule for finite bounds \f$[a, b]\f$ and 15-point rule for - * infinite ones. - * - * Based on ideas form GNU Octave (file quadgk.m) under GPLv3. 
- * - * @param f integrable function of one variable - * @param a lower bound of the domain of integration - * @param b upper bound of the domain of integration - * @param abs_tol absolute tolerance of the quadrature - * @param rel_tol relative tolerance of the quadrature - * @param max_iter maximum number of iterations of the method - * @param sn initial number of subintervals - * - * @return approximate value of definite integral of the function - * on given domain - */ - static float64_t integrate_quadgk(CFunction* f, float64_t a, - float64_t b, float64_t abs_tol=1e-10, float64_t rel_tol=1e-5, - uint32_t max_iter=1000, index_t sn=10); - - /** numerically evaluate integral of the following kind - * - * \f[ - * \int_{-\infty}^{\infty}e^{-x^2}f(x)dx - * \f] - * - * using 64-point Gauss-Hermite rule - * - * \f[ - * \int_{-\infty}^{\infty}e^{-x^2}f(x)dx \approx - * \sum_{i=1}^{64} w_if(x_i) - * \f] - * - * where x_i and w_i - ith node and weight for the 64-point - * Gauss-Hermite formula respectively. - * - * @param f integrable function of one variable - * - * @return approximate value of the - * integral \f$\int_{-\infty}^{\infty}e^{-x^2}f(x)dx\f$ - */ - static float64_t integrate_quadgh(CFunction* f); - - /** numerically evaluate integral of the following kind - * - * \f[ - * \int_{-\infty}^{\infty}e^{-x^2}f(x)dx - * \f] - * - * using provided Gauss-Hermite points - * - * \f[ - * \int_{-\infty}^{\infty}e^{-x^2}f(x)dx \approx - * \sum_{i=1}^{64} w_if(x_i) - * \f] - * - * where x_i and w_i - ith node and weight for the provided - * Gauss-Hermite formula respectively. 
- * - * @param f integrable function of one variable - * @param xgh the provided array of nodes - * @param wgh the provided array of weights - * - * @return approximate value of the - * integral \f$\int_{-\infty}^{\infty}e^{-x^2}f(x)dx\f$ - */ - - static float64_t integrate_quadgh_customized(CFunction* f, - SGVector xgh, SGVector wgh); - - - /** generate Gauss-Hermite nodes - * - * Adapted form Gaussian Process Machine Learning Toolbox - * (file util/gauher.m) - * - * @param xgh nodes are saved in this pre-allocated array - * @param wgh weights are saved in this pre-allocated array - * - */ - static void generate_gauher(SGVector xgh, SGVector wgh); - - - /** generate 20 Gauss-Hermite nodes using precomputed result - * - * Adapted form Gaussian Process Machine Learning Toolbox - * (file util/gauher.m) - * - * @param xgh nodes are saved in this pre-allocated array - * @param wgh weights are saved in this pre-allocated array - * - */ - static void generate_gauher20(SGVector xgh, SGVector wgh); - - /** get object name - * - * @return name Integration - */ - virtual const char* get_name() const { return "Integration"; } - -private: - /** evaluate definite integral of a function and error on each - * subinterval using Gauss-Kronrod quadrature formula of order n - * - * Adapted form GNU Octave (file quadgk.m) under GPLv3. 
- * - * @param f integrable function of one variable - * @param subs subintervals of integration - * @param q approximate value of definite integral of the function - * on each subinterval - * @param err error on each subinterval - * @param n order of the Gauss-Kronrod rule - * @param xgk Gauss-Kronrod nodes - * @param wg Gauss weights - * @param wgk Gauss-Kronrod weights - */ - static void evaluate_quadgk(CFunction* f, CDynamicArray* subs, - CDynamicArray* q, CDynamicArray* err, index_t n, - float64_t* xgk, float64_t* wg, float64_t* wgk); - - /** evaluate definite integral of a function and error on each - * subinterval using Gauss-Kronrod quadrature formula of order 15. - * - * Gauss-Kronrod nodes, Gauss weights and Gauss-Kronrod weights - * are precomputed. - * - * The abscissae and weights for 15-point rule are taken from from - * QUADPACK (file dqk15.f). - * - * @param f integrable function of one variable - * @param subs subintervals of integration - * @param q approximate value of definite integral of the function - * on each subinterval - * @param err error on each subinterval - */ - static void evaluate_quadgk15(CFunction* f, CDynamicArray* subs, - CDynamicArray* q, CDynamicArray* err); - - /** evaluate definite integral of a function and error on each - * subinterval using Gauss-Kronrod quadrature formula of order 21. - * - * Gauss-Kronrod nodes, Gauss weights and Gauss-Kronrod weights - * are precomputed. - * - * The abscissae and weights for 21-point rule are taken from - * QUADPACK (file dqk21.f). 
- * - * @param f integrable function of one variable - * @param subs subintervals of integration - * @param q approximate value of definite integral of the function - * on each subinterval - * @param err error on each subinterval - */ - static void evaluate_quadgk21(CFunction* f, CDynamicArray* subs, - CDynamicArray* q, CDynamicArray* err); - - /** evaluate integral \f$\int_{-\infty}^{\infty}e^{-x^2}f(x)dx\f$ - * using Gauss-Hermite quadrature formula of order n - * - * @param f integrable function of one variable - * @param n order of the Gauss-Hermite rule - * @param xh Gauss-Hermite nodes - * @param wh Gauss-Hermite weights - * - * @return approximate value of the integral - * \f$\int_{-\infty}^{\infty}e^{-x^2}f(x)dx\f$ - */ - static float64_t evaluate_quadgh(CFunction* f, index_t n, float64_t* xh, - float64_t* wh); - - /** evaluate integral \f$\int_{-\infty}^{\infty}e^{-x^2}f(x)dx\f$ - * using Gauss-Hermite quadrature formula of order 64. - * - * Gauss-Hermite nodes \f$x_i\f$ and weights \f$w_i\f$ are - * precomputed: \f$x_i\f$ - the i-th zero of \f$H_n(x)\f$, - * \f$w_i=\frac{2^{n-1}n!\sqrt{\pi}}{n^2[H_{n-1}(x_i)]^2}\f$, - * where \f$H_n(x)\f$ is physicists' Hermite polynomials. - * - * @param f integrable function of one variable - * - * @return approximate value of the integral - * \f$\int_{-\infty}^{\infty}e^{-x^2}f(x)dx\f$ - */ - static float64_t evaluate_quadgh64(CFunction* f); -}; -} -#endif /* _INTEGRATION_H_ */ diff --git a/src/shogun/mathematics/JacobiEllipticFunctions.cpp b/src/shogun/mathematics/JacobiEllipticFunctions.cpp deleted file mode 100644 index 8f3cec3e8ef..00000000000 --- a/src/shogun/mathematics/JacobiEllipticFunctions.cpp +++ /dev/null @@ -1,136 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * Written (W) 2013 Soumyajit De - * - * KRYLSTAT Copyright 2011 by Erlend Aune under GPL2+ - * (few parts rewritten and adjusted for shogun) - */ - -#include -#include -#include - -using namespace shogun; - -void CJacobiEllipticFunctions::ellipKKp(Real L, Real &K, Real &Kp) -{ - REQUIRE(L>=0.0, - "CJacobiEllipticFunctions::ellipKKp(): \ - Parameter L should be non-negative\n"); -#if defined HAVE_ARPREC && defined USE_GPL_SHOGUN - const Real eps=Real(std::numeric_limits::epsilon()); - const Real pi=mp_real::_pi; -#else - const Real eps=std::numeric_limits::epsilon(); - const Real pi=M_PI; -#endif //(HAVE_ARPREC && USE_GPL_SHOGUN) - if (L>10.0) - { - K=pi*0.5; - Kp=pi*L+log(4.0); - } - else - { - Real m=exp(-2.0*pi*L); - Real mp=1.0-m; - if (m::max()); - } - else if (mp::max()); - Kp=compute_quarter_period(sqrt(m)); - } - else - { - K=compute_quarter_period(sqrt(mp)); - Kp=compute_quarter_period(sqrt(m)); - } - } -} - -void CJacobiEllipticFunctions - ::ellipJC(Complex u, Real m, Complex &sn, Complex &cn, Complex &dn) -{ - REQUIRE(m>=0.0 && m<=1.0, - "CJacobiEllipticFunctions::ellipJC(): \ - Parameter m should be >=0 and <=1\n"); - -#if defined HAVE_ARPREC && defined USE_GPL_SHOGUN - const Real eps=sqrt(mp_real::_eps); -#else - const Real eps=sqrt(std::numeric_limits::epsilon()); -#endif //(HAVE_ARPREC && USE_GPL_SHOGUN) - if (m>=(1.0-eps)) - { -#if defined HAVE_ARPREC && defined USE_GPL_SHOGUN - complex128_t _u(dble(u.real),dble(u.imag)); - complex128_t t=CMath::tanh(_u); - complex128_t b=CMath::cosh(_u); - complex128_t twon=b*CMath::sinh(_u); - complex128_t ai=0.25*(1.0-dble(m)); - complex128_t _sn=t+ai*(twon-_u)/(b*b); - complex128_t phi=1.0/b; - complex128_t _cn=phi-ai*(twon-_u); - complex128_t _dn=phi+ai*(twon+_u); - sn=mp_complex(_sn.real(),_sn.imag()); - cn=mp_complex(_cn.real(),_cn.imag()); - dn=mp_complex(_dn.real(),_dn.imag()); -#else - Complex t=CMath::tanh(u); - Complex b=CMath::cosh(u); - Complex ai=0.25*(1.0-m); - Complex twon=b*CMath::sinh(u); - 
sn=t+ai*(twon-u)/(b*b); - Complex phi=Real(1.0)/b; - ai*=t*phi; - cn=phi-ai*(twon-u); - dn=phi+ai*(twon+u); -#endif //(HAVE_ARPREC && USE_GPL_SHOGUN) - } - else - { - const Real prec=4.0*eps; - const index_t MAX_ITER=128; - index_t i=0; - Real kappa[MAX_ITER]; - - while (iprec) - { - Real k; - if (m>0.001) - { - Real mp=sqrt(1.0-m); - k=(1.0-mp)/(1.0+mp); - } - else - k=poly_six(m/4.0); - u/=(1.0+k); - m=k*k; - kappa[i++]=k; - } - Complex sin_u=sin(u); - Complex cos_u=cos(u); - Complex t=Real(0.25*m)*(u-sin_u*cos_u); - sn=sin_u-t*cos_u; - cn=cos_u+t*sin_u; - dn=Real(1.0)+Real(0.5*m)*(cos_u*cos_u); - - i--; - while (i>=0) - { - Real k=kappa[i--]; - Complex ksn2=k*(sn*sn); - Complex d=Real(1.0)+ksn2; - sn*=(1.0+k)/d; - cn*=dn/d; - dn=(Real(1.0)-ksn2)/d; - } - } -} diff --git a/src/shogun/mathematics/JacobiEllipticFunctions.h b/src/shogun/mathematics/JacobiEllipticFunctions.h deleted file mode 100644 index 5a469c7d879..00000000000 --- a/src/shogun/mathematics/JacobiEllipticFunctions.h +++ /dev/null @@ -1,163 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 2013 Soumyajit De - * - * KRYLSTAT Copyright 2011 by Erlend Aune under GPL2+ - * (few parts rewritten and adjusted for shogun) - * - * NOTE: For higher precision, the methods in this class rely on an external - * library, ARPREC (http://crd-legacy.lbl.gov/~dhbailey/mpdist/), in absense of - * which they fallback to shogun datatypes. To use it with shogun, configure - * ARPREC with `CXX="c++ -fPIC" ./configure' in order to link. 
- */ - -#ifndef JACOBI_ELLIPTIC_FUNCTIONS_H_ -#define JACOBI_ELLIPTIC_FUNCTIONS_H_ - -#include -#include -#include -#include - -#if defined HAVE_ARPREC && defined USE_GPL_SHOGUN -#include -#include -#endif //(HAVE_ARPREC && USE_GPL_SHOGUN) - -namespace shogun -{ - -/** @brief Class that contains methods for computing Jacobi elliptic functions - * related to complex analysis. These functions are inverse of the elliptic - * integral of first kind, i.e. - * \f[ - * u(k,m)=\int_{0}^{k}\frac{dt}{\sqrt{(1-t^{2})(1-m^{2}t^{2})}} - * =\int_{0}^{\varphi}\frac{d\theta}{\sqrt{(1-m^{2}sin^{2}\theta)}} - * \f] - * where \f$k=sin\varphi\f$, \f$t=sin\theta\f$ and parameter \f$m, 0\le m - * \le 1\f$ is called modulus. Three main Jacobi elliptic functions are defined - * as \f$sn(u,m)=k=sin\theta\f$, \f$cn(u,m)=cos\theta=\sqrt{1-sn(u,m)^{2}}\f$ - * and \f$dn(u,m)=\sqrt{1-m^{2}sn(u,m)^{2}}\f$. - * For \f$k=1\f$, i.e. \f$\varphi=\frac{\pi}{2}\f$, \f$u(1,m)=K(m)\f$ is known - * as the complete elliptic integral of first kind. Similarly, \f$u(1,m'))= - * K'(m')\f$, \f$m'=\sqrt{1-m^{2}}\f$ is called the complementary complete - * elliptic integral of first kind. Jacobi functions are double periodic with - * quardratic periods \f$K\f$ and \f$K'\f$. - * - * This class provides two sets of methods for computing \f$K,K'\f$, and - * \f$sn,cn,dn\f$. Useful for computing rational approximation of matrix - * functions given by Cauchy's integral formula, etc. 
- */ -class CJacobiEllipticFunctions: public CSGObject -{ -#if defined HAVE_ARPREC && defined USE_GPL_SHOGUN - typedef mp_real Real; - typedef mp_complex Complex; -#else - typedef float64_t Real; - typedef complex128_t Complex; -#endif //(HAVE_ARPREC && USE_GPL_SHOGUN) -private: - static inline Real compute_quarter_period(Real b) - { -#if defined HAVE_ARPREC && defined USE_GPL_SHOGUN - const Real eps=mp_real::_eps; - const Real pi=mp_real::_pi; -#else - const Real eps=std::numeric_limits::epsilon(); - const Real pi=M_PI; -#endif //(HAVE_ARPREC && USE_GPL_SHOGUN) - Real a=1.0; - Real mm=1.0; - - int64_t p=2; - do - { - Real a_new=(a+b)*0.5; - Real b_new=sqrt(a*b); - Real c=(a-b)*0.5; - mm=Real(p)*c*c; - p<<=1; - a=a_new; - b=b_new; - } while (mm>eps); - return pi*0.5/a; - } - - static inline Real poly_six(Real x) - { - return (132*pow(x,6)+42*pow(x,5)+14*pow(x,4)+5*pow(x,3)+2*pow(x,2)+x); - } - -public: - /** Computes the quarter periods (K and K') of Jacobian elliptic functions - * (see class description). - * @param L - * @param K the quarter period (to be computed) on the Real axis - * @param Kp the quarter period (to be computed) on the Imaginary axis - * computed - */ - static void ellipKKp(Real L, Real &K, Real &Kp); - - /** Computes three main Jacobi elliptic functions, \f$sn(u,m)\f$, - * \f$cn(u,m)\f$ and \f$dn(u,m)\f$ (see class description). 
- * @param u the elliptic integral of the first kind \f$u(k,m)\f$ - * @param m the modulus parameter, \f$0\le m \le 1\f$ - * @param sn Jacobi elliptic function sn(u,m) - * @param cn Jacobi elliptic function cn(u,m) - * @param dn Jacobi elliptic function dn(u,m) - */ - static void ellipJC(Complex u, Real m, Complex &sn, Complex &cn, - Complex &dn); - -#if defined HAVE_ARPREC && defined USE_GPL_SHOGUN - /** Wrapper method for ellipKKp if ARPREC is present (for high precision) - * @param L - * @param K the quarter period (to be computed) on the Real axis - * @param Kp the quarter period (to be computed) on the Imaginary axis - * computed - */ - static void ellipKKp(float64_t L, float64_t &K, float64_t &Kp) - { - mp::mp_init(100, NULL, true); - mp_real _K, _Kp; - ellipKKp(mp_real(L), _K, _Kp); - K=dble(_K); - Kp=dble(_Kp); - mp::mp_finalize(); - } - - /** Wrapper method for ellipJC if ARPREC is present (for high precision) - * @param u the elliptic integral of the first kind \f$u(k,m)\f$ - * @param m the modulus parameter, \f$0\le m \le 1\f$ - * @param sn Jacobi elliptic function sn(u,m) - * @param cn Jacobi elliptic function cn(u,m) - * @param dn Jacobi elliptic function dn(u,m) - */ - static void ellipJC(complex128_t u, float64_t m, - complex128_t &sn, complex128_t &cn, complex128_t &dn) - { - mp::mp_init(100, NULL, true); - mp_complex _sn, _cn, _dn; - ellipJC(mp_complex(u.real(),u.imag()), mp_real(m), _sn, _cn, _dn); - sn=complex128_t(dble(_sn.real),dble(_sn.imag)); - cn=complex128_t(dble(_cn.real),dble(_cn.imag)); - dn=complex128_t(dble(_dn.real),dble(_dn.imag)); - mp::mp_finalize(); - } -#endif //(HAVE_ARPREC && USE_GPL_SHOGUN) - - /** @return object name */ - virtual const char* get_name() const - { - return "JacobiEllipticFunctions"; - } -}; - -} - -#endif /* JACOBI_ELLIPTIC_FUNCTIONS_H_ */ diff --git a/src/shogun/mathematics/Math.cpp b/src/shogun/mathematics/Math.cpp index adf58828ab0..604387f8ea2 100644 --- a/src/shogun/mathematics/Math.cpp +++ 
b/src/shogun/mathematics/Math.cpp @@ -81,24 +81,6 @@ CMath::~CMath() #endif } -float64_t CMath::dot(const float64_t* v1, const float64_t* v2, int32_t n) -{ - float64_t r=0; - Eigen::Map ev1(v1,n); - Eigen::Map ev2(v2,n); - r = ev1.dot(ev2); - return r; -} - -float32_t CMath::dot(const float32_t* v1, const float32_t* v2, int32_t n) -{ - float32_t r=0; - Eigen::Map ev1(v1,n); - Eigen::Map ev2(v2,n); - r = ev1.dot(ev2); - return r; -} - #ifdef USE_LOGCACHE int32_t CMath::determine_logrange() { diff --git a/src/shogun/mathematics/Math.h b/src/shogun/mathematics/Math.h index 021b4bb214a..401e7b7d326 100644 --- a/src/shogun/mathematics/Math.h +++ b/src/shogun/mathematics/Math.h @@ -140,6 +140,7 @@ class CMath : public CSGObject virtual ~CMath(); //@} +#ifndef SWIG // SWIG should skip this part /**@name min/max/abs functions. */ //@{ @@ -149,10 +150,10 @@ class CMath : public CSGObject * @param b second value * @return minimum value amongst a and b */ - template + template ::value>::type> static inline T min(T a, T b) { - return (a<=b) ? a : b; + return std::min(a, b); } /** Returns the greatest element amongst two input values @@ -160,11 +161,12 @@ class CMath : public CSGObject * @param b second value * @return maximum value amongst a and b */ - template + template ::value>::type> static inline T max(T a, T b) { - return (a>=b) ? 
a : b; + return std::max(a, b); } +#endif /** Returns the absolute value of a number, that is * if a>0, output is a; if a<0 ,output is -a @@ -205,12 +207,7 @@ class CMath : public CSGObject static T min(T* vec, int32_t len) { ASSERT(len>0) - T minv=vec[0]; - - for (int32_t i=1; i0) - T maxv=vec[0]; - - for (int32_t i=1; i + template ::value>::type> static inline T clamp(T value, T lb, T ub) { if (value<=lb) @@ -254,7 +247,7 @@ class CMath : public CSGObject * @param maxv_ptr pointer to store the maximum value * @return index of the maximum value */ - template + template ::value>::type> static int32_t arg_max(T * vec, int32_t inc, int32_t len, T * maxv_ptr = NULL) { ASSERT(len > 0 || inc > 0) @@ -281,7 +274,7 @@ class CMath : public CSGObject * @param minv_ptr pointer to store the minimum value * @return index of the minimum value */ - template + template ::value>::type> static int32_t arg_min(T * vec, int32_t inc, int32_t len, T * minv_ptr = NULL) { ASSERT(len > 0 || inc > 0) @@ -310,7 +303,7 @@ class CMath : public CSGObject * @param eps threshold for values to be equal/different * @return true if values are equal within eps accuracy, false if not. */ - template + template ::value>::type> static inline bool fequals_abs(const T& a, const T& b, const float64_t eps) { @@ -327,14 +320,13 @@ class CMath : public CSGObject * @param tolerant allows linient check on float equality (within accuracy) * @return true if values are equal within eps accuracy, false if not. 
*/ - template + template ::value>::type> static inline bool fequals(const T& a, const T& b, const float64_t eps, bool tolerant=false) { const T absA = CMath::abs(a); const T absB = CMath::abs(b); const T diff = CMath::abs((a-b)); - T comp; // Handle this separately since NAN is unordered if (CMath::is_nan((float64_t)a) && CMath::is_nan((float64_t)b)) @@ -345,11 +337,7 @@ class CMath : public CSGObject return CMath::fequals_abs(a, b, eps); // handles float32_t and float64_t separately - if (sizeof(T) == 4) - comp = CMath::F_MIN_NORM_VAL32; - - else - comp = CMath::F_MIN_NORM_VAL64; + T comp = (std::is_same::value) ? CMath::F_MIN_NORM_VAL32 : CMath::F_MIN_NORM_VAL64; if (a == b) return true; @@ -365,6 +353,7 @@ class CMath : public CSGObject return (check < eps); } } +#endif /* Get the corresponding absolute tolerance for unit test given a relative tolerance * @@ -618,140 +607,6 @@ class CMath : public CSGObject return ::exp((double) x); } - /// Compute dot product between v1 and v2 (blas optimized) - static inline float64_t dot(const bool* v1, const bool* v2, int32_t n) - { - float64_t r=0; - for (int32_t i=0; i + template ::value>::type> static float64_t* linspace(T start, T end, int32_t n) { float64_t* output = SG_MALLOC(float64_t, n); @@ -1241,6 +1097,7 @@ class CMath : public CSGObject return output; } +#endif /** Returns a vector with n linearly spaced elements between start and end. 
* @param start beginning of the interval to divide @@ -1325,12 +1182,13 @@ class CMath : public CSGObject */ static void sort(float64_t *a, int32_t*idx, int32_t N); +#ifndef SWIG // SWIG should skip this part /** Performs a quicksort on an array output of length size * it is sorted from in ascending (for type T) * @param output array to be sorted * @param size size of array */ - template + template ::value>::type> static void qsort(T* output, int32_t size) { if (size<=1) @@ -1375,7 +1233,7 @@ class CMath : public CSGObject * @param output array to be sorted * @param size size of array */ - template + template ::value>::type> static void insertion_sort(T* output, int32_t size) { for (int32_t i=0; i + template ::value>::type> inline static void radix_sort(T* array, int32_t size) { radix_sort_helper(array,size,0); } +#endif /** Extract the byte at position p (from left) * of a 64 bit integer. The function is somewhat identical to @@ -1531,13 +1390,14 @@ class CMath : public CSGObject SG_SERROR("CMath::radix_sort_helper():: Not supported for complex128_t\n"); } +#ifndef SWIG // SWIG should skip this part /** Performs a quicksort on an array of pointers. 
* It is sorted from in ascending (for type T) * Every element is dereferenced once before being compared * @param vector array of pointers to sort * @param length length of array */ - template + template ::value>::type> static void qsort(T** vector, index_t length) { if (length<=1) @@ -1576,20 +1436,15 @@ class CMath : public CSGObject qsort(&vector[left],length-left); } - /// qsort not implemented for complex128_t - static void qsort(complex128_t** vector, index_t length) - { - SG_SERROR("CMath::qsort():: Not supported for complex128_t\n"); - } - /** Quicksort the vector in ascending order (for type T) * @param vector vector to be sorted */ - template + template ::value>::type> static void qsort(SGVector vector) { qsort(vector, vector.size()); } +#endif /** Helper functor for the function argsort */ template @@ -1609,6 +1464,7 @@ class CMath : public CSGObject const T* data; }; +#ifndef SWIG // SWIG should skip this part /** Get sorted index. * * idx = v.argsort() is similar to Matlab [~, idx] = sort(v) @@ -1616,7 +1472,7 @@ class CMath : public CSGObject * @param vector vector to be sorted * @return sorted index for this vector */ - template + template::value>::type> static SGVector argsort(SGVector vector) { IndexSorter cmp(&vector); @@ -1634,7 +1490,7 @@ class CMath : public CSGObject * @param vector input vector * @return true if vector is sorted, false otherwise */ - template + template ::value>::type> static bool is_sorted(SGVector vector) { if (vector.size() < 2) @@ -1648,6 +1504,7 @@ class CMath : public CSGObject return true; } +#endif /** Display bits (useful for debugging) * @param word input to be displayed as bits @@ -2338,23 +2195,6 @@ void CMath::min(float64_t* output, T* index, int32_t size) swap(index[0], index[min_index]); } -/// linspace not implemented for complex128_t, returns null instead -template <> -inline float64_t* CMath::linspace(complex128_t start, complex128_t end, int32_t n) -{ - SG_SERROR("SGVector::linspace():: Not supported for 
complex128_t\n"); - return NULL; -} - -#define COMPLEX128_ERROR_ONEVECARG_RETURNS_T(function, return_type, return_statement) \ -template <> \ -inline return_type CMath::function(SGVector vector) \ -{ \ - SG_SERROR("CMath::%s():: Not supported for complex128_t\n", \ - #function); \ - return_statement; \ -} - #define COMPLEX128_ERROR_ONEARG_T(function) \ template <> \ inline complex128_t CMath::function(complex128_t a) \ @@ -2364,83 +2204,11 @@ inline complex128_t CMath::function(complex128_t a) \ return complex128_t(0.0, 0.0); \ } -#define COMPLEX128_ERROR_TWOARGS_T(function) \ -template <> \ -inline complex128_t CMath::function(complex128_t a, complex128_t b) \ -{ \ - SG_SERROR("CMath::%s():: Not supported for complex128_t\n",\ - #function);\ - return complex128_t(0.0, 0.0); \ -} - -#define COMPLEX128_ERROR_THREEARGS_T(function) \ -template <> \ -inline complex128_t CMath::function(complex128_t a, complex128_t b, complex128_t c) \ -{ \ - SG_SERROR("CMath::%s():: Not supported for complex128_t\n",\ - #function);\ - return complex128_t(0.0, 0.0); \ -} - -#define COMPLEX128_ERROR_SORT_T(function) \ -template <> \ -inline void CMath::function(complex128_t* output, int32_t b) \ -{ \ - SG_SERROR("CMath::%s():: Not supported for complex128_t\n",\ - #function);\ -} - -#define COMPLEX128_ERROR_ARG_MAX_MIN(function) \ -template <> \ -inline int32_t CMath::function(complex128_t * a, int32_t b, int32_t c, complex128_t * d) \ -{ \ - int32_t maxIdx=0; \ - SG_SERROR("CMath::%s():: Not supported for complex128_t\n",\ - #function);\ - return maxIdx; \ -} - -/// qsort not implemented for complex128_t, returns void instead -COMPLEX128_ERROR_ONEVECARG_RETURNS_T(qsort, void, return;) - -/// argsort not implemented for complex128_t, returns a vector -COMPLEX128_ERROR_ONEVECARG_RETURNS_T(argsort, SGVector, SGVector idx(vector.size());return idx;) - -/// is_sorted not implemented for complex128_t, returns false -COMPLEX128_ERROR_ONEVECARG_RETURNS_T(is_sorted, bool, return false;) - -/// 
min not implemented for complex128_t, returns (0.0)+i(0.0) instead -COMPLEX128_ERROR_TWOARGS_T(min) - -/// max not implemented for complex128_t, returns (0.0)+i(0.0) instead -COMPLEX128_ERROR_TWOARGS_T(max) - -/// clamp not implemented for complex128_t, returns (0.0)+i(0.0) instead -COMPLEX128_ERROR_THREEARGS_T(clamp) - /// signum not implemented for complex128_t, returns (0.0)+i(0.0) instead // COMPLEX128_ERROR_ONEARG_T(sign) -/// qsort not implemented for complex128_t -COMPLEX128_ERROR_SORT_T(qsort) - -/// insertion_sort not implemented for complex128_t -COMPLEX128_ERROR_SORT_T(insertion_sort) - -/// radix_sort not implemented for complex128_t -COMPLEX128_ERROR_SORT_T(radix_sort) - -/// arg_max not implemented for complex128_t -COMPLEX128_ERROR_ARG_MAX_MIN(arg_max) - -/// arg_min not implemented for complex128_t -COMPLEX128_ERROR_ARG_MAX_MIN(arg_min) - } #undef COMPLEX128_ERROR_ONEARG #undef COMPLEX128_ERROR_ONEARG_T -#undef COMPLEX128_ERROR_TWOARGS_T -#undef COMPLEX128_ERROR_THREEARGS_T #undef COMPLEX128_STDMATH -#undef COMPLEX128_ERROR_SORT_T #endif /** __MATHEMATICS_H_ */ diff --git a/src/shogun/mathematics/SparseInverseCovariance.cpp b/src/shogun/mathematics/SparseInverseCovariance.cpp deleted file mode 100644 index 9e8a60601b3..00000000000 --- a/src/shogun/mathematics/SparseInverseCovariance.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * Copyright (C) 2009-2011 Jun Liu, Jieping Ye - * Copyright (C) 2012 Sergey Lisitsyn - */ - - -#include -#ifdef USE_GPL_SHOGUN -#include -#include - -using namespace shogun; - -CSparseInverseCovariance::CSparseInverseCovariance() : - CSGObject(), m_lasso_max_iter(1000), - m_max_iter(1000), m_f_gap(1e-6), m_x_gap(1e-4), - m_xtol(1e-4) -{ - register_parameters(); -} - -CSparseInverseCovariance::~CSparseInverseCovariance() -{ -} - -void CSparseInverseCovariance::register_parameters() -{ - SG_ADD(&m_lasso_max_iter,"lasso_max_iter", - "maximum iteration of LASSO step",MS_NOT_AVAILABLE); - SG_ADD(&m_max_iter,"max_iter","maximum total iteration", - MS_NOT_AVAILABLE); - SG_ADD(&m_f_gap,"f_gap","f gap",MS_NOT_AVAILABLE); - SG_ADD(&m_x_gap,"x_gap","x gap",MS_NOT_AVAILABLE); - SG_ADD(&m_xtol,"xtol","xtol",MS_NOT_AVAILABLE); -} - -SGMatrix CSparseInverseCovariance::estimate(SGMatrix S, float64_t lambda_c) -{ - ASSERT(S.num_cols==S.num_rows) - - int32_t n = S.num_cols; - float64_t sum_S = 0.0; - for (int32_t i=0; i(Theta,n,n); -} -#endif //USE_GPL_SHOGUN diff --git a/src/shogun/mathematics/SparseInverseCovariance.h b/src/shogun/mathematics/SparseInverseCovariance.h deleted file mode 100644 index feda723cc92..00000000000 --- a/src/shogun/mathematics/SparseInverseCovariance.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * Copyright (C) 2012 Sergey Lisitsyn - */ - -#ifndef SPINVCOV_H_ -#define SPINVCOV_H_ - -#include -#ifdef USE_GPL_SHOGUN - -#include -#include - -namespace shogun -{ - -/** @brief used to estimate inverse covariance matrix using graphical lasso - * - * implementation is based on SLEP library's code - */ -class CSparseInverseCovariance : public CSGObject -{ -public: - - /** constructor */ - CSparseInverseCovariance(); - - /** destructor */ - virtual ~CSparseInverseCovariance(); - - /** estimate inverse covariance matrix - * - * @param S empirical covariance matrix - * @param lambda_c regularization constant - */ - SGMatrix estimate(SGMatrix S, float64_t lambda_c); - - /** get name */ - const char* get_name() const { return "SparseInverseCovariance"; }; - - - /** get lasso max iter - * @return lasso max iter - */ - int32_t get_lasso_max_iter() const { return m_lasso_max_iter; } - /** get max iter - * @return max iter - */ - int32_t get_max_iter() const { return m_max_iter; } - /** get lasso max iter - * @return lasso max iter - */ - float64_t get_f_gap() const { return m_f_gap; } - /** get lasso max iter - * @return lasso max iter - */ - float64_t get_x_gap() const { return m_x_gap; } - /** get lasso max iter - * @return lasso max iter - */ - float64_t get_xtol() const { return m_xtol; } - - /** set lasso max iter - * @param lasso_max_iter lasso max iter - */ - void set_lasso_max_iter(int32_t lasso_max_iter) - { - m_lasso_max_iter = lasso_max_iter; - } - /** set max iter - * @param max_iter max iter - */ - void set_max_iter(int32_t max_iter) - { - m_max_iter = max_iter; - } - /** set f gap - * @param f_gap f gap - */ - void set_f_gap(int32_t f_gap) - { - m_f_gap = f_gap; - } - /** set x gap - * @param x_gap x gap - */ - void set_x_gap(int32_t x_gap) - { - m_x_gap = x_gap; - } - /** set xtol - * @param xtol xtol - */ - void set_xtol(int32_t xtol) - { - m_xtol = xtol; - } - -private: - - /** register parameters */ - void register_parameters(); - -protected: - - 
/** LASSO max iter */ - int32_t m_lasso_max_iter; - - /** max iter */ - int32_t m_max_iter; - - /** fGap */ - float64_t m_f_gap; - - /** xGap */ - float64_t m_x_gap; - - /** xtol */ - float64_t m_xtol; -}; - -} -#endif //USE_GPL_SHOGUN -#endif diff --git a/src/shogun/mathematics/ajd/UWedge.cpp b/src/shogun/mathematics/ajd/UWedge.cpp index e7911c80ee0..8cec2dd4dd0 100644 --- a/src/shogun/mathematics/ajd/UWedge.cpp +++ b/src/shogun/mathematics/ajd/UWedge.cpp @@ -113,7 +113,12 @@ SGMatrix CUWedge::diagonalize(SGNDArray C, SGMatrix #include - #if EIGEN_VERSION_AT_LEAST(3,0,93) - #include - #else - #define EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET - #include - - #ifndef DOXYGEN_SHOULD_SKIP_THIS - // Triplet definition for Eigen3 backword compatibility - namespace Eigen { - template struct Triplet - { - Triplet(index_t colIndex, index_t rowIndex, T valueT) : - ecol(colIndex), erow(rowIndex), evalue(valueT) - { - } - index_t col() const { return ecol; }; - index_t row() const { return erow; }; - T value() const { return evalue; }; - index_t ecol; - index_t erow; - T evalue; - }; - - // SimplicialLLT definition for Eigen3 backword compatibility - template class SimplicialLLT - : public SimplicialCholesky - { - public: - SimplicialLLT() - { - SimplicialCholesky::setMode(SimplicialCholeskyLLt); - } - inline const T matrixL() - { - return SimplicialCholesky::m_matrix; - } - inline const T matrixU() - { - return SimplicialCholesky::m_matrix.transpose(); - } - }; - } - #endif //DOXYGEN_SHOULD_SKIP_THIS - - #endif //EIGEN_VERSION_AT_LEAST(3,0,93) + #include #if ((EIGEN_WORLD_VERSION == 3) && (EIGEN_MAJOR_VERSION == 2) && \ ((EIGEN_MINOR_VERSION == 91) || (EIGEN_MINOR_VERSION == 92))) diff --git a/src/shogun/mathematics/lapack.cpp b/src/shogun/mathematics/lapack.cpp index 44318b3af51..8bbe0b67de0 100644 --- a/src/shogun/mathematics/lapack.cpp +++ b/src/shogun/mathematics/lapack.cpp @@ -20,7 +20,10 @@ #include #include -#include +#if defined(EIGEN_USE_LAPACKE) || 
defined(EIGEN_USE_LAPACKE_STRICT) +// Eigen's lapacke.h will take care of the declaration of the lapacke functions +#include +#endif using namespace shogun; @@ -310,17 +313,17 @@ void wrap_dsyevr(char jobz, char uplo, int n, double *a, int lda, int il, int iu int m; double vl,vu; double abstol = 0.0; - char I = 'I'; - int* isuppz = SG_MALLOC(int, n); + char range = 'I'; + int* isuppz = SG_MALLOC(int, 2 * (iu - il + 1)); #ifdef HAVE_ACML - DSYEVR(jobz,I,uplo,n,a,lda,vl,vu,il,iu,abstol,m, + DSYEVR(jobz,range,uplo,n,a,lda,vl,vu,il,iu,abstol,m, eigenvalues,eigenvectors,n,isuppz,info); #else int lwork = -1; int liwork = -1; double work1 = 0; int work2 = 0; - DSYEVR(&jobz,&I,&uplo,&n,a,&lda,&vl,&vu,&il,&iu,&abstol, + DSYEVR(&jobz,&range,&uplo,&n,a,&lda,&vl,&vu,&il,&iu,&abstol, &m,eigenvalues,eigenvectors,&n,isuppz, &work1,&lwork,&work2,&liwork,info); ASSERT(*info==0) @@ -328,7 +331,7 @@ void wrap_dsyevr(char jobz, char uplo, int n, double *a, int lda, int il, int iu liwork = work2; double* work = SG_MALLOC(double, lwork); int* iwork = SG_MALLOC(int, liwork); - DSYEVR(&jobz,&I,&uplo,&n,a,&lda,&vl,&vu,&il,&iu,&abstol, + DSYEVR(&jobz,&range,&uplo,&n,a,&lda,&vl,&vu,&il,&iu,&abstol, &m,eigenvalues,eigenvectors,&n,isuppz, work,&lwork,iwork,&liwork,info); ASSERT(*info==0) @@ -346,21 +349,21 @@ void wrap_dsygvx(int itype, char jobz, char uplo, int n, double *a, int lda, dou double abstol = 0.0; double vl,vu; int* ifail = SG_MALLOC(int, n); - char I = 'I'; + char range = 'I'; #ifdef HAVE_ACML - DSYGVX(itype,jobz,I,uplo,n,a,lda,b,ldb,vl,vu, + DSYGVX(itype,jobz,range,uplo,n,a,lda,b,ldb,vl,vu, il,iu,abstol,m,eigenvalues, eigenvectors,n,ifail,info); #else int lwork = -1; double work1 = 0; int* iwork = SG_MALLOC(int, 5*n); - DSYGVX(&itype,&jobz,&I,&uplo,&n,a,&lda,b,&ldb,&vl,&vu, + DSYGVX(&itype,&jobz,&range,&uplo,&n,a,&lda,b,&ldb,&vl,&vu, &il,&iu,&abstol,&m,eigenvalues,eigenvectors, &n,&work1,&lwork,iwork,ifail,info); lwork = (int)work1; double* work = SG_MALLOC(double, lwork); - 
DSYGVX(&itype,&jobz,&I,&uplo,&n,a,&lda,b,&ldb,&vl,&vu, + DSYGVX(&itype,&jobz,&range,&uplo,&n,a,&lda,b,&ldb,&vl,&vu, &il,&iu,&abstol,&m,eigenvalues,eigenvectors, &n,work,&lwork,iwork,ifail,info); SG_FREE(work); diff --git a/src/shogun/mathematics/lapack.h b/src/shogun/mathematics/lapack.h index ca42a6c28c5..6ba466f21fa 100644 --- a/src/shogun/mathematics/lapack.h +++ b/src/shogun/mathematics/lapack.h @@ -32,8 +32,12 @@ extern "C" { #ifdef HAVE_CXX11 #define __VFORCE_H 1 #endif +#if !(defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_LAPACKE_STRICT)) #include #else +#include +#endif +#else #include #endif @@ -44,7 +48,7 @@ extern "C" { #ifdef HAVE_ATLAS #include #else -// ACML and MKL do not provide clapack_* routines +// ACML, MKL and Mac OS vecLib do not provide clapack_* routines // double precision int clapack_dpotrf(const CBLAS_ORDER Order, const CBLAS_UPLO Uplo, const int N, double *A, const int lda); @@ -87,7 +91,7 @@ void wrap_dstemr(char jobz, char range, int n, double* d__, double *e, double vl } // only MKL, ACML and Mac OS vector library provide a header file for the lapack routines -#if !defined(HAVE_ACML) && !defined(HAVE_MKL) && !defined(HAVE_MVEC) +#if !defined(HAVE_ACML) && !defined(HAVE_MKL) && !defined(HAVE_MVEC) && !(defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_LAPACKE_STRICT)) // double precision int dsyev_(char*, char*, int*, double*, int*, double*, double*, int*, int*); int dgesvd_(char* jobu, char* jobvt, int* m, int* n, double* a, int* lda, diff --git a/src/shogun/mathematics/linalg/GPUMemoryBase.h b/src/shogun/mathematics/linalg/GPUMemoryBase.h index 61f7a3cecd8..8882e1b9118 100644 --- a/src/shogun/mathematics/linalg/GPUMemoryBase.h +++ b/src/shogun/mathematics/linalg/GPUMemoryBase.h @@ -36,24 +36,23 @@ namespace shogun { -/** @brief Interface for GPU memory libraries */ -template -struct GPUMemoryBase -{ - /** Default constructor */ - GPUMemoryBase() + /** @brief Interface for GPU memory libraries */ + template + struct GPUMemoryBase { - } 
- - /** Clone GPU memory, i.e. vector or matrix - * - * @param GPUMemoryBase structure pointer - * @return A deep-copy of GPUMemoryBase structure pointer - */ - virtual GPUMemoryBase* clone_vector(GPUMemoryBase* gpu_ptr, - index_t vlen) const = 0; -}; + /** Default constructor */ + GPUMemoryBase() + { + } + /** Clone GPU memory, i.e. vector or matrix + * + * @param gpu_ptr GPUMemoryBase structure pointer + * @return A deep-copy of GPUMemoryBase structure pointer + */ + virtual GPUMemoryBase* + clone_vector(GPUMemoryBase* gpu_ptr, index_t vlen) const = 0; + }; } -#endif //GPU_MEMORY_BASE_H__ +#endif // GPU_MEMORY_BASE_H__ diff --git a/src/shogun/mathematics/linalg/GPUMemoryViennaCL.h b/src/shogun/mathematics/linalg/GPUMemoryViennaCL.h index 470927563f5..1deaa0ed1f3 100644 --- a/src/shogun/mathematics/linalg/GPUMemoryViennaCL.h +++ b/src/shogun/mathematics/linalg/GPUMemoryViennaCL.h @@ -36,118 +36,127 @@ #include #ifdef HAVE_VIENNACL -#include -#include #include +#include +#include namespace shogun { -/** @brief ViennaCL memory structure. - * Saves data to GPU and clone data. - * @see SGVector - */ -template -struct GPUMemoryViennaCL : public GPUMemoryBase -{ - friend class LinalgBackendViennaCL; + /** @brief ViennaCL memory structure. + * Saves data to GPU and clone data. 
+ * @see SGVector + */ + template + struct GPUMemoryViennaCL : public GPUMemoryBase + { + friend class LinalgBackendViennaCL; - typedef viennacl::backend::mem_handle VCLMemoryArray; - typedef viennacl::vector_base VCLVectorBase; + typedef viennacl::backend::mem_handle VCLMemoryArray; + typedef viennacl::vector_base + VCLVectorBase; /** @see */ #if VIENNACL_VERSION >= 10600 - typedef viennacl::matrix_base VCLMatrixBase; + typedef viennacl::matrix_base + VCLMatrixBase; #else - typedef viennacl::matrix_base VCLMatrixBase; + typedef viennacl::matrix_base + VCLMatrixBase; #endif - /** Default constructor */ - GPUMemoryViennaCL() : m_data(new VCLMemoryArray()) - { - init(); - }; - - /** Create a new vector - * - * @param len Number of elements - */ - GPUMemoryViennaCL(index_t len): m_data(new VCLMemoryArray()) - { - init(); - viennacl::backend::memory_create(*m_data, sizeof(T)*len, - viennacl::context()); - } - - /** Wrap a vector around an existing memory segment - * - * @param gpu_ptr GPUMemoryBase pointer - */ - GPUMemoryViennaCL(GPUMemoryBase* gpu_ptr) : m_data(new VCLMemoryArray()) - { - GPUMemoryViennaCL* temp_ptr = static_cast*>(gpu_ptr); - init(); - m_data = temp_ptr->m_data; - m_offset = temp_ptr->m_offset; - }; - - /** Clone GPU vector - * - * @param vector GPUMemoryBase pointer - * @param vlen Length of the vector - */ - GPUMemoryBase* clone_vector(GPUMemoryBase* vector, index_t vlen) const - { - GPUMemoryViennaCL* src_ptr = static_cast*>(vector); - GPUMemoryViennaCL* gpu_ptr = new GPUMemoryViennaCL(); - - viennacl::backend::memory_create(*(gpu_ptr->m_data), sizeof(T)*vlen, - viennacl::context()); - viennacl::backend::memory_copy(*(src_ptr->m_data), *(gpu_ptr->m_data), - 0, 0, vlen*sizeof(T)); - - return gpu_ptr; - } - - /** ViennaCL Vector structure that saves the data - * - * @param len Number of elements - */ - VCLVectorBase data_vector(index_t len) - { - return VCLVectorBase(*m_data, len, m_offset, 1); - } - - /** ViennaCL Vector structure that saves the 
data - * - * @param nrows Row number of the matrix - * @param ncols Column number of the matrix - */ - VCLMatrixBase data_matrix(index_t nrows, index_t ncols) - { - #if VIENNACL_VERSION >= 10600 - return VCLMatrixBase(*m_data, nrows, m_offset, 1, nrows, ncols, 0, 1, ncols, false); - #else - return VCLMatrixBase(*m_data, nrows, m_offset, 1, nrows, ncols, 0, 1, ncols); - #endif - } - -private: - void init() - { - m_offset = 0; - } + /** Default constructor */ + GPUMemoryViennaCL() : m_data(new VCLMemoryArray()) + { + init(); + }; + + /** Create a new vector + * + * @param len Number of elements + */ + GPUMemoryViennaCL(index_t len) : m_data(new VCLMemoryArray()) + { + init(); + viennacl::backend::memory_create( + *m_data, sizeof(T) * len, viennacl::context()); + } + + /** Wrap a vector around an existing memory segment + * + * @param gpu_ptr GPUMemoryBase pointer + */ + GPUMemoryViennaCL(GPUMemoryBase* gpu_ptr) + : m_data(new VCLMemoryArray()) + { + GPUMemoryViennaCL* temp_ptr = + static_cast*>(gpu_ptr); + init(); + m_data = temp_ptr->m_data; + m_offset = temp_ptr->m_offset; + }; + + /** Clone GPU vector + * + * @param vector GPUMemoryBase pointer + * @param vlen Length of the vector + */ + GPUMemoryBase* + clone_vector(GPUMemoryBase* vector, index_t vlen) const + { + GPUMemoryViennaCL* src_ptr = + static_cast*>(vector); + GPUMemoryViennaCL* gpu_ptr = new GPUMemoryViennaCL(); + + viennacl::backend::memory_create( + *(gpu_ptr->m_data), sizeof(T) * vlen, viennacl::context()); + viennacl::backend::memory_copy( + *(src_ptr->m_data), *(gpu_ptr->m_data), 0, 0, vlen * sizeof(T)); + + return gpu_ptr; + } + + /** ViennaCL Vector structure that saves the data + * + * @param len Number of elements + */ + VCLVectorBase data_vector(index_t len) + { + return VCLVectorBase(*m_data, len, m_offset, 1); + } + + /** ViennaCL Vector structure that saves the data + * + * @param nrows Row number of the matrix + * @param ncols Column number of the matrix + */ + VCLMatrixBase 
data_matrix(index_t nrows, index_t ncols) + { +#if VIENNACL_VERSION >= 10600 + return VCLMatrixBase( + *m_data, nrows, m_offset, 1, nrows, ncols, 0, 1, ncols, false); +#else + return VCLMatrixBase( + *m_data, nrows, m_offset, 1, nrows, ncols, 0, 1, ncols); +#endif + } - /** Memory segment holding the data for the vector */ - alignas(CPU_CACHE_LINE_SIZE) std::shared_ptr m_data; + private: + void init() + { + m_offset = 0; + } - /** Offset for the memory segment, i.e the data of the vector - * starts at vector+offset - */ - alignas(CPU_CACHE_LINE_SIZE) index_t m_offset; -}; + /** Memory segment holding the data for the vector */ + alignas(CPU_CACHE_LINE_SIZE) std::shared_ptr m_data; + /** Offset for the memory segment, i.e the data of the vector + * starts at vector+offset + */ + alignas(CPU_CACHE_LINE_SIZE) index_t m_offset; + }; } #endif // HAVE_VIENNACL -#endif //GPU_MEMORY_VIENNACL_H__ +#endif // GPU_MEMORY_VIENNACL_H__ diff --git a/src/shogun/mathematics/linalg/LinalgBackendBase.h b/src/shogun/mathematics/linalg/LinalgBackendBase.h index 82b70239ea1..1e6a9471d6f 100644 --- a/src/shogun/mathematics/linalg/LinalgBackendBase.h +++ b/src/shogun/mathematics/linalg/LinalgBackendBase.h @@ -33,412 +33,730 @@ #ifndef LINALG_BACKEND_BASE_H__ #define LINALG_BACKEND_BASE_H__ -#include -#include -#include -#include +#include #include +#include +#include +#include +#include +#include #include +#include #include -#include namespace shogun { -/** @brief Base interface of generic linalg methods - * and generic memory transfer methods. 
- */ -class LinalgBackendBase -{ -public: - #define DEFINE_FOR_ALL_PTYPE(METHODNAME, Container) \ - METHODNAME(bool, Container); \ - METHODNAME(char, Container); \ - METHODNAME(int8_t, Container); \ - METHODNAME(uint8_t, Container); \ - METHODNAME(int16_t, Container); \ - METHODNAME(uint16_t, Container); \ - METHODNAME(int32_t, Container); \ - METHODNAME(uint32_t, Container); \ - METHODNAME(int64_t, Container); \ - METHODNAME(uint64_t, Container); \ - METHODNAME(float32_t, Container); \ - METHODNAME(float64_t, Container); \ - METHODNAME(floatmax_t, Container); \ - METHODNAME(complex128_t, Container); \ - - #define DEFINE_FOR_REAL_PTYPE(METHODNAME, Container) \ - METHODNAME(bool, Container); \ - METHODNAME(char, Container); \ - METHODNAME(int8_t, Container); \ - METHODNAME(uint8_t, Container); \ - METHODNAME(int16_t, Container); \ - METHODNAME(uint16_t, Container); \ - METHODNAME(int32_t, Container); \ - METHODNAME(uint32_t, Container); \ - METHODNAME(int64_t, Container); \ - METHODNAME(uint64_t, Container); \ - METHODNAME(float32_t, Container); \ - METHODNAME(float64_t, Container); \ + /** @brief Base interface of generic linalg methods + * and generic memory transfer methods. 
+ */ + class LinalgBackendBase + { + public: +#define DEFINE_FOR_ALL_PTYPE(METHODNAME, Container) \ + METHODNAME(bool, Container); \ + METHODNAME(char, Container); \ + METHODNAME(int8_t, Container); \ + METHODNAME(uint8_t, Container); \ + METHODNAME(int16_t, Container); \ + METHODNAME(uint16_t, Container); \ + METHODNAME(int32_t, Container); \ + METHODNAME(uint32_t, Container); \ + METHODNAME(int64_t, Container); \ + METHODNAME(uint64_t, Container); \ + METHODNAME(float32_t, Container); \ + METHODNAME(float64_t, Container); \ + METHODNAME(floatmax_t, Container); \ + METHODNAME(complex128_t, Container); + +#define DEFINE_FOR_REAL_PTYPE(METHODNAME, Container) \ + METHODNAME(bool, Container); \ + METHODNAME(char, Container); \ + METHODNAME(int8_t, Container); \ + METHODNAME(uint8_t, Container); \ + METHODNAME(int16_t, Container); \ + METHODNAME(uint16_t, Container); \ + METHODNAME(int32_t, Container); \ + METHODNAME(uint32_t, Container); \ + METHODNAME(int64_t, Container); \ + METHODNAME(uint64_t, Container); \ + METHODNAME(float32_t, Container); \ + METHODNAME(float64_t, Container); \ METHODNAME(floatmax_t, Container); - #define DEFINE_FOR_NON_INTEGER_PTYPE(METHODNAME, Container) \ - METHODNAME(float32_t, Container); \ - METHODNAME(float64_t, Container); \ - METHODNAME(floatmax_t, Container); \ +#define DEFINE_FOR_NON_INTEGER_PTYPE(METHODNAME, Container) \ + METHODNAME(float32_t, Container); \ + METHODNAME(float64_t, Container); \ + METHODNAME(floatmax_t, Container); \ METHODNAME(complex128_t, Container); - /** - * Wrapper method of add operation the operation result = alpha*a + beta*b. 
- * - * @see linalg::add - */ - #define BACKEND_GENERIC_IN_PLACE_ADD(Type, Container) \ - virtual void add(Container& a, Container& b, Type alpha, Type beta, Container& result) const \ - { \ - SG_SNOTIMPLEMENTED; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_ADD, SGVector) - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_ADD, SGMatrix) - #undef BACKEND_GENERIC_IN_PLACE_ADD - - /** - * Wrapper method of Cholesky decomposition. - * - * @see linalg::cholesky_factor - */ - #define BACKEND_GENERIC_CHOLESKY_FACTOR(Type, Container) \ - virtual Container cholesky_factor(const Container& A, \ - const bool lower) const \ - { \ - SG_SNOTIMPLEMENTED; \ - return 0; \ - } - DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_CHOLESKY_FACTOR, SGMatrix) - #undef BACKEND_GENERIC_CHOLESKY_FACTOR - - /** - * Wrapper triangular solver with Choleksy decomposition. - * - * @see linalg::cholesky_solver - */ - #define BACKEND_GENERIC_CHOLESKY_SOLVER(Type, Container) \ - virtual SGVector cholesky_solver(const Container& L, \ - const SGVector& b, const bool lower) const \ - { \ - SG_SNOTIMPLEMENTED; \ - return 0; \ - } - DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_CHOLESKY_SOLVER, SGMatrix) - #undef BACKEND_GENERIC_CHOLESKY_SOLVER - - /** - * Wrapper method of vector dot-product that works with generic vectors. - * - * @see linalg::dot - */ - #define BACKEND_GENERIC_DOT(Type, Container) \ +/** + * Wrapper method of add operation the operation result = alpha*a + beta*b. + * + * @see linalg::add + */ +#define BACKEND_GENERIC_IN_PLACE_ADD(Type, Container) \ + virtual void add( \ + Container& a, Container& b, Type alpha, Type beta, \ + Container& result) const \ + { \ + SG_SNOTIMPLEMENTED; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_ADD, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_ADD, SGMatrix) +#undef BACKEND_GENERIC_IN_PLACE_ADD + +/** + * Wrapper method of add column vector result = alpha*A.col(i) + beta*b. 
+ * + * @see linalg::add_col_vec + */ +#define BACKEND_GENERIC_ADD_COL_VEC(Type, Container) \ + virtual void add_col_vec( \ + const SGMatrix& A, index_t i, const SGVector& b, \ + Container& result, Type alpha, Type beta) const \ + { \ + SG_SNOTIMPLEMENTED; \ + return; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_ADD_COL_VEC, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_ADD_COL_VEC, SGMatrix) +#undef BACKEND_GENERIC_ADD_COL_VEC + +/** + * Wrapper method of add vector to each column of matrix. + * + * @see linalg::add_vector + */ +#define BACKEND_GENERIC_ADD_VECTOR(Type, Container) \ + virtual void add_vector( \ + const SGMatrix& A, const SGVector& b, \ + SGMatrix& result, Type alpha, Type beta) const \ + { \ + SG_SNOTIMPLEMENTED; \ + return; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_ADD_VECTOR, SGMatrix) +#undef BACKEND_GENERIC_ADD_VECTOR + +/** + * Wrapper method of add scalar operation. + * + * @see linalg::add_scalar + */ +#define BACKEND_GENERIC_ADD_SCALAR(Type, Container) \ + virtual void add_scalar(Container& a, Type b) const \ + { \ + SG_SNOTIMPLEMENTED; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_ADD_SCALAR, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_ADD_SCALAR, SGMatrix) +#undef BACKEND_GENERIC_ADD_SCALAR + +/** + * Wrapper method of center matrix operation. + * + * @see linalg::center_matrix + */ +#define BACKEND_GENERIC_CENTER_MATRIX(Type, Container) \ + virtual void center_matrix(Container& A) const \ + { \ + SG_SNOTIMPLEMENTED; \ + } + DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_CENTER_MATRIX, SGMatrix) +#undef BACKEND_GENERIC_CENTER_MATRIX + +/** + * Wrapper method of Cholesky decomposition. 
+ * + * @see linalg::cholesky_factor + */ +#define BACKEND_GENERIC_CHOLESKY_FACTOR(Type, Container) \ + virtual Container cholesky_factor( \ + const Container& A, const bool lower) const \ + { \ + SG_SNOTIMPLEMENTED; \ + return 0; \ + } + DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_CHOLESKY_FACTOR, SGMatrix) +#undef BACKEND_GENERIC_CHOLESKY_FACTOR + +/** + * Wrapper triangular solver with Choleksy decomposition. + * + * @see linalg::cholesky_solver + */ +#define BACKEND_GENERIC_CHOLESKY_SOLVER(Type, Container) \ + virtual SGVector cholesky_solver( \ + const Container& L, const SGVector& b, const bool lower) \ + const \ + { \ + SG_SNOTIMPLEMENTED; \ + return 0; \ + } + DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_CHOLESKY_SOLVER, SGMatrix) +#undef BACKEND_GENERIC_CHOLESKY_SOLVER + +/** + * Wrapper method of cross entropy. + * + * @see linalg::cross_entropy + */ +#define BACKEND_GENERIC_CROSS_ENTROPY(Type, Container) \ + virtual Type cross_entropy( \ + const Container& P, const Container& Q) const \ + { \ + SG_SNOTIMPLEMENTED; \ + return 0; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_CROSS_ENTROPY, SGMatrix) +#undef BACKEND_GENERIC_CROSS_ENTROPY + +/** + * Wrapper method of vector dot-product that works with generic vectors. + * + * @see linalg::dot + */ +#define BACKEND_GENERIC_DOT(Type, Container) \ virtual Type dot(const Container& a, const Container& b) const \ - { \ - SG_SNOTIMPLEMENTED; \ - return 0; \ + { \ + SG_SNOTIMPLEMENTED; \ + return 0; \ } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_DOT, SGVector) - #undef BACKEND_GENERIC_DOT + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_DOT, SGVector) +#undef BACKEND_GENERIC_DOT - /** - * Wrapper method of in-place matrix elementwise product. 
- * - * @see linalg::element_prod - */ - #define BACKEND_GENERIC_IN_PLACE_ELEMENT_PROD(Type, Container) \ - virtual void element_prod(Container& a, Container& b,\ - Container& result) const \ - { \ - SG_SNOTIMPLEMENTED; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_ELEMENT_PROD, SGMatrix) - #undef BACKEND_GENERIC_IN_PLACE_ELEMENT_PROD - - /** - * Wrapper method of in-place matrix block elementwise product. - * - * @see linalg::element_prod - */ - #define BACKEND_GENERIC_IN_PLACE_BLOCK_ELEMENT_PROD(Type, Container) \ - virtual void element_prod(linalg::Block>& a, \ - linalg::Block>& b, Container& result) const \ - { \ - SG_SNOTIMPLEMENTED; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_BLOCK_ELEMENT_PROD, SGMatrix) - #undef BACKEND_GENERIC_IN_PLACE_BLOCK_ELEMENT_PROD - - /** - * Wrapper method of logistic function f(x) = 1/(1+exp(-x)) - * - * @see linalg::logistic - */ - #define BACKEND_GENERIC_LOGISTIC(Type, Container) \ - virtual void logistic(Container& a, Container& result) const \ - { \ - SG_SNOTIMPLEMENTED; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_LOGISTIC, SGMatrix) - #undef BACKEND_GENERIC_LOGISTIC - - /** - * Wrapper method of matrix product method. - * - * @see linalg::matrix_prod - */ - #define BACKEND_GENERIC_IN_PLACE_MATRIX_PROD(Type, Container) \ - virtual void matrix_prod(SGMatrix& a, Container& b,\ - Container& result, bool transpose_A, bool transpose_B) const \ - { \ - SG_SNOTIMPLEMENTED; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_MATRIX_PROD, SGVector) - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_MATRIX_PROD, SGMatrix) - #undef BACKEND_GENERIC_IN_PLACE_MATRIX_PROD - - /** - * Wrapper method of max method. Return the largest element in a vector or matrix. 
- * - * @see linalg::max - */ - #define BACKEND_GENERIC_MAX(Type, Container) \ - virtual Type max(const Container& a) const \ - { \ - SG_SNOTIMPLEMENTED; \ - return 0; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_MAX, SGVector) - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_MAX, SGMatrix) - #undef BACKEND_GENERIC_MAX - - /** - * Wrapper method that computes mean of SGVectors and SGMatrices - * that are composed of real numbers. - * - * @see linalg::mean - */ - #define BACKEND_GENERIC_REAL_MEAN(Type, Container) \ - virtual float64_t mean(const Container& a) const \ - { \ - SG_SNOTIMPLEMENTED; \ - return 0; \ - } - DEFINE_FOR_REAL_PTYPE(BACKEND_GENERIC_REAL_MEAN, SGVector) - DEFINE_FOR_REAL_PTYPE(BACKEND_GENERIC_REAL_MEAN, SGMatrix) - #undef BACKEND_GENERIC_REAL_MEAN - - /** - * Wrapper method that computes mean of SGVectors and SGMatrices - * that are composed of complex numbers. - * - * @see linalg::mean - */ - #define BACKEND_GENERIC_COMPLEX_MEAN(Container) \ - virtual complex128_t mean(const Container& a) const \ - { \ - SG_SNOTIMPLEMENTED; \ - return 0; \ - } - BACKEND_GENERIC_COMPLEX_MEAN(SGVector) - BACKEND_GENERIC_COMPLEX_MEAN(SGMatrix) - #undef BACKEND_GENERIC_COMPLEX_MEAN - - /** - * Wrapper method that range fills a vector of matrix. - * - * @see linalg::range_fill - */ - #define BACKEND_GENERIC_RANGE_FILL(Type, Container) \ - virtual void range_fill(Container& a, const Type start) const \ - { \ - SG_SNOTIMPLEMENTED; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_RANGE_FILL, SGVector) - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_RANGE_FILL, SGMatrix) - #undef BACKEND_GENERIC_RANGE_FILL - - /** - * Wrapper method of scale operation the operation result = alpha*A. 
- * - * @see linalg::scale - */ - #define BACKEND_GENERIC_IN_PLACE_SCALE(Type, Container) \ - virtual void scale(Container& a, Type alpha, Container& result) const \ - { \ - SG_SNOTIMPLEMENTED; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_SCALE, SGVector) - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_SCALE, SGMatrix) - #undef BACKEND_GENERIC_IN_PLACE_SCALE - - /** - * Wrapper method that sets const values to vectors or matrices. - * - * @see linalg::set_const - */ - #define BACKEND_GENERIC_SET_CONST(Type, Container) \ - virtual void set_const(Container& a, const Type value) const \ - { \ - SG_SNOTIMPLEMENTED; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SET_CONST, SGVector) - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SET_CONST, SGMatrix) - #undef BACKEND_GENERIC_SET_CONST - - /** - * Wrapper method of sum that works with generic vectors or matrices. - * - * @see linalg::sum - */ - #define BACKEND_GENERIC_SUM(Type, Container) \ - virtual Type sum(const Container& a, bool no_diag) const \ - { \ - SG_SNOTIMPLEMENTED; \ - return 0; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SUM, SGVector) - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SUM, SGMatrix) - #undef BACKEND_GENERIC_SUM - - /** - * Wrapper method of sum that works with matrix blocks. - * - * @see linalg::sum - */ - #define BACKEND_GENERIC_BLOCK_SUM(Type, Container) \ - virtual Type sum(const linalg::Block>& a, bool no_diag) const \ - { \ - SG_SNOTIMPLEMENTED; \ - return 0; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_BLOCK_SUM, SGMatrix) - #undef BACKEND_GENERIC_BLOCK_SUM - - /** - * Wrapper method of sum that works with symmetric matrices. 
- * - * @see linalg::sum_symmetric - */ - #define BACKEND_GENERIC_SYMMETRIC_SUM(Type, Container) \ - virtual Type sum_symmetric(const Container& a, bool no_diag) const \ - { \ - SG_SNOTIMPLEMENTED; \ - return 0; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SYMMETRIC_SUM, SGMatrix) - #undef BACKEND_GENERIC_SYMMETRIC_SUM - - /** - * Wrapper method of sum that works with symmetric matrix blocks. - * - * @see linalg::sum - */ - #define BACKEND_GENERIC_SYMMETRIC_BLOCK_SUM(Type, Container) \ - virtual Type sum_symmetric(const linalg::Block>& a, bool no_diag) const \ - { \ - SG_SNOTIMPLEMENTED; \ - return 0; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SYMMETRIC_BLOCK_SUM, SGMatrix) - #undef BACKEND_GENERIC_SYMMETRIC_BLOCK_SUM - - /** - * Wrapper method of matrix rowwise sum that works with dense matrices. - * - * @see linalg::colwise_sum - */ - #define BACKEND_GENERIC_COLWISE_SUM(Type, Container) \ - virtual SGVector colwise_sum(const Container& a, bool no_diag) const \ - { \ - SG_SNOTIMPLEMENTED; \ - return 0; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_COLWISE_SUM, SGMatrix) - #undef BACKEND_GENERIC_COLWISE_SUM - - /** - * Wrapper method of matrix colwise sum that works with dense matrices. - * - * @see linalg::colwise_sum - */ - #define BACKEND_GENERIC_BLOCK_COLWISE_SUM(Type, Container) \ - virtual SGVector colwise_sum(const linalg::Block>& a, bool no_diag) const \ - { \ - SG_SNOTIMPLEMENTED; \ - return 0; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_BLOCK_COLWISE_SUM, SGMatrix) - #undef BACKEND_GENERIC_BLOCK_COLWISE_SUM - - /** - * Wrapper method of matrix rowwise sum that works with dense matrices. 
- * - * @see linalg::rowwise_sum - */ - #define BACKEND_GENERIC_ROWWISE_SUM(Type, Container) \ - virtual SGVector rowwise_sum(const Container& a, bool no_diag) const \ - { \ - SG_SNOTIMPLEMENTED; \ - return 0; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_ROWWISE_SUM, SGMatrix) - #undef BACKEND_GENERIC_ROWWISE_SUM - - /** - * Wrapper method of matrix rowwise sum that works with dense matrices. - * - * @see linalg::rowwise_sum - */ - #define BACKEND_GENERIC_BLOCK_ROWWISE_SUM(Type, Container) \ - virtual SGVector rowwise_sum(const linalg::Block>& a, bool no_diag) const \ - { \ - SG_SNOTIMPLEMENTED; \ - return 0; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_BLOCK_ROWWISE_SUM, SGMatrix) - #undef BACKEND_GENERIC_BLOCK_ROWWISE_SUM - - /** - * Wrapper method of Transferring data to GPU memory. - * Does nothing if no GPU backend registered. - * - * @see linalg::to_gpu - */ - #define BACKEND_GENERIC_TO_GPU(Type, Container) \ - virtual GPUMemoryBase* to_gpu(const Container&) const \ - { \ - SG_SNOTIMPLEMENTED; \ - return 0; \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_TO_GPU, SGVector) - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_TO_GPU, SGMatrix) - #undef BACKEND_GENERIC_TO_GPU - - /** - * Wrapper method of fetching data from GPU memory. - * - * @see linalg::from_gpu - */ - #define BACKEND_GENERIC_FROM_GPU(Type, Container) \ - virtual void from_gpu(const Container&, Type* data) const \ - { \ - SG_SNOTIMPLEMENTED; \ +/** + * Wrapper method of eigenvalues and eigenvectors computation. + * + * @see linalg::eigen_solver + */ +#define BACKEND_GENERIC_EIGEN_SOLVER(Type, Container) \ + virtual void eigen_solver( \ + const Container& A, SGVector& eigenvalues, \ + SGMatrix& eigenvectors) const \ + { \ + SG_SNOTIMPLEMENTED; \ + } + DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_EIGEN_SOLVER, SGMatrix) +#undef BACKEND_GENERIC_EIGEN_SOLVER + +/** + * Wrapper method of eigenvalues and eigenvectors computation + * for symmetric matrices. 
+ * + * @see linalg::eigen_solver_symmetric + */ +#define BACKEND_GENERIC_EIGEN_SOLVER_SYMMETRIC(Type, Container) \ + virtual void eigen_solver_symmetric( \ + const Container& A, SGVector& eigenvalues, \ + SGMatrix& eigenvectors, index_t k) const \ + { \ + SG_SNOTIMPLEMENTED; \ + } + DEFINE_FOR_NON_INTEGER_PTYPE( + BACKEND_GENERIC_EIGEN_SOLVER_SYMMETRIC, SGMatrix) +#undef BACKEND_GENERIC_EIGEN_SOLVER_SYMMETRIC + +/** + * Wrapper method of in-place matrix elementwise product. + * + * @see linalg::element_prod + */ +#define BACKEND_GENERIC_IN_PLACE_ELEMENT_PROD(Type, Container) \ + virtual void element_prod( \ + Container& a, Container& b, Container& result) const \ + { \ + SG_SNOTIMPLEMENTED; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_ELEMENT_PROD, SGMatrix) +#undef BACKEND_GENERIC_IN_PLACE_ELEMENT_PROD + +/** + * Wrapper method of in-place matrix block elementwise product. + * + * @see linalg::element_prod + */ +#define BACKEND_GENERIC_IN_PLACE_BLOCK_ELEMENT_PROD(Type, Container) \ + virtual void element_prod( \ + linalg::Block>& a, linalg::Block>& b, \ + Container& result) const \ + { \ + SG_SNOTIMPLEMENTED; \ + } + DEFINE_FOR_ALL_PTYPE( + BACKEND_GENERIC_IN_PLACE_BLOCK_ELEMENT_PROD, SGMatrix) +#undef BACKEND_GENERIC_IN_PLACE_BLOCK_ELEMENT_PROD + +/** + * Wrapper method of in-place exponent method. + * + * @see linalg::exponent + */ +#define BACKEND_GENERIC_EXPONENT(Type, Container) \ + virtual void exponent(const Container& a, Container& result) \ + const \ + { \ + SG_SNOTIMPLEMENTED; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_EXPONENT, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_EXPONENT, SGMatrix) +#undef BACKEND_GENERIC_EXPONENT + +/** + * Wrapper method of set matrix to identity. 
+ * + * @see linalg::identity + */ +#define BACKEND_GENERIC_IDENTITY(Type, Container) \ + virtual void identity(Container& identity_matrix) const \ + { \ + SG_SNOTIMPLEMENTED; \ + return; \ } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_FROM_GPU, SGVector) - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_FROM_GPU, SGMatrix) - #undef BACKEND_GENERIC_FROM_GPU + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IDENTITY, SGMatrix) +#undef BACKEND_GENERIC_IDENTITY + +/** + * Wrapper method of logistic function f(x) = 1/(1+exp(-x)) + * + * @see linalg::logistic + */ +#define BACKEND_GENERIC_LOGISTIC(Type, Container) \ + virtual void logistic(Container& a, Container& result) const \ + { \ + SG_SNOTIMPLEMENTED; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_LOGISTIC, SGMatrix) +#undef BACKEND_GENERIC_LOGISTIC + +/** + * Wrapper method of matrix product method. + * + * @see linalg::matrix_prod + */ +#define BACKEND_GENERIC_IN_PLACE_MATRIX_PROD(Type, Container) \ + virtual void matrix_prod( \ + SGMatrix& a, Container& b, Container& result, \ + bool transpose_A, bool transpose_B) const \ + { \ + SG_SNOTIMPLEMENTED; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_MATRIX_PROD, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_MATRIX_PROD, SGMatrix) +#undef BACKEND_GENERIC_IN_PLACE_MATRIX_PROD + +/** + * Wrapper method of max method. Return the largest element in a vector or + * matrix. + * + * @see linalg::max + */ +#define BACKEND_GENERIC_MAX(Type, Container) \ + virtual Type max(const Container& a) const \ + { \ + SG_SNOTIMPLEMENTED; \ + return 0; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_MAX, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_MAX, SGMatrix) +#undef BACKEND_GENERIC_MAX + +/** +* Wrapper method that computes mean of SGVectors and SGMatrices +* that are composed of real numbers. 
+*
+* @see linalg::mean
+*/
+#define BACKEND_GENERIC_REAL_MEAN(Type, Container) \
+	virtual float64_t mean(const Container& a) const \
+	{ \
+		SG_SNOTIMPLEMENTED; \
+		return 0; \
+	}
+	DEFINE_FOR_REAL_PTYPE(BACKEND_GENERIC_REAL_MEAN, SGVector)
+	DEFINE_FOR_REAL_PTYPE(BACKEND_GENERIC_REAL_MEAN, SGMatrix)
+#undef BACKEND_GENERIC_REAL_MEAN
+
+/**
+* Wrapper method that computes mean of SGVectors and SGMatrices
+* that are composed of complex numbers.
+*
+* @see linalg::mean
+*/
+#define BACKEND_GENERIC_COMPLEX_MEAN(Container) \
+	virtual complex128_t mean(const Container& a) const \
+	{ \
+		SG_SNOTIMPLEMENTED; \
+		return 0; \
+	}
+	BACKEND_GENERIC_COMPLEX_MEAN(SGVector)
+	BACKEND_GENERIC_COMPLEX_MEAN(SGMatrix)
+#undef BACKEND_GENERIC_COMPLEX_MEAN
+
+/**
+ * Wrapper method of multiply_by_logistic_derivative, where the logistic
+ * function is f(x) = 1/(1+exp(-x))
+ *
+ * @see linalg::multiply_by_logistic_derivative
+ */
+#define BACKEND_GENERIC_MULTIPLY_BY_LOGISTIC_DERIV(Type, Container) \
+	virtual void multiply_by_logistic_derivative( \
+	    Container& a, Container& result) const \
+	{ \
+		SG_SNOTIMPLEMENTED; \
+	}
+	DEFINE_FOR_ALL_PTYPE(
+	    BACKEND_GENERIC_MULTIPLY_BY_LOGISTIC_DERIV, SGMatrix)
+#undef BACKEND_GENERIC_MULTIPLY_BY_LOGISTIC_DERIV
+
+/**
+ * Wrapper method of multiply_by_rectified_linear_derivative
+ *
+ * @see linalg::multiply_by_rectified_linear_derivative
+ */
+#define BACKEND_GENERIC_MULTIPLY_BY_RECTIFIED_LINEAR_DERIV(Type, Container) \
+	virtual void multiply_by_rectified_linear_derivative( \
+	    Container& a, Container& result) const \
+	{ \
+		SG_SNOTIMPLEMENTED; \
+	}
+	DEFINE_FOR_ALL_PTYPE(
+	    BACKEND_GENERIC_MULTIPLY_BY_RECTIFIED_LINEAR_DERIV, SGMatrix)
+#undef BACKEND_GENERIC_MULTIPLY_BY_RECTIFIED_LINEAR_DERIV
+
+/**
+* Wrapper method that range fills a vector or matrix.
+* +* @see linalg::range_fill +*/ +#define BACKEND_GENERIC_RANGE_FILL(Type, Container) \ + virtual void range_fill(Container& a, const Type start) const \ + { \ + SG_SNOTIMPLEMENTED; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_RANGE_FILL, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_RANGE_FILL, SGMatrix) +#undef BACKEND_GENERIC_RANGE_FILL + +/** + * Wrapper method of rectified_linear method f(x) = max(0, x) + * + * @see linalg::rectified_linear + */ +#define BACKEND_GENERIC_RECTIFIED_LINEAR(Type, Container) \ + virtual void rectified_linear(Container& a, Container& result) \ + const \ + { \ + SG_SNOTIMPLEMENTED; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_RECTIFIED_LINEAR, SGMatrix) +#undef BACKEND_GENERIC_RECTIFIED_LINEAR + +/** + * Wrapper method that solves a system of linear equations + * using QR decomposition. + * + * @see linalg::qr_solver + */ +#define BACKEND_GENERIC_QR_SOLVER(Type, Container) \ + virtual Container qr_solver( \ + const SGMatrix& A, const Container& b) const \ + { \ + SG_SNOTIMPLEMENTED; \ + return 0; \ + } + DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_QR_SOLVER, SGVector) + DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_QR_SOLVER, SGMatrix) +#undef BACKEND_GENERIC_QR_SOLVER + +/** + * Wrapper method of scale operation the operation result = alpha*A. + * + * @see linalg::scale + */ +#define BACKEND_GENERIC_IN_PLACE_SCALE(Type, Container) \ + virtual void scale( \ + Container& a, Type alpha, Container& result) const \ + { \ + SG_SNOTIMPLEMENTED; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_SCALE, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_SCALE, SGMatrix) +#undef BACKEND_GENERIC_IN_PLACE_SCALE + +/** + * Wrapper method that sets const values to vectors or matrices. 
+ * + * @see linalg::set_const + */ +#define BACKEND_GENERIC_SET_CONST(Type, Container) \ + virtual void set_const(Container& a, const Type value) const \ + { \ + SG_SNOTIMPLEMENTED; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SET_CONST, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SET_CONST, SGMatrix) +#undef BACKEND_GENERIC_SET_CONST + +/** +* Wrapper method of sum that works with generic vectors or matrices. +* +* @see linalg::sum +*/ +#define BACKEND_GENERIC_SUM(Type, Container) \ + virtual Type sum(const Container& a, bool no_diag) const \ + { \ + SG_SNOTIMPLEMENTED; \ + return 0; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SUM, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SUM, SGMatrix) +#undef BACKEND_GENERIC_SUM + +/** + * Wrapper method of softmax method. + * + * @see linalg::softmax + */ +#define BACKEND_GENERIC_SOFTMAX(Type, Container) \ + virtual void softmax(Container& a) const \ + { \ + SG_SNOTIMPLEMENTED; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SOFTMAX, SGMatrix) +#undef BACKEND_GENERIC_SOFTMAX + +/** +* Wrapper method of squared error method. +* +* @see linalg::squared_error +*/ +#define BACKEND_GENERIC_SQUARED_ERROR(Type, Container) \ + virtual Type squared_error( \ + const Container& P, const Container& Q) const \ + { \ + SG_SNOTIMPLEMENTED; \ + return 0; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SQUARED_ERROR, SGMatrix) +#undef BACKEND_GENERIC_SQUARED_ERROR + +/** +* Wrapper method of sum that works with matrix blocks. +* +* @see linalg::sum +*/ +#define BACKEND_GENERIC_BLOCK_SUM(Type, Container) \ + virtual Type sum(const linalg::Block>& a, bool no_diag) \ + const \ + { \ + SG_SNOTIMPLEMENTED; \ + return 0; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_BLOCK_SUM, SGMatrix) +#undef BACKEND_GENERIC_BLOCK_SUM + +/** +* Wrapper method of sum that works with symmetric matrices. 
+*
+* @see linalg::sum_symmetric
+*/
+#define BACKEND_GENERIC_SYMMETRIC_SUM(Type, Container) \
+	virtual Type sum_symmetric(const Container& a, bool no_diag) const \
+	{ \
+		SG_SNOTIMPLEMENTED; \
+		return 0; \
+	}
+	DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SYMMETRIC_SUM, SGMatrix)
+#undef BACKEND_GENERIC_SYMMETRIC_SUM
+
+/**
+* Wrapper method of sum that works with symmetric matrix blocks.
+*
+* @see linalg::sum_symmetric
+*/
+#define BACKEND_GENERIC_SYMMETRIC_BLOCK_SUM(Type, Container) \
+	virtual Type sum_symmetric( \
+	    const linalg::Block>& a, bool no_diag) const \
+	{ \
+		SG_SNOTIMPLEMENTED; \
+		return 0; \
+	}
+	DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SYMMETRIC_BLOCK_SUM, SGMatrix)
+#undef BACKEND_GENERIC_SYMMETRIC_BLOCK_SUM
+
+/**
+ * Wrapper method of matrix colwise sum that works with dense matrices.
+ *
+ * @see linalg::colwise_sum
+ */
+#define BACKEND_GENERIC_COLWISE_SUM(Type, Container) \
+	virtual SGVector colwise_sum(const Container& a, bool no_diag) \
+	    const \
+	{ \
+		SG_SNOTIMPLEMENTED; \
+		return 0; \
+	}
+	DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_COLWISE_SUM, SGMatrix)
+#undef BACKEND_GENERIC_COLWISE_SUM
+
+/**
+* Wrapper method of matrix colwise sum that works with dense matrices.
+*
+* @see linalg::colwise_sum
+*/
+#define BACKEND_GENERIC_BLOCK_COLWISE_SUM(Type, Container) \
+	virtual SGVector colwise_sum( \
+	    const linalg::Block>& a, bool no_diag) const \
+	{ \
+		SG_SNOTIMPLEMENTED; \
+		return 0; \
+	}
+	DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_BLOCK_COLWISE_SUM, SGMatrix)
+#undef BACKEND_GENERIC_BLOCK_COLWISE_SUM
+
+/**
+ * Wrapper method of matrix rowwise sum that works with dense matrices.
+ *
+ * @see linalg::rowwise_sum
+ */
+#define BACKEND_GENERIC_ROWWISE_SUM(Type, Container) \
+	virtual SGVector rowwise_sum(const Container& a, bool no_diag) \
+	    const \
+	{ \
+		SG_SNOTIMPLEMENTED; \
+		return 0; \
+	}
+	DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_ROWWISE_SUM, SGMatrix)
+#undef BACKEND_GENERIC_ROWWISE_SUM
+
+/**
+* Wrapper method of matrix rowwise sum that works with dense matrices.
+*
+* @see linalg::rowwise_sum
+*/
+#define BACKEND_GENERIC_BLOCK_ROWWISE_SUM(Type, Container) \
+	virtual SGVector rowwise_sum( \
+	    const linalg::Block>& a, bool no_diag) const \
+	{ \
+		SG_SNOTIMPLEMENTED; \
+		return 0; \
+	}
+	DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_BLOCK_ROWWISE_SUM, SGMatrix)
+#undef BACKEND_GENERIC_BLOCK_ROWWISE_SUM
+
+/**
+ * Wrapper method of svd computation.
+ *
+ * @see linalg::svd
+ */
+#define BACKEND_GENERIC_SVD(Type, Container) \
+	virtual void svd( \
+	    const Container& A, SGVector s, SGMatrix U, \
+	    bool thin_U, linalg::SVDAlgorithm alg) const \
+	{ \
+		SG_SNOTIMPLEMENTED; \
+	}
+	DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_SVD, SGMatrix)
+#undef BACKEND_GENERIC_SVD
+
+/**
+ * Wrapper method of trace computation.
+ *
+ * @see linalg::trace
+ */
+#define BACKEND_GENERIC_TRACE(Type, Container) \
+	virtual Type trace(const Container& A) const \
+	{ \
+		SG_SNOTIMPLEMENTED; \
+		return 0; \
+	}
+	DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_TRACE, SGMatrix)
+#undef BACKEND_GENERIC_TRACE
+
+/**
+ * Wrapper method of matrix transpose computation.
+ *
+ * @see linalg::transpose_matrix
+ */
+#define BACKEND_GENERIC_TRANSPOSE_MATRIX(Type, Container) \
+	virtual Container transpose_matrix(const Container& A) const \
+	{ \
+		SG_SNOTIMPLEMENTED; \
+		return 0; \
+	}
+	DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_TRANSPOSE_MATRIX, SGMatrix)
+#undef BACKEND_GENERIC_TRANSPOSE_MATRIX
+
+/**
+ * Wrapper method of triangular solver.
+ * + * @see linalg::triangular_solver + */ +#define BACKEND_GENERIC_TRIANGULAR_SOLVER(Type, Container) \ + virtual Container triangular_solver( \ + const SGMatrix& L, const Container& b, \ + const bool lower = true) const \ + { \ + SG_SNOTIMPLEMENTED; \ + return 0; \ + } + DEFINE_FOR_NON_INTEGER_PTYPE( + BACKEND_GENERIC_TRIANGULAR_SOLVER, SGVector) + DEFINE_FOR_NON_INTEGER_PTYPE( + BACKEND_GENERIC_TRIANGULAR_SOLVER, SGMatrix) +#undef BACKEND_GENERIC_TRIANGULAR_SOLVER + +/** + * Wrapper method of set vector or matrix to zero. + * + * @see linalg::zero + */ +#define BACKEND_GENERIC_ZERO(Type, Container) \ + virtual void zero(Container& a) const \ + { \ + SG_SNOTIMPLEMENTED; \ + return; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_ZERO, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_ZERO, SGMatrix) +#undef BACKEND_GENERIC_ZERO + +/** + * Wrapper method of Transferring data to GPU memory. + * Does nothing if no GPU backend registered. + * + * @see linalg::to_gpu + */ +#define BACKEND_GENERIC_TO_GPU(Type, Container) \ + virtual GPUMemoryBase* to_gpu(const Container&) const \ + { \ + SG_SNOTIMPLEMENTED; \ + return 0; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_TO_GPU, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_TO_GPU, SGMatrix) +#undef BACKEND_GENERIC_TO_GPU + +/** + * Wrapper method of fetching data from GPU memory. 
+ * + * @see linalg::from_gpu + */ +#define BACKEND_GENERIC_FROM_GPU(Type, Container) \ + virtual void from_gpu(const Container&, Type* data) const \ + { \ + SG_SNOTIMPLEMENTED; \ + } + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_FROM_GPU, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_FROM_GPU, SGMatrix) +#undef BACKEND_GENERIC_FROM_GPU #undef DEFINE_FOR_ALL_PTYPE #undef DEFINE_FOR_REAL_PTYPE #undef DEFINE_FOR_NON_INTEGER_PTYPE -}; - + }; } -#endif //LINALG_BACKEND_BASE_H__ +#endif // LINALG_BACKEND_BASE_H__ diff --git a/src/shogun/mathematics/linalg/LinalgBackendEigen.h b/src/shogun/mathematics/linalg/LinalgBackendEigen.h index 99c410b862a..96b80e0da9c 100644 --- a/src/shogun/mathematics/linalg/LinalgBackendEigen.h +++ b/src/shogun/mathematics/linalg/LinalgBackendEigen.h @@ -33,684 +33,625 @@ #ifndef LINALG_BACKEND_EIGEN_H__ #define LINALG_BACKEND_EIGEN_H__ -#include +#include +#include #include #include -#include +#include +#include namespace shogun { -/** @brief Linalg methods with Eigen3 backend */ -class LinalgBackendEigen : public LinalgBackendBase -{ -public: - #define DEFINE_FOR_ALL_PTYPE(METHODNAME, Container) \ - METHODNAME(bool, Container); \ - METHODNAME(char, Container); \ - METHODNAME(int8_t, Container); \ - METHODNAME(uint8_t, Container); \ - METHODNAME(int16_t, Container); \ - METHODNAME(uint16_t, Container); \ - METHODNAME(int32_t, Container); \ - METHODNAME(uint32_t, Container); \ - METHODNAME(int64_t, Container); \ - METHODNAME(uint64_t, Container); \ - METHODNAME(float32_t, Container); \ - METHODNAME(float64_t, Container); \ - METHODNAME(floatmax_t, Container); \ - METHODNAME(complex128_t, Container); \ - - #define DEFINE_FOR_REAL_PTYPE(METHODNAME, Container) \ - METHODNAME(bool, Container); \ - METHODNAME(char, Container); \ - METHODNAME(int8_t, Container); \ - METHODNAME(uint8_t, Container); \ - METHODNAME(int16_t, Container); \ - METHODNAME(uint16_t, Container); \ - METHODNAME(int32_t, Container); \ - METHODNAME(uint32_t, Container); \ - 
METHODNAME(int64_t, Container); \ - METHODNAME(uint64_t, Container); \ - METHODNAME(float32_t, Container); \ - METHODNAME(float64_t, Container); \ - METHODNAME(floatmax_t, Container); - - #define DEFINE_FOR_NON_INTEGER_PTYPE(METHODNAME, Container) \ - METHODNAME(float32_t, Container); \ - METHODNAME(float64_t, Container); \ - METHODNAME(floatmax_t, Container); \ - METHODNAME(complex128_t, Container); - - #define DEFINE_FOR_NUMERIC_PTYPE(METHODNAME, Container) \ - METHODNAME(char, Container); \ - METHODNAME(int8_t, Container); \ - METHODNAME(uint8_t, Container); \ - METHODNAME(int16_t, Container); \ - METHODNAME(uint16_t, Container); \ - METHODNAME(int32_t, Container); \ - METHODNAME(uint32_t, Container); \ - METHODNAME(int64_t, Container); \ - METHODNAME(uint64_t, Container); \ - METHODNAME(float32_t, Container); \ - METHODNAME(float64_t, Container); \ - METHODNAME(floatmax_t, Container); - - /** Implementation of @see LinalgBackendBase::add */ - #define BACKEND_GENERIC_IN_PLACE_ADD(Type, Container) \ - virtual void add(Container& a, Container& b, Type alpha, \ - Type beta, Container& result) const \ - { \ - add_impl(a, b, alpha, beta, result); \ - } - DEFINE_FOR_NUMERIC_PTYPE(BACKEND_GENERIC_IN_PLACE_ADD, SGVector) - DEFINE_FOR_NUMERIC_PTYPE(BACKEND_GENERIC_IN_PLACE_ADD, SGMatrix) - #undef BACKEND_GENERIC_IN_PLACE_ADD - - /** Implementation of @see LinalgBackendBase::cholesky_factor */ - #define BACKEND_GENERIC_CHOLESKY_FACTOR(Type, Container) \ - virtual Container cholesky_factor(const Container& A, \ - const bool lower) const \ - { \ - return cholesky_factor_impl(A, lower); \ - } - DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_CHOLESKY_FACTOR, SGMatrix) - #undef BACKEND_GENERIC_CHOLESKY_FACTOR - - /** Implementation of @see LinalgBackendBase::cholesky_solver */ - #define BACKEND_GENERIC_CHOLESKY_SOLVER(Type, Container) \ - virtual SGVector cholesky_solver(const Container& L, \ - const SGVector& b, const bool lower) const \ - { \ - return cholesky_solver_impl(L, 
b, lower); \ - } - DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_CHOLESKY_SOLVER, SGMatrix) - #undef BACKEND_GENERIC_CHOLESKY_SOLVER - - /** Implementation of @see LinalgBackendBase::dot */ - #define BACKEND_GENERIC_DOT(Type, Container) \ - virtual Type dot(const Container& a, const Container& b) const \ - { \ - return dot_impl(a, b); \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_DOT, SGVector) - #undef BACKEND_GENERIC_DOT - - /** Implementation of @see LinalgBackendBase::element_prod */ - #define BACKEND_GENERIC_IN_PLACE_ELEMENT_PROD(Type, Container) \ - virtual void element_prod(Container& a, Container& b,\ - Container& result) const \ - { \ - element_prod_impl(a, b, result); \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_ELEMENT_PROD, SGMatrix) - #undef BACKEND_GENERIC_IN_PLACE_ELEMENT_PROD - - /** Implementation of @see LinalgBackendBase::element_prod */ - #define BACKEND_GENERIC_IN_PLACE_BLOCK_ELEMENT_PROD(Type, Container) \ - virtual void element_prod(linalg::Block>& a, \ - linalg::Block>& b, Container& result) const \ - { \ - element_prod_impl(a, b, result); \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_BLOCK_ELEMENT_PROD, SGMatrix) - #undef BACKEND_GENERIC_IN_PLACE_BLOCK_ELEMENT_PROD - - /** Implementation of @see LinalgBackendBase::logistic */ - #define BACKEND_GENERIC_LOGISTIC(Type, Container) \ - virtual void logistic(Container& a, Container& result) const \ - { \ - logistic_impl(a, result); \ - } - DEFINE_FOR_NUMERIC_PTYPE(BACKEND_GENERIC_LOGISTIC, SGMatrix) - #undef BACKEND_GENERIC_LOGISTIC - - /** Implementation of @see LinalgBackendBase::matrix_prod */ - #define BACKEND_GENERIC_IN_PLACE_MATRIX_PROD(Type, Container) \ - virtual void matrix_prod(SGMatrix& a, Container& b,\ - Container& result, bool transpose_A, bool transpose_B) const \ - { \ - matrix_prod_impl(a, b, result, transpose_A, transpose_B); \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_MATRIX_PROD, SGVector) - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_MATRIX_PROD, 
SGMatrix) - #undef BACKEND_GENERIC_IN_PLACE_MATRIX_PROD - - /** Implementation of @see LinalgBackendBase::max */ - #define BACKEND_GENERIC_MAX(Type, Container) \ - virtual Type max(const Container& a) const \ - { \ - return max_impl(a); \ - } - DEFINE_FOR_REAL_PTYPE(BACKEND_GENERIC_MAX, SGVector) - DEFINE_FOR_REAL_PTYPE(BACKEND_GENERIC_MAX, SGMatrix) - #undef BACKEND_GENERIC_MAX - - /** Implementation of @see LinalgBackendBase::mean */ - #define BACKEND_GENERIC_REAL_MEAN(Type, Container) \ - virtual float64_t mean(const Container& a) const \ - { \ - return mean_impl(a); \ - } - DEFINE_FOR_REAL_PTYPE(BACKEND_GENERIC_REAL_MEAN, SGVector) - DEFINE_FOR_REAL_PTYPE(BACKEND_GENERIC_REAL_MEAN, SGMatrix) - #undef BACKEND_GENERIC_REAL_MEAN - - /** Implementation of @see LinalgBackendBase::mean */ - #define BACKEND_GENERIC_COMPLEX_MEAN(Container) \ - virtual complex128_t mean(const Container& a) const \ - { \ - return mean_impl(a); \ - } - BACKEND_GENERIC_COMPLEX_MEAN(SGVector) - BACKEND_GENERIC_COMPLEX_MEAN(SGMatrix) - #undef BACKEND_GENERIC_COMPLEX_MEAN - - /** Implementation of @see LinalgBackendBase::range_fill */ - #define BACKEND_GENERIC_RANGE_FILL(Type, Container) \ - virtual void range_fill(Container& a, const Type start) const \ - { \ - range_fill_impl(a, start); \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_RANGE_FILL, SGVector) - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_RANGE_FILL, SGMatrix) - #undef BACKEND_GENERIC_RANGE_FILL - - /** Implementation of @see linalg::scale */ - #define BACKEND_GENERIC_IN_PLACE_SCALE(Type, Container) \ - virtual void scale(Container& a, Type alpha, Container& result) const \ - { \ - scale_impl(a, alpha, result); \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_SCALE, SGVector) - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_SCALE, SGMatrix) - #undef BACKEND_GENERIC_IN_PLACE_SCALE - - /** Implementation of @see LinalgBackendBase::set_const */ - #define BACKEND_GENERIC_SET_CONST(Type, Container) \ - virtual void set_const(Container& a, 
const Type value) const \ - { \ - set_const_impl(a, value); \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SET_CONST, SGVector) - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SET_CONST, SGMatrix) - #undef BACKEND_GENERIC_SET_CONST - - /** Implementation of @see LinalgBackendBase::sum */ - #define BACKEND_GENERIC_SUM(Type, Container) \ - virtual Type sum(const Container& a, bool no_diag) const \ - { \ - return sum_impl(a, no_diag); \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SUM, SGVector) - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SUM, SGMatrix) - #undef BACKEND_GENERIC_SUM - - /** Implementation of @see LinalgBackendBase::sum */ - #define BACKEND_GENERIC_BLOCK_SUM(Type, Container) \ - virtual Type sum(const linalg::Block>& a, bool no_diag) const \ - { \ - return sum_impl(a, no_diag); \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_BLOCK_SUM, SGMatrix) - #undef BACKEND_GENERIC_BLOCK_SUM - - /** Implementation of @see LinalgBackendBase::sum_symmetric */ - #define BACKEND_GENERIC_SYMMETRIC_SUM(Type, Container) \ - virtual Type sum_symmetric(const Container& a, bool no_diag) const \ - { \ - return sum_symmetric_impl(a, no_diag); \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SYMMETRIC_SUM, SGMatrix) - #undef BACKEND_GENERIC_SYMMETRIC_SUM - - /** Implementation of @see LinalgBackendBase::sum_symmetric */ - #define BACKEND_GENERIC_SYMMETRIC_BLOCK_SUM(Type, Container) \ - virtual Type sum_symmetric(const linalg::Block>& a, bool no_diag) const \ - { \ - return sum_symmetric_impl(a, no_diag); \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SYMMETRIC_BLOCK_SUM, SGMatrix) - #undef BACKEND_GENERIC_SYMMETRIC_BLOCK_SUM - - /** Implementation of @see LinalgBackendBase::colwise_sum */ - #define BACKEND_GENERIC_COLWISE_SUM(Type, Container) \ - virtual SGVector colwise_sum(const Container& a, bool no_diag) const \ - { \ - return colwise_sum_impl(a, no_diag); \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_COLWISE_SUM, SGMatrix) - #undef BACKEND_GENERIC_COLWISE_SUM - - /** Implementation of @see 
LinalgBackendBase::colwise_sum */ - #define BACKEND_GENERIC_BLOCK_COLWISE_SUM(Type, Container) \ - virtual SGVector colwise_sum(const linalg::Block>& a, bool no_diag) const \ - { \ - return colwise_sum_impl(a, no_diag); \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_BLOCK_COLWISE_SUM, SGMatrix) - #undef BACKEND_GENERIC_BLOCK_COLWISE_SUM - - /** Implementation of @see LinalgBackendBase::rowwise_sum */ - #define BACKEND_GENERIC_ROWWISE_SUM(Type, Container) \ - virtual SGVector rowwise_sum(const Container& a, bool no_diag) const \ - { \ - return rowwise_sum_impl(a, no_diag); \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_ROWWISE_SUM, SGMatrix) - #undef BACKEND_GENERIC_ROWWISE_SUM - - /** Implementation of @see LinalgBackendBase::rowwise_sum */ - #define BACKEND_GENERIC_BLOCK_ROWWISE_SUM(Type, Container) \ - virtual SGVector rowwise_sum(const linalg::Block>& a, bool no_diag) const \ - { \ - return rowwise_sum_impl(a, no_diag); \ - } - DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_BLOCK_ROWWISE_SUM, SGMatrix) - #undef BACKEND_GENERIC_BLOCK_ROWWISE_SUM - - #undef DEFINE_FOR_ALL_PTYPE - -private: - /** Eigen3 vector result = alpha*A + beta*B method */ - template - void add_impl(SGVector& a, SGVector& b, T alpha, T beta, SGVector& result) const - { - typename SGVector::EigenVectorXtMap a_eig = a; - typename SGVector::EigenVectorXtMap b_eig = b; - typename SGVector::EigenVectorXtMap result_eig = result; - - result_eig = alpha * a_eig + beta * b_eig; - } - - /** Eigen3 matrix result = alpha*A + beta*B method */ - template - void add_impl(SGMatrix& a, SGMatrix& b, T alpha, T beta, SGMatrix& result) const + /** @brief Linalg methods with Eigen3 backend */ + class LinalgBackendEigen : public LinalgBackendBase { - typename SGMatrix::EigenMatrixXtMap a_eig = a; - typename SGMatrix::EigenMatrixXtMap b_eig = b; - typename SGMatrix::EigenMatrixXtMap result_eig = result; + public: +/** Implementation of @see LinalgBackendBase::add */ +#define BACKEND_GENERIC_IN_PLACE_ADD(Type, Container) \ + 
virtual void add( \ + Container& a, Container& b, Type alpha, Type beta, \ + Container& result) const; + DEFINE_FOR_NUMERIC_PTYPE(BACKEND_GENERIC_IN_PLACE_ADD, SGVector) + DEFINE_FOR_NUMERIC_PTYPE(BACKEND_GENERIC_IN_PLACE_ADD, SGMatrix) +#undef BACKEND_GENERIC_IN_PLACE_ADD + +/** Implementation of @see LinalgBackendBase::add_col_vec */ +#define BACKEND_GENERIC_ADD_COL_VEC(Type, Container) \ + virtual void add_col_vec( \ + const SGMatrix& A, index_t i, const SGVector& b, \ + Container& result, Type alpha, Type beta) const; + DEFINE_FOR_NUMERIC_PTYPE(BACKEND_GENERIC_ADD_COL_VEC, SGVector) + DEFINE_FOR_NUMERIC_PTYPE(BACKEND_GENERIC_ADD_COL_VEC, SGMatrix) +#undef BACKEND_GENERIC_ADD_COL_VEC + +/** Implementation of @see LinalgBackendBase::add_vector */ +#define BACKEND_GENERIC_ADD(Type, Container) \ + virtual void add_vector( \ + const SGMatrix& A, const SGVector& b, \ + SGMatrix& result, Type alpha, Type beta) const; + DEFINE_FOR_NUMERIC_PTYPE(BACKEND_GENERIC_ADD, SGMatrix) +#undef BACKEND_GENERIC_ADD + +/** Implementation of @see LinalgBackendBase::add_scalar */ +#define BACKEND_GENERIC_ADD_SCALAR(Type, Container) \ + virtual void add_scalar(Container& a, Type b) const; + DEFINE_FOR_NUMERIC_PTYPE(BACKEND_GENERIC_ADD_SCALAR, SGVector) + DEFINE_FOR_NUMERIC_PTYPE(BACKEND_GENERIC_ADD_SCALAR, SGMatrix) +#undef BACKEND_GENERIC_ADD_SCALAR + +/** Implementation of @see LinalgBackendBase::center_matrix */ +#define BACKEND_GENERIC_CENTER_MATRIX(Type, Container) \ + virtual void center_matrix(Container& A) const; + DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_CENTER_MATRIX, SGMatrix) +#undef BACKEND_GENERIC_CENTER_MATRIX + +/** Implementation of @see LinalgBackendBase::cholesky_factor */ +#define BACKEND_GENERIC_CHOLESKY_FACTOR(Type, Container) \ + virtual Container cholesky_factor( \ + const Container& A, const bool lower) const; + DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_CHOLESKY_FACTOR, SGMatrix) +#undef BACKEND_GENERIC_CHOLESKY_FACTOR + +/** Implementation of @see 
LinalgBackendBase::cholesky_solver */ +#define BACKEND_GENERIC_CHOLESKY_SOLVER(Type, Container) \ + virtual SGVector cholesky_solver( \ + const Container& L, const SGVector& b, const bool lower) \ + const; + DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_CHOLESKY_SOLVER, SGMatrix) +#undef BACKEND_GENERIC_CHOLESKY_SOLVER + +/** Implementation of @see linalg::cross_entropy */ +#define BACKEND_GENERIC_CROSS_ENTROPY(Type, Container) \ + virtual Type cross_entropy( \ + const Container& P, const Container& Q) const; + DEFINE_FOR_NON_INTEGER_REAL_PTYPE( + BACKEND_GENERIC_CROSS_ENTROPY, SGMatrix) +#undef BACKEND_GENERIC_CROSS_ENTROPY + +/** Implementation of @see LinalgBackendBase::dot */ +#define BACKEND_GENERIC_DOT(Type, Container) \ + virtual Type dot(const Container& a, const Container& b) const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_DOT, SGVector) +#undef BACKEND_GENERIC_DOT + +/** Implementation of @see LinalgBackendBase::eigen_solver */ +#define BACKEND_GENERIC_EIGEN_SOLVER(Type, Container) \ + virtual void eigen_solver( \ + const Container& A, SGVector& eigenvalues, \ + SGMatrix& eigenvectors) const; + DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_EIGEN_SOLVER, SGMatrix) +#undef BACKEND_GENERIC_EIGEN_SOLVER + +/** Implementation of @see LinalgBackendBase::eigen_solver_symmetric */ +#define BACKEND_GENERIC_EIGEN_SOLVER_SYMMETRIC(Type, Container) \ + virtual void eigen_solver_symmetric( \ + const Container& A, SGVector& eigenvalues, \ + SGMatrix& eigenvectors, index_t k) const; + DEFINE_FOR_NON_INTEGER_PTYPE( + BACKEND_GENERIC_EIGEN_SOLVER_SYMMETRIC, SGMatrix) +#undef BACKEND_GENERIC_EIGEN_SOLVER_SYMMETRIC + +/** Implementation of @see LinalgBackendBase::element_prod */ +#define BACKEND_GENERIC_IN_PLACE_ELEMENT_PROD(Type, Container) \ + virtual void element_prod( \ + Container& a, Container& b, Container& result) \ + const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_ELEMENT_PROD, SGMatrix) +#undef BACKEND_GENERIC_IN_PLACE_ELEMENT_PROD + +/** Implementation of @see 
LinalgBackendBase::element_prod */ +#define BACKEND_GENERIC_IN_PLACE_BLOCK_ELEMENT_PROD(Type, Container) \ + virtual void element_prod( \ + linalg::Block>& a, linalg::Block>& b, \ + Container& result) const; + DEFINE_FOR_ALL_PTYPE( + BACKEND_GENERIC_IN_PLACE_BLOCK_ELEMENT_PROD, SGMatrix) +#undef BACKEND_GENERIC_IN_PLACE_BLOCK_ELEMENT_PROD + +/** Implementation of @see linalg::exponent */ +#define BACKEND_GENERIC_EXPONENT(Type, Container) \ + virtual void exponent(const Container& a, Container& result) \ + const + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_EXPONENT, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_EXPONENT, SGMatrix) +#undef BACKEND_GENERIC_EXPONENT + +/** Implementation of @see LinalgBackendBase::identity */ +#define BACKEND_GENERIC_IDENTITY(Type, Container) \ + virtual void identity(Container& identity_matrix) const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IDENTITY, SGMatrix) +#undef BACKEND_GENERIC_IDENTITY + +/** Implementation of @see LinalgBackendBase::logistic */ +#define BACKEND_GENERIC_LOGISTIC(Type, Container) \ + virtual void logistic(Container& a, Container& result) const; + DEFINE_FOR_NUMERIC_PTYPE(BACKEND_GENERIC_LOGISTIC, SGMatrix) +#undef BACKEND_GENERIC_LOGISTIC + +/** Implementation of @see LinalgBackendBase::matrix_prod */ +#define BACKEND_GENERIC_IN_PLACE_MATRIX_PROD(Type, Container) \ + virtual void matrix_prod( \ + SGMatrix& a, Container& b, Container& result, \ + bool transpose_A, bool transpose_B) const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_MATRIX_PROD, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_MATRIX_PROD, SGMatrix) +#undef BACKEND_GENERIC_IN_PLACE_MATRIX_PROD + +/** Implementation of @see LinalgBackendBase::max */ +#define BACKEND_GENERIC_MAX(Type, Container) \ + virtual Type max(const Container& a) const; + DEFINE_FOR_REAL_PTYPE(BACKEND_GENERIC_MAX, SGVector) + DEFINE_FOR_REAL_PTYPE(BACKEND_GENERIC_MAX, SGMatrix) +#undef BACKEND_GENERIC_MAX + +/** Implementation of @see LinalgBackendBase::mean */ 
+#define BACKEND_GENERIC_REAL_MEAN(Type, Container) \ + virtual float64_t mean(const Container& a) const; + DEFINE_FOR_REAL_PTYPE(BACKEND_GENERIC_REAL_MEAN, SGVector) + DEFINE_FOR_REAL_PTYPE(BACKEND_GENERIC_REAL_MEAN, SGMatrix) +#undef BACKEND_GENERIC_REAL_MEAN + +/** Implementation of @see LinalgBackendBase::mean */ +#define BACKEND_GENERIC_COMPLEX_MEAN(Container) \ + virtual complex128_t mean(const Container& a) const; + BACKEND_GENERIC_COMPLEX_MEAN(SGVector) + BACKEND_GENERIC_COMPLEX_MEAN(SGMatrix) +#undef BACKEND_GENERIC_COMPLEX_MEAN + +/** Implementation of @see linalg::multiply_by_logistic_derivative */ +#define BACKEND_GENERIC_MULTIPLY_BY_LOGISTIC_DERIV(Type, Container) \ + virtual void multiply_by_logistic_derivative( \ + Container& a, Container& result) const; + DEFINE_FOR_NUMERIC_PTYPE( + BACKEND_GENERIC_MULTIPLY_BY_LOGISTIC_DERIV, SGMatrix) +#undef BACKEND_GENERIC_MULTIPLY_BY_LOGISTIC_DERIV + +/** Implementation of @see linalg::multiply_by_rectified_linear_derivative */ +#define BACKEND_GENERIC_MULTIPLY_BY_RECTIFIED_LINEAR_DERIV(Type, Container) \ + virtual void multiply_by_rectified_linear_derivative( \ + Container& a, Container& result) const; + DEFINE_FOR_NON_INTEGER_REAL_PTYPE( + BACKEND_GENERIC_MULTIPLY_BY_RECTIFIED_LINEAR_DERIV, SGMatrix) +#undef BACKEND_GENERIC_MULTIPLY_BY_RECTIFIED_LINEAR_DERIV + +/** Implementation of @see LinalgBackendBase::qr_solver */ +#define BACKEND_GENERIC_QR_SOLVER(Type, Container) \ + virtual Container qr_solver( \ + const SGMatrix& A, const Container& b) const; + DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_QR_SOLVER, SGVector) + DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_QR_SOLVER, SGMatrix) +#undef BACKEND_GENERIC_QR_SOLVER + +/** Implementation of @see LinalgBackendBase::range_fill */ +#define BACKEND_GENERIC_RANGE_FILL(Type, Container) \ + virtual void range_fill(Container& a, const Type start) const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_RANGE_FILL, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_RANGE_FILL, 
SGMatrix) +#undef BACKEND_GENERIC_RANGE_FILL + +/** Implementation of @see linalg::rectified_linear */ +#define BACKEND_GENERIC_RECTIFIED_LINEAR(Type, Container) \ + virtual void rectified_linear(Container& a, Container& result) \ + const; + DEFINE_FOR_REAL_PTYPE(BACKEND_GENERIC_RECTIFIED_LINEAR, SGMatrix) +#undef BACKEND_GENERIC_RECTIFIED_LINEAR + +/** Implementation of @see linalg::scale */ +#define BACKEND_GENERIC_IN_PLACE_SCALE(Type, Container) \ + virtual void scale( \ + Container& a, Type alpha, Container& result) const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_SCALE, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_IN_PLACE_SCALE, SGMatrix) +#undef BACKEND_GENERIC_IN_PLACE_SCALE + +/** Implementation of @see LinalgBackendBase::set_const */ +#define BACKEND_GENERIC_SET_CONST(Type, Container) \ + virtual void set_const(Container& a, const Type value) const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SET_CONST, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SET_CONST, SGMatrix) +#undef BACKEND_GENERIC_SET_CONST + +/** Implementation of @see linalg::softmax */ +#define BACKEND_GENERIC_SOFTMAX(Type, Container) \ + virtual void softmax(Container& a) const; + DEFINE_FOR_NON_INTEGER_REAL_PTYPE(BACKEND_GENERIC_SOFTMAX, SGMatrix) +#undef BACKEND_GENERIC_SOFTMAX + +/** Implementation of @see linalg::squared_error */ +#define BACKEND_GENERIC_SQUARED_ERROR(Type, Container) \ + virtual Type squared_error( \ + const Container& P, const Container& Q) const; + DEFINE_FOR_NON_INTEGER_REAL_PTYPE( + BACKEND_GENERIC_SQUARED_ERROR, SGMatrix) +#undef BACKEND_GENERIC_SQUARED_ERROR + +/** Implementation of @see LinalgBackendBase::sum */ +#define BACKEND_GENERIC_SUM(Type, Container) \ + virtual Type sum(const Container& a, bool no_diag) const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SUM, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SUM, SGMatrix) +#undef BACKEND_GENERIC_SUM + +/** Implementation of @see LinalgBackendBase::sum */ +#define BACKEND_GENERIC_BLOCK_SUM(Type, 
Container) \ + virtual Type sum(const linalg::Block>& a, bool no_diag) \ + const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_BLOCK_SUM, SGMatrix) +#undef BACKEND_GENERIC_BLOCK_SUM + +/** Implementation of @see LinalgBackendBase::sum_symmetric */ +#define BACKEND_GENERIC_SYMMETRIC_SUM(Type, Container) \ + virtual Type sum_symmetric(const Container& a, bool no_diag) const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SYMMETRIC_SUM, SGMatrix) +#undef BACKEND_GENERIC_SYMMETRIC_SUM + +/** Implementation of @see LinalgBackendBase::sum_symmetric */ +#define BACKEND_GENERIC_SYMMETRIC_BLOCK_SUM(Type, Container) \ + virtual Type sum_symmetric( \ + const linalg::Block>& a, bool no_diag) const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_SYMMETRIC_BLOCK_SUM, SGMatrix) +#undef BACKEND_GENERIC_SYMMETRIC_BLOCK_SUM + +/** Implementation of @see LinalgBackendBase::colwise_sum */ +#define BACKEND_GENERIC_COLWISE_SUM(Type, Container) \ + virtual SGVector colwise_sum(const Container& a, bool no_diag) \ + const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_COLWISE_SUM, SGMatrix) +#undef BACKEND_GENERIC_COLWISE_SUM + +/** Implementation of @see LinalgBackendBase::colwise_sum */ +#define BACKEND_GENERIC_BLOCK_COLWISE_SUM(Type, Container) \ + virtual SGVector colwise_sum( \ + const linalg::Block>& a, bool no_diag) const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_BLOCK_COLWISE_SUM, SGMatrix) +#undef BACKEND_GENERIC_BLOCK_COLWISE_SUM + +/** Implementation of @see LinalgBackendBase::rowwise_sum */ +#define BACKEND_GENERIC_ROWWISE_SUM(Type, Container) \ + virtual SGVector rowwise_sum(const Container& a, bool no_diag) \ + const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_ROWWISE_SUM, SGMatrix) +#undef BACKEND_GENERIC_ROWWISE_SUM + +/** Implementation of @see LinalgBackendBase::rowwise_sum */ +#define BACKEND_GENERIC_BLOCK_ROWWISE_SUM(Type, Container) \ + virtual SGVector rowwise_sum( \ + const linalg::Block>& a, bool no_diag) const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_BLOCK_ROWWISE_SUM, SGMatrix) +#undef 
BACKEND_GENERIC_BLOCK_ROWWISE_SUM + +/** Implementation of @see LinalgBackendBase::svd */ +#define BACKEND_GENERIC_SVD(Type, Container) \ + virtual void svd( \ + const Container& A, SGVector s, Container U, \ + bool thin_U, linalg::SVDAlgorithm alg) const; + DEFINE_FOR_NON_INTEGER_PTYPE(BACKEND_GENERIC_SVD, SGMatrix) +#undef BACKEND_GENERIC_SVD + +/** Implementation of @see LinalgBackendBase::trace */ +#define BACKEND_GENERIC_TRACE(Type, Container) \ + virtual Type trace(const Container& A) const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_TRACE, SGMatrix) +#undef BACKEND_GENERIC_TRACE + +/** Implementation of @see LinalgBackendBase::transpose_matrix */ +#define BACKEND_GENERIC_TRANSPOSE_MATRIX(Type, Container) \ + virtual Container transpose_matrix(const Container& A) const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_TRANSPOSE_MATRIX, SGMatrix) +#undef BACKEND_GENERIC_TRANSPOSE_MATRIX + +/** Implementation of @see LinalgBackendBase::triangular_solver */ +#define BACKEND_GENERIC_TRIANGULAR_SOLVER(Type, Container) \ + virtual Container triangular_solver( \ + const SGMatrix& L, const Container& b, const bool lower) \ + const; + DEFINE_FOR_NON_INTEGER_PTYPE( + BACKEND_GENERIC_TRIANGULAR_SOLVER, SGVector) + DEFINE_FOR_NON_INTEGER_PTYPE( + BACKEND_GENERIC_TRIANGULAR_SOLVER, SGMatrix) +#undef BACKEND_GENERIC_TRIANGULAR_SOLVER + +/** Implementation of @see LinalgBackendBase::zero */ +#define BACKEND_GENERIC_ZERO(Type, Container) \ + virtual void zero(Container& a) const; + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_ZERO, SGVector) + DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_ZERO, SGMatrix) +#undef BACKEND_GENERIC_ZERO - result_eig = alpha * a_eig + beta * b_eig; - } +#undef DEFINE_FOR_ALL_PTYPE +#undef DEFINE_FOR_REAL_PTYPE +#undef DEFINE_FOR_NON_INTEGER_PTYPE +#undef DEFINE_FOR_NUMERIC_PTYPE - /** Eigen3 Cholesky decomposition */ - template - SGMatrix cholesky_factor_impl(const SGMatrix& A, const bool lower) const - { - SGMatrix c(A.num_rows, A.num_cols); - set_const_impl(c, 0); - typename 
SGMatrix::EigenMatrixXtMap A_eig = A; - typename SGMatrix::EigenMatrixXtMap c_eig = c; - - Eigen::LLT > llt(A_eig); - - //compute matrix L or U - if(lower==false) - c_eig = llt.matrixU(); - else - c_eig = llt.matrixL(); - - /* - * checking for success - * - * 0: Eigen::Success. Decomposition was successful - * 1: Eigen::NumericalIssue. The provided data did not satisfy the prerequisites. + private: + /** Eigen3 vector result = alpha*A + beta*B method */ + template + void add_impl( + SGVector& a, SGVector& b, T alpha, T beta, + SGVector& result) const; + + /** Eigen3 matrix result = alpha*A + beta*B method */ + template + void add_impl( + SGMatrix& a, SGMatrix& b, T alpha, T beta, + SGMatrix& result) const; + + /** Eigen3 add column vector method */ + template + void add_col_vec_impl( + const SGMatrix& A, index_t i, const SGVector& b, + SGMatrix& result, T alpha, T beta) const; + + /** Eigen3 add column vector method */ + template + void add_col_vec_impl( + const SGMatrix& A, index_t i, const SGVector& b, + SGVector& result, T alpha, T beta) const; + + /** Eigen3 add vector to each column of matrix method */ + template + void add_vector_impl( + const SGMatrix& A, const SGVector& b, SGMatrix& result, + T alpha, T beta) const; + + /** Eigen3 vector add scalar method */ + template + void add_scalar_impl(SGVector& a, T b) const; + + /** Eigen3 matrix add scalar method */ + template + void add_scalar_impl(SGMatrix& a, T b) const; + + /** Eigen3 center matrix method */ + template + void center_matrix_impl(SGMatrix& A) const; + + /** Eigen3 Cholesky decomposition */ + template + SGMatrix + cholesky_factor_impl(const SGMatrix& A, const bool lower) const; + + /** Eigen3 Cholesky solver */ + template + SGVector cholesky_solver_impl( + const SGMatrix& L, const SGVector& b, const bool lower) const; + + /** Eigen3 cross_entropy method + * The cross entropy is defined as \f$ H(P,Q) = - \sum_{ij} + * P[i,j]log(Q[i,j]) \f$ */ - REQUIRE(llt.info()!=Eigen::NumericalIssue, "Matrix is 
not Hermitian positive definite!\n"); + template + T cross_entropy_impl(const SGMatrix& p, const SGMatrix& q) const; - return c; - } - - /** Eigen3 Cholesky solver */ - template - SGVector cholesky_solver_impl(const SGMatrix& L, const SGVector& b, - const bool lower) const - { - SGVector x(b.size()); - set_const_impl(x, 0); - typename SGMatrix::EigenMatrixXtMap L_eig = L; - typename SGVector::EigenVectorXtMap b_eig = b; - typename SGVector::EigenVectorXtMap x_eig = x; - - if (lower == false) - { - Eigen::TriangularView::EigenMatrixXt, - 0, Eigen::Stride<0,0> >, Eigen::Upper> tlv(L_eig); - - x_eig = (tlv.transpose()).solve(tlv.solve(b_eig)); - } - else - { - Eigen::TriangularView::EigenMatrixXt, - 0, Eigen::Stride<0,0> >, Eigen::Lower> tlv(L_eig); - x_eig = (tlv.transpose()).solve(tlv.solve(b_eig)); - } - - return x; - } - - /** Eigen3 vector dot-product method */ - template - T dot_impl(const SGVector& a, const SGVector& b) const - { - return (typename SGVector::EigenVectorXtMap(a)).dot(typename SGVector::EigenVectorXtMap(b)); - } - - /** Eigen3 matrix in-place elementwise product method */ - template - void element_prod_impl(SGMatrix& a, SGMatrix& b, SGMatrix& result) const - { - typename SGMatrix::EigenMatrixXtMap a_eig = a; - typename SGMatrix::EigenMatrixXtMap b_eig = b; - typename SGMatrix::EigenMatrixXtMap result_eig = result; - - result_eig = a_eig.array() * b_eig.array(); - } - - /** Eigen3 logistic method. 
Calculates f(x) = 1/(1+exp(-x)) */ - template - void logistic_impl(SGMatrix& a, SGMatrix& result) const - { - typename SGMatrix::EigenMatrixXtMap a_eig = a; - typename SGMatrix::EigenMatrixXtMap result_eig = result; - - result_eig = (T)1 / (1 + ((-1 * a_eig).array()).exp()); - } - - /** Eigen3 matrix block in-place elementwise product method */ - template - void element_prod_impl(linalg::Block>& a, - linalg::Block>& b, SGMatrix& result) const - { - typename SGMatrix::EigenMatrixXtMap a_eig = a.m_matrix; - typename SGMatrix::EigenMatrixXtMap b_eig = b.m_matrix; - typename SGMatrix::EigenMatrixXtMap result_eig = result; - - Eigen::Block::EigenMatrixXtMap> a_block = - a_eig.block(a.m_row_begin, a.m_col_begin, a.m_row_size, a.m_col_size); - Eigen::Block::EigenMatrixXtMap> b_block = - b_eig.block(b.m_row_begin, b.m_col_begin, b.m_row_size, b.m_col_size); - - result_eig = a_block.array() * b_block.array(); - } - - /** Eigen3 matrix * vector in-place product method */ - template - void matrix_prod_impl(SGMatrix& a, SGVector& b, SGVector& result, - bool transpose, bool transpose_B=false) const - { - typename SGMatrix::EigenMatrixXtMap a_eig = a; - typename SGVector::EigenVectorXtMap b_eig = b; - typename SGVector::EigenVectorXtMap result_eig = result; - - if (transpose) - result_eig = a_eig.transpose() * b_eig; - else - result_eig = a_eig * b_eig; - } - - /** Eigen3 matrix in-place product method */ - template - void matrix_prod_impl(SGMatrix& a, SGMatrix& b, SGMatrix& result, - bool transpose_A, bool transpose_B) const - { - typename SGMatrix::EigenMatrixXtMap a_eig = a; - typename SGMatrix::EigenMatrixXtMap b_eig = b; - typename SGMatrix::EigenMatrixXtMap result_eig = result; - - if (transpose_A && transpose_B) - result_eig = a_eig.transpose() * b_eig.transpose(); - - else if (transpose_A) - result_eig = a_eig.transpose() * b_eig; - - else if (transpose_B) - result_eig = a_eig * b_eig.transpose(); - - else - result_eig = a_eig * b_eig; - } - - /** Return the largest 
element in the vector with Eigen3 library */ - template - T max_impl(const SGVector& vec) const - { - return (typename SGVector::EigenVectorXtMap(vec)).maxCoeff(); - } - - /** Return the largest element in the matrix with Eigen3 library */ - template - T max_impl(const SGMatrix& mat) const - { - return (typename SGMatrix::EigenMatrixXtMap(mat)).maxCoeff(); - } - - /** Real eigen3 vector and matrix mean method */ - template class Container> - typename std::enable_if::value, float64_t>::type - mean_impl(const Container& a) const - { - return sum_impl(a)/(float64_t(a.size())); - } - - /** Complex eigen3 vector and matrix mean method */ - template